diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md
new file mode 100644
index 000000000..8fd955356
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/custom.md
@@ -0,0 +1,185 @@
+---
+name: Issue and enhancement template
+about: Used to report issues and request enhancements for llama.cpp
+title: "[User] Insert summary of your issue or enhancement.."
+labels: ''
+assignees: ''
+
+---
+
+# Prerequisites
+
+Please answer the following questions for yourself before submitting an issue.
+
+- [ ] I am running the latest code. Development is very rapid so there are no tagged versions as of now.
+- [ ] I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
+- [ ] I [searched using keywords relevant to my issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests) to make sure that I am creating a new issue that is not already open (or closed).
+- [ ] I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new bug or useful enhancement to share.
+
+# Expected Behavior
+
+Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do.
+
+# Current Behavior
+
+Please provide a detailed written description of what `llama.cpp` did, instead.
+
+# Environment and Context
+
+Please provide detailed information about your computer setup. This is important in case the issue is not reproducible except for under certain specific conditions.
+
+* Physical (or virtual) hardware you are using, e.g. for Linux:
+
+`$ lscpu`
+
+* Operating System, e.g. for Linux:
+
+`$ uname -a`
+
+* SDK version, e.g. for Linux:
+
+```
+$ python3 --version
+$ make --version
+$ g++ --version
+```
+
+# Failure Information (for bugs)
+
+Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template.
+
+# Steps to Reproduce
+
+Please provide detailed steps for reproducing the issue. We are not sitting in front of your screen, so the more detail the better.
+
+1. step 1
+2. step 2
+3. step 3
+4. etc.
+
+# Failure Logs
+
+Please include any relevant log snippets or files. If it works under one configuration but not under another, please provide logs for both configurations and their corresponding outputs so it is easy to see where behavior changes.
+
+Also, please try to **avoid using screenshots** if at all possible. Instead, copy/paste the console output and use [Github's markdown](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) to cleanly format your logs for easy readability.
+
+Example environment info:
+```
+llama.cpp$ git log | head -1
+commit 2af23d30434a677c6416812eea52ccc0af65119c
+
+llama.cpp$ lscpu | egrep "AMD|Flags"
+Vendor ID:                       AuthenticAMD
+Model name:                      AMD Ryzen Threadripper 1950X 16-Core Processor
+Flags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid amd_dcm aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb hw_pstate ssbd ibpb vmmcall fsgsbase bmi1 avx2 smep bmi2 rdseed adx smap clflushopt sha_ni xsaveopt xsavec xgetbv1 xsaves clzero irperf xsaveerptr arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif overflow_recov succor smca sme sev
+Virtualization:                  AMD-V
+
+llama.cpp$ python3 --version
+Python 3.10.9
+
+llama.cpp$ pip list | egrep "torch|numpy|sentencepiece"
+numpy                         1.24.2
+numpydoc                      1.5.0
+sentencepiece                 0.1.97
+torch                         1.13.1
+torchvision                   0.14.1
+
+llama.cpp$ make --version | head -1
+GNU Make 4.3
+
+$ md5sum ./models/65B/ggml-model-q4_0.bin
+dbdd682cce80e2d6e93cefc7449df487  ./models/65B/ggml-model-q4_0.bin
+```
+
+Example run with the Linux command [perf](https://www.brendangregg.com/perf.html)
+```
+llama.cpp$ perf stat ./main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p "Please close your issue when it has been answered."
+main: seed = 1679149377
+llama_model_load: loading model from './models/65B/ggml-model-q4_0.bin' - please wait ...
+llama_model_load: n_vocab = 32000
+llama_model_load: n_ctx   = 512
+llama_model_load: n_embd  = 8192
+llama_model_load: n_mult  = 256
+llama_model_load: n_head  = 64
+llama_model_load: n_layer = 80
+llama_model_load: n_rot   = 128
+llama_model_load: f16     = 2
+llama_model_load: n_ff    = 22016
+llama_model_load: n_parts = 8
+llama_model_load: ggml ctx size = 41477.73 MB
+llama_model_load: memory_size =  2560.00 MB, n_mem = 40960
+llama_model_load: loading model part 1/8 from './models/65B/ggml-model-q4_0.bin'
+llama_model_load: .......................................................................................... done
+llama_model_load: model size =  4869.09 MB / num tensors = 723
+llama_model_load: loading model part 2/8 from './models/65B/ggml-model-q4_0.bin.1'
+llama_model_load: .......................................................................................... done
+llama_model_load: model size =  4869.09 MB / num tensors = 723
+llama_model_load: loading model part 3/8 from './models/65B/ggml-model-q4_0.bin.2'
+llama_model_load: .......................................................................................... done
+llama_model_load: model size =  4869.09 MB / num tensors = 723
+llama_model_load: loading model part 4/8 from './models/65B/ggml-model-q4_0.bin.3'
+llama_model_load: .......................................................................................... done
+llama_model_load: model size =  4869.09 MB / num tensors = 723
+llama_model_load: loading model part 5/8 from './models/65B/ggml-model-q4_0.bin.4'
+llama_model_load: .......................................................................................... done
+llama_model_load: model size =  4869.09 MB / num tensors = 723
+llama_model_load: loading model part 6/8 from './models/65B/ggml-model-q4_0.bin.5'
+llama_model_load: .......................................................................................... done
+llama_model_load: model size =  4869.09 MB / num tensors = 723
+llama_model_load: loading model part 7/8 from './models/65B/ggml-model-q4_0.bin.6'
+llama_model_load: .......................................................................................... done
+llama_model_load: model size =  4869.09 MB / num tensors = 723
+llama_model_load: loading model part 8/8 from './models/65B/ggml-model-q4_0.bin.7'
+llama_model_load: .......................................................................................... done
+llama_model_load: model size =  4869.09 MB / num tensors = 723
+
+system_info: n_threads = 16 / 32 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 |
+
+main: prompt: 'Please close your issue when it has been answered.'
+main: number of tokens in prompt = 11
+     1 -> ''
+ 12148 -> 'Please'
+  3802 -> ' close'
+   596 -> ' your'
+  2228 -> ' issue'
+   746 -> ' when'
+   372 -> ' it'
+   756 -> ' has'
+  1063 -> ' been'
+  7699 -> ' answered'
+ 29889 -> '.'
+
+sampling parameters: temp = 0.800000, top_k = 40, top_p = 0.950000, repeat_last_n = 64, repeat_penalty = 1.300000
+
+
+Please close your issue when it has been answered.
+@duncan-donut: I'm trying to figure out what kind of "support" you need for this script and why, exactly? Is there a question about how the code works that hasn't already been addressed in one or more comments below this ticket, or are we talking something else entirely like some sorta bugfixing job because your server setup is different from mine??
+I can understand if your site needs to be running smoothly and you need help with a fix of sorts but there should really be nothing wrong here that the code itself could not handle. And given that I'm getting reports about how it works perfectly well on some other servers, what exactly are we talking? A detailed report will do wonders in helping us get this resolved for ya quickly so please take your time and describe the issue(s) you see as clearly & concisely as possible!!
+@duncan-donut: I'm not sure if you have access to cPanel but you could try these instructions. It is worth a shot! Let me know how it goes (or what error message, exactly!) when/if ya give that code a go? [end of text]
+
+
+main: mem per token = 71159620 bytes
+main:     load time = 19309.95 ms
+main:   sample time =   168.62 ms
+main:  predict time = 223895.61 ms / 888.47 ms per token
+main:    total time = 246406.42 ms
+
+ Performance counter stats for './main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p Please close your issue when it has been answered.':
+
+        3636882.89 msec task-clock                #   14.677 CPUs utilized
+             13509      context-switches          #    3.714 /sec
+              2436      cpu-migrations            #    0.670 /sec
+          10476679      page-faults               #    2.881 K/sec
+    13133115082869      cycles                    #    3.611 GHz                      (16.77%)
+       29314462753      stalled-cycles-frontend   #    0.22% frontend cycles idle     (16.76%)
+    10294402631459      stalled-cycles-backend    #   78.39% backend cycles idle      (16.74%)
+    23479217109614      instructions              #    1.79  insn per cycle
+                                                  #    0.44  stalled cycles per insn  (16.76%)
+     2353072268027      branches                  #  647.002 M/sec                    (16.77%)
+        1998682780      branch-misses             #    0.08% of all branches          (16.76%)
+
+     247.802177522 seconds time elapsed
+
+    3618.573072000 seconds user
+      18.491698000 seconds sys
+```
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 000000000..84faad37a
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,632 @@
+name: CI
+
+on:
+  workflow_dispatch: # allows manual triggering
+    inputs:
+      create_release:
+        description: 'Create new release'
+        required: true
+        type: boolean
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
+
+env:
+  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+  GGML_NLOOP: 3
+  GGML_NITER: 1
+  GGML_N_THREADS: 1
+
+jobs:
+  ubuntu-focal-make:
+    runs-on: ubuntu-20.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential gcc-8
+
+      - name: Build
+        id: make_build
+        run: |
+          CC=gcc-8 make
+
+  ubuntu-latest-cmake:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake ..
+          cmake --build . --config Release
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest --verbose --timeout 900
+
+  ubuntu-latest-cmake-sanitizer:
+    runs-on: ubuntu-latest
+
+    continue-on-error: true
+
+    strategy:
+      matrix:
+        sanitizer: [ADDRESS, THREAD, UNDEFINED]
+        build_type: [Debug, Release]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+          cmake --build . --config ${{ matrix.build_type }}
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest --verbose --timeout 900
+
+  ubuntu-latest-cmake-mpi:
+    runs-on: ubuntu-latest
+
+    continue-on-error: true
+
+    strategy:
+      matrix:
+        mpi_library: [mpich, libopenmpi-dev]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ${{ matrix.mpi_library }}
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake -DLLAMA_MPI=ON ..
+          cmake --build . --config Release
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest --verbose
+
+  macOS-latest-make:
+    runs-on: macos-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: Build
+        id: make_build
+        run: |
+          make
+
+  macOS-latest-cmake:
+    runs-on: macos-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          mkdir build
+          cd build
+          cmake -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF ..
+          cmake --build . --config Release
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest --verbose --timeout 900
+
+  windows-latest-cmake:
+    runs-on: windows-latest
+
+    env:
+      OPENBLAS_VERSION: 0.3.23
+      OPENCL_VERSION: 2023.04.17
+      CLBLAST_VERSION: 1.6.0
+
+    strategy:
+      matrix:
+        include:
+          - build: 'noavx'
+            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
+          - build: 'avx2'
+            defines: '-DLLAMA_BUILD_SERVER=ON'
+          - build: 'avx'
+            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF'
+          - build: 'avx512'
+            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+          - build: 'clblast'
+            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
+          - build: 'openblas'
+            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v1
+
+      - name: Download OpenCL SDK
+        id: get_opencl
+        if: ${{ matrix.build == 'clblast' }}
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/opencl.zip -L "https://github.com/KhronosGroup/OpenCL-SDK/releases/download/v${env:OPENCL_VERSION}/OpenCL-SDK-v${env:OPENCL_VERSION}-Win-x64.zip"
+          mkdir $env:RUNNER_TEMP/opencl
+          tar.exe -xvf $env:RUNNER_TEMP/opencl.zip --strip-components=1 -C $env:RUNNER_TEMP/opencl
+
+      - name: Download CLBlast
+        id: get_clblast
+        if: ${{ matrix.build == 'clblast' }}
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/clblast.7z -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-windows-x64.7z"
+          curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE"
+          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/clblast.7z
+          rename-item $env:RUNNER_TEMP/CLBlast-${env:CLBLAST_VERSION}-windows-x64 clblast
+          foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) {
+            $txt = Get-Content -Path $f -Raw
+            $txt.Replace('C:/vcpkg/packages/opencl_x64-windows/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8
+          }
+
+      - name: Download OpenBLAS
+        id: get_openblas
+        if: ${{ matrix.build == 'openblas' }}
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
+          curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
+          mkdir $env:RUNNER_TEMP/openblas
+          tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
+          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
+          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
+          $lib =  $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
+          & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. ${{ matrix.defines }}
+          cmake --build . --config Release
+
+      - name: Add clblast.dll
+        id: add_clblast_dll
+        if: ${{ matrix.build == 'clblast' }}
+        run: |
+          cp $env:RUNNER_TEMP/clblast/lib/clblast.dll ./build/bin/Release
+          cp $env:RUNNER_TEMP/CLBlast.LICENSE.txt ./build/bin/Release/CLBlast-${env:CLBLAST_VERSION}.txt
+
+      - name: Add libopenblas.dll
+        id: add_libopenblas_dll
+        if: ${{ matrix.build == 'openblas' }}
+        run: |
+          cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
+          cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
+
+      - name: Check AVX512F support
+        id: check_avx512f
+        if: ${{ matrix.build == 'avx512' }}
+        continue-on-error: true
+        run: |
+          cd build
+          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
+          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
+          $cl =  $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
+          echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
+          & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
+          .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
+
+      - name: Test
+        id: cmake_test
+        if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # Test AVX-512 only when possible
+        run: |
+          cd build
+          ctest -C Release --verbose --timeout 900
+
+      - name: Get commit hash
+        id: commit
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: pr-mpt/actions-commit-hash@v2
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
+          7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip .\build\bin\Release\*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v3
+        with:
+          path: |
+            llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip
+
+  windows-latest-cmake-cublas:
+    runs-on: windows-latest
+
+    strategy:
+      matrix:
+        cuda: ['12.1.0', '11.7.1']
+        build: ['cublas']
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v1
+
+      - uses: Jimver/cuda-toolkit@v0.2.10
+        id: cuda-toolkit
+        with:
+          cuda: ${{ matrix.cuda }}
+          # TODO(green-sky): _dev seems to fail, and non dev are not enought
+          #sub-packages: '["nvcc", "cudart", "cublas", "cudart_dev", "cublas_dev"]'
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON
+          cmake --build . --config Release
+
+      - name: Get commit hash
+        id: commit
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: pr-mpt/actions-commit-hash@v2
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v3
+        with:
+          path: |
+            llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
+
+      - name: Copy and pack Cuda runtime
+        if: ${{ matrix.cuda == '12.1.0' }}
+        # TODO(green-sky): paths are cuda 12 specific
+        run: |
+          echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+          mkdir '.\build\bin\cudart\'
+          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cudart64_12.dll" '.\build\bin\cudart\'
+          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublas64_12.dll" '.\build\bin\cudart\'
+          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublasLt64_12.dll" '.\build\bin\cudart\'
+          7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip .\build\bin\cudart\*
+
+      - name: Copy and pack Cuda runtime
+        if: ${{ matrix.cuda == '11.7.1' }}
+        # TODO(green-sky): paths are cuda 11 specific
+        run: |
+          echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+          mkdir '.\build\bin\cudart\'
+          ls "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin"
+          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cudart64_110.dll" '.\build\bin\cudart\'
+          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublas64_11.dll" '.\build\bin\cudart\'
+          cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublasLt64_11.dll" '.\build\bin\cudart\'
+          7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip .\build\bin\cudart\*
+
+      - name: Upload Cuda runtime
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v3
+        with:
+          path: |
+            cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
+
+  release:
+    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+
+    runs-on: ubuntu-latest
+
+    needs:
+      - ubuntu-focal-make
+      - ubuntu-latest-cmake
+      - macOS-latest-make
+      - macOS-latest-cmake
+      - windows-latest-cmake
+      - windows-latest-cmake-cublas
+
+    steps:
+      - name: Download artifacts
+        id: download-artifact
+        uses: actions/download-artifact@v3
+
+      - name: Get commit hash
+        id: commit
+        uses: pr-mpt/actions-commit-hash@v2
+
+      - name: Create release
+        id: create_release
+        uses: anzz1/action-create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}
+
+      - name: Upload release
+        id: upload_release
+        uses: actions/github-script@v3
+        with:
+          github-token: ${{secrets.GITHUB_TOKEN}}
+          script: |
+            const path = require('path');
+            const fs = require('fs');
+            const release_id = '${{ steps.create_release.outputs.id }}';
+            for (let file of await fs.readdirSync('./artifact')) {
+              if (path.extname(file) === '.zip') {
+                console.log('uploadReleaseAsset', file);
+                await github.repos.uploadReleaseAsset({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  release_id: release_id,
+                  name: file,
+                  data: await fs.readFileSync(`./artifact/${file}`)
+                });
+              }
+            }
+
+#  ubuntu-latest-gcc:
+#    runs-on: ubuntu-latest
+#
+#    strategy:
+#      matrix:
+#        build: [Debug, Release]
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v1
+#
+#      - name: Dependencies
+#        run: |
+#          sudo apt-get update
+#          sudo apt-get install build-essential
+#          sudo apt-get install cmake
+#
+#      - name: Configure
+#        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+#
+#      - name: Build
+#        run: |
+#          make
+#
+#  ubuntu-latest-clang:
+#    runs-on: ubuntu-latest
+#
+#    strategy:
+#      matrix:
+#        build: [Debug, Release]
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v1
+#
+#      - name: Dependencies
+#        run: |
+#          sudo apt-get update
+#          sudo apt-get install build-essential
+#          sudo apt-get install cmake
+#
+#      - name: Configure
+#        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
+#
+#      - name: Build
+#        run: |
+#          make
+#
+#  ubuntu-latest-gcc-sanitized:
+#    runs-on: ubuntu-latest
+#
+#    strategy:
+#      matrix:
+#        sanitizer: [ADDRESS, THREAD, UNDEFINED]
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v1
+#
+#      - name: Dependencies
+#        run: |
+#          sudo apt-get update
+#          sudo apt-get install build-essential
+#          sudo apt-get install cmake
+#
+#      - name: Configure
+#        run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
+#
+#      - name: Build
+#        run: |
+#          make
+#
+#  windows:
+#    runs-on: windows-latest
+#
+#    strategy:
+#      matrix:
+#        build: [Release]
+#        arch: [Win32, x64]
+#        include:
+#          - arch: Win32
+#            s2arc: x86
+#          - arch: x64
+#            s2arc: x64
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v1
+#
+#      - name: Add msbuild to PATH
+#        uses: microsoft/setup-msbuild@v1
+#
+#      - name: Configure
+#        run: >
+#          cmake -S . -B ./build -A ${{ matrix.arch }}
+#          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+#
+#      - name: Build
+#        run: |
+#          cd ./build
+#          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+#
+#      - name: Upload binaries
+#        uses: actions/upload-artifact@v1
+#        with:
+#          name: llama-bin-${{ matrix.arch }}
+#          path: build/bin/${{ matrix.build }}
+#
+#  windows-blas:
+#    runs-on: windows-latest
+#
+#    strategy:
+#      matrix:
+#        build: [Release]
+#        arch: [Win32, x64]
+#        blas: [ON]
+#        include:
+#          - arch: Win32
+#            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
+#            s2arc: x86
+#          - arch: x64
+#            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
+#            s2arc: x64
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v1
+#
+#      - name: Add msbuild to PATH
+#        uses: microsoft/setup-msbuild@v1
+#
+#      - name: Fetch OpenBLAS
+#        if: matrix.blas == 'ON'
+#        run: |
+#          C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
+#          7z x blas.zip -oblas -y
+#          copy blas/include/cblas.h .
+#          copy blas/include/openblas_config.h .
+#          echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
+#
+#      - name: Configure
+#        run: >
+#          cmake -S . -B ./build -A ${{ matrix.arch }}
+#          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+#          -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
+#          -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
+#
+#      - name: Build
+#        run: |
+#          cd ./build
+#          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+#
+#      - name: Copy libopenblas.dll
+#        if: matrix.blas == 'ON'
+#        run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
+#
+#      - name: Upload binaries
+#        if: matrix.blas == 'ON'
+#        uses: actions/upload-artifact@v1
+#        with:
+#          name: llama-blas-bin-${{ matrix.arch }}
+#          path: build/bin/${{ matrix.build }}
+#
+#  emscripten:
+#    runs-on: ubuntu-latest
+#
+#    strategy:
+#      matrix:
+#        build: [Release]
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v1
+#
+#      - name: Dependencies
+#        run: |
+#          wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
+#          tar -xvf master.tar.gz
+#          emsdk-master/emsdk update
+#          emsdk-master/emsdk install latest
+#          emsdk-master/emsdk activate latest
+#
+#      - name: Configure
+#        run: echo "tmp"
+#
+#      - name: Build
+#        run: |
+#          pushd emsdk-master
+#          source ./emsdk_env.sh
+#          popd
+#          emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+#          make
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
new file mode 100644
index 000000000..379fbd7ad
--- /dev/null
+++ b/.github/workflows/docker.yml
@@ -0,0 +1,65 @@
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+# GitHub recommends pinning actions to a commit SHA.
+# To get a newer version, you will need to update the SHA.
+# You can also reference a tag or branch, but the action may change without warning.
+
+name: Publish Docker image
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  push_to_registry:
+    name: Push Docker image to Docker Hub
+    if: github.event.pull_request.draft == false
+
+    runs-on: ubuntu-latest
+    env:
+      COMMIT_SHA: ${{ github.sha }}
+    strategy:
+      matrix:
+        config:
+          - { tag: "light", dockerfile: ".devops/main.Dockerfile" }
+          - { tag: "full", dockerfile: ".devops/full.Dockerfile" }
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v3
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push Docker image (versioned)
+        if: github.event_name == 'push'
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: true
+          platforms: linux/amd64,linux/arm64
+          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
+          file: ${{ matrix.config.dockerfile }}
+
+      - name: Build and push Docker image (tagged)
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: ${{ github.event_name == 'push' }}
+          platforms: linux/amd64,linux/arm64
+          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}"
+          file: ${{ matrix.config.dockerfile }}
diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml
new file mode 100644
index 000000000..b4e535acf
--- /dev/null
+++ b/.github/workflows/editorconfig.yml
@@ -0,0 +1,17 @@
+name: EditorConfig Checker
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  editorconfig:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: editorconfig-checker/action-editorconfig-checker@main
+      - run: editorconfig-checker
diff --git a/.github/workflows/tidy-post.yml b/.github/workflows/tidy-post.yml
new file mode 100644
index 000000000..03652760c
--- /dev/null
+++ b/.github/workflows/tidy-post.yml
@@ -0,0 +1,20 @@
+name: clang-tidy review post comments
+
+on:
+  workflow_dispatch:
+    workflows: ["clang-tidy-review"]
+    types:
+      - completed
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: ZedThree/clang-tidy-review/post@v0.13.0
+        # lgtm_comment_body, max_comments, and annotations need to be set on the posting workflow in a split setup
+        with:
+          # adjust options as necessary
+          lgtm_comment_body: ''
+          annotations: false
+          max_comments: 25
diff --git a/.github/workflows/tidy-review.yml b/.github/workflows/tidy-review.yml
new file mode 100644
index 000000000..a4bc8d976
--- /dev/null
+++ b/.github/workflows/tidy-review.yml
@@ -0,0 +1,23 @@
+name: clang-tidy-review
+
+on:
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  clang-tidy-review:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - uses: ZedThree/clang-tidy-review@v0.13.0
+      id: review
+      with:
+        lgtm_comment_body: ''
+        build_dir: build
+        cmake_command: cmake . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=on
+        split_workflow: true
+
+    - uses: ZedThree/clang-tidy-review/upload@v0.13.0
diff --git a/.gitignore b/.gitignore
index 107caa228..a4df837a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,8 +50,8 @@ models-mnt
 /embd-input-test
 /gguf
 /gguf-llama-simple
-/gptneox-main
 /libllama.so
+/llama-bench
 build-info.h
 arm_neon.h
 compile_commands.json
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c6ed458b3..bb63ef98e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -296,7 +296,6 @@ if (LLAMA_METAL)
     find_library(FOUNDATION_LIBRARY         Foundation              REQUIRED)
     find_library(METAL_FRAMEWORK            Metal                   REQUIRED)
     find_library(METALKIT_FRAMEWORK         MetalKit                REQUIRED)
-    find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
 
     set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
 
@@ -313,7 +312,6 @@ if (LLAMA_METAL)
         ${FOUNDATION_LIBRARY}
         ${METAL_FRAMEWORK}
         ${METALKIT_FRAMEWORK}
-        ${METALPERFORMANCE_FRAMEWORK}
         )
 endif()
 
@@ -499,9 +497,11 @@ else()
 endif()
 
 #
-# Build libraries
+# libraries
 #
 
+# ggml
+
 add_library(ggml OBJECT
             ggml.c
             ggml.h
@@ -526,6 +526,8 @@ if (BUILD_SHARED_LIBS)
     install(TARGETS ggml_shared LIBRARY)
 endif()
 
+# llama
+
 add_library(llama
             llama.cpp
             llama.h
@@ -547,6 +549,10 @@ if (BUILD_SHARED_LIBS)
     install(TARGETS llama LIBRARY)
 endif()
 
+#
+# install
+#
+
 include(GNUInstallDirs)
 install(
     FILES convert.py
@@ -570,11 +576,23 @@ install(
         WORLD_READ
         WORLD_EXECUTE
     DESTINATION ${CMAKE_INSTALL_BINDIR})
+if (LLAMA_METAL)
+    install(
+        FILES ggml-metal.metal
+        PERMISSIONS
+            OWNER_READ
+            OWNER_WRITE
+            GROUP_READ
+            WORLD_READ
+        DESTINATION ${CMAKE_INSTALL_BINDIR})
+endif()
 
 #
 # programs, examples and tests
 #
 
+add_subdirectory(common)
+
 if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
     include(CTest)
     add_subdirectory(tests)
diff --git a/Makefile b/Makefile
index 4d2a8cfa9..d31acc450 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple server embd-input-test gguf gptneox-main
+BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple server embd-input-test gguf llama-bench
 
 # Binaries only useful for tests
-TEST_TARGETS = tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0
+TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0
 
 default: $(BUILD_TARGETS)
 
@@ -45,8 +45,8 @@ OPT = -Ofast
 else
 OPT = -O3
 endif
-CFLAGS   = -I.              $(OPT) -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
+CFLAGS   = -I.            $(OPT) -std=c11   -fPIC
+CXXFLAGS = -I. -I./common $(OPT) -std=c++11 -fPIC
 LDFLAGS  =
 
 ifdef LLAMA_DEBUG
@@ -283,7 +283,7 @@ endif # LLAMA_CLBLAST
 ifdef LLAMA_METAL
 	CFLAGS   += -DGGML_USE_METAL -DGGML_METAL_NDEBUG
 	CXXFLAGS += -DGGML_USE_METAL
-	LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
+	LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit
 	OBJS     += ggml-metal.o
 endif # LLAMA_METAL
 
@@ -332,20 +332,20 @@ OBJS += ggml-alloc.o
 llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-common.o: examples/common.cpp examples/common.h
+common.o: common/common.cpp common/common.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-console.o: examples/console.cpp examples/console.h
+console.o: common/console.cpp common/console.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
+grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 libllama.so: llama.o ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
 
 clean:
-	rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch convert-llama2c-to-ggml embd-input-test gguf build-info.h $(TEST_TARGETS)
+	rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch convert-llama2c-to-ggml embd-input-test gguf llama-bench build-info.h $(TEST_TARGETS)
 
 #
 # Examples
@@ -388,15 +388,15 @@ embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-te
 gguf: examples/gguf/gguf.cpp                                  build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-gptneox-main: gptneox-main.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-
-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp    build-info.h ggml.o llama.o $(OBJS)
+train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp    build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp    build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+llama-bench: examples/llama-bench/llama-bench.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 build-info.h: $(wildcard .git/index) scripts/build-info.sh
 	@sh scripts/build-info.sh > $@.tmp
 	@if ! cmp -s $@.tmp $@; then \
@@ -418,7 +418,10 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o
 vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
-tests/test-grammar-parser: tests/test-grammar-parser.cpp examples/grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
+
+tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
 tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)
diff --git a/README.md b/README.md
index 6900b1152..a83a3c128 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,7 @@ as the main playground for developing new features for the [ggml](https://github
 - Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
 - Node.js: [hlhr202/llama-node](https://github.com/hlhr202/llama-node)
 - Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb)
+- Rust: [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp)
 - C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp)
 - Scala 3: [donderom/llm4s](https://github.com/donderom/llm4s)
 
@@ -238,12 +239,17 @@ In order to build llama.cpp you have three different options.
     cmake --build . --config Release
     ```
 
-- Using `Zig`:
+- Using `Zig` (version 0.11 or later):
+
+    Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,
+    it's also possible to cross compile for other operating systems and architectures:
 
     ```bash
-    zig build -Doptimize=ReleaseFast
+    zig build -Doptimize=ReleaseFast -Dtarget=x86_64-windows-gnu -Dcpu=x86_64+avx2+fma+f16c
     ```
 
+    The `zig targets` command will give you valid options to use.
+
 -   Using `gmake` (FreeBSD):
 
     1. Install and activate [DRM in FreeBSD](https://wiki.freebsd.org/Graphics)
@@ -284,7 +290,7 @@ When built with Metal support, you can enable GPU inference with the `--gpu-laye
 Any value larger than 0 will offload the computation to the GPU. For example:
 
 ```bash
-./main -m ./models/7B/ggml-model-q4_0.bin -n 128 -ngl 1
+./main -m ./models/7B/ggml-model-q4_0.gguf -n 128 -ngl 1
 ```
 
 ### MPI Build
@@ -323,7 +329,7 @@ The above will distribute the computation across 2 processes on the first host a
 Finally, you're ready to run a computation using `mpirun`:
 
 ```bash
-mpirun -hostfile hostfile -n 3 ./main -m ./models/7B/ggml-model-q4_0.bin -n 128
+mpirun -hostfile hostfile -n 3 ./main -m ./models/7B/ggml-model-q4_0.gguf -n 128
 ```
 
 ### BLAS Build
@@ -408,7 +414,7 @@ Building the program with BLAS support may lead to some performance improvements
   |-------------------------|------------------------|---------|-------------|
   | LLAMA_CUDA_FORCE_DMMV   | Boolean                |   false | Force the use of dequantization + matrix vector multiplication kernels instead of using kernels that do matrix vector multiplication on quantized data. By default the decision is made based on compute capability (MMVQ for 6.1/Pascal/GTX 1000 or higher). Does not affect k-quants. |
   | LLAMA_CUDA_DMMV_X       | Positive integer >= 32 |      32 | Number of values in x direction processed by the CUDA dequantization + matrix vector multiplication kernel per iteration. Increasing this value can improve performance on fast GPUs. Power of 2 heavily recommended. Does not affect k-quants. |
-  | LLAMA_CUDA_MMV_Y        | Positive integer       |       1 | Block size in y direction for the CUDA mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. |
+  | LLAMA_CUDA_MMV_Y        | Positive integer       |       1 | Block size in y direction for the CUDA mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. |
   | LLAMA_CUDA_F16          | Boolean                |   false | If enabled, use half-precision floating point arithmetic for the CUDA dequantization + mul mat vec kernels and for the q4_1 and q5_1 matrix matrix multiplication kernels. Can improve performance on relatively recent GPUs. |
   | LLAMA_CUDA_KQUANTS_ITER | 1 or 2                 |       2 | Number of values processed per iteration and per CUDA thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |
 
@@ -506,10 +512,10 @@ python3 convert.py models/7B/
   python convert.py models/7B/ --vocabtype bpe
 
 # quantize the model to 4-bits (using q4_0 method)
-./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0
+./quantize ./models/7B/ggml-model-f16.gguf ./models/7B/ggml-model-q4_0.gguf q4_0
 
 # run the inference
-./main -m ./models/7B/ggml-model-q4_0.bin -n 128
+./main -m ./models/7B/ggml-model-q4_0.gguf -n 128
 ```
 
 When running the larger models, make sure you have enough disk space to store all the intermediate files.
@@ -565,7 +571,7 @@ Here is an example of a few-shot interaction, invoked with the command
 ./examples/chat-13B.sh
 
 # custom arguments using a 13B model
-./main -m ./models/13B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
+./main -m ./models/13B/ggml-model-q4_0.gguf -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
 ```
 
 Note the use of `--color` to distinguish between user input and generated text. Other parameters are explained in more detail in the [README](examples/main/README.md) for the `main` example program.
@@ -628,6 +634,8 @@ OpenLLaMA is an openly licensed reproduction of Meta's original LLaMA model. It
 
 ### Using [GPT4All](https://github.com/nomic-ai/gpt4all)
 
+*Note: these instructions are likely obsoleted by the GGUF update*
+
 - Obtain the `tokenizer.model` file from LLaMA model and put it to `models`
 - Obtain the `added_tokens.json` file from Alpaca model and put it to `models`
 - Obtain the `gpt4all-lora-quantized.bin` file from GPT4All model and put it to `models/gpt4all-7B`
@@ -703,7 +711,7 @@ If your issue is with model generation quality, then please at least scan the fo
 #### How to run
 
 1. Download/extract: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
-2. Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
+2. Run `./perplexity -m models/7B/ggml-model-q4_0.gguf -f wiki.test.raw`
 3. Output:
 ```
 perplexity : calculating perplexity over 655 chunks
@@ -802,13 +810,13 @@ docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-
 On completion, you are ready to play!
 
 ```bash
-docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512
+docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512
 ```
 
 or with a light image:
 
 ```bash
-docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512
+docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512
 ```
 
 ### Docker With CUDA
@@ -839,8 +847,8 @@ The resulting images, are essentially the same as the non-CUDA images:
 After building locally, Usage is similar to the non-CUDA examples, but you'll need to add the `--gpus` flag. You will also want to use the `--n-gpu-layers` flag.
 
 ```bash
-docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
-docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
+docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
+docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
 ```
 
 ### Contributing
diff --git a/build.zig b/build.zig
index 04c88d8a2..f2769ba8c 100644
--- a/build.zig
+++ b/build.zig
@@ -1,5 +1,6 @@
 // Compatible with Zig Version 0.11.0
 const std = @import("std");
+const ArrayList = std.ArrayList;
 const Compile = std.Build.Step.Compile;
 const ConfigHeader = std.Build.Step.ConfigHeader;
 const Mode = std.builtin.Mode;
@@ -10,11 +11,31 @@ const Maker = struct {
     target: CrossTarget,
     optimize: Mode,
     config_header: *ConfigHeader,
+    enable_lto: bool,
 
-    const cflags = .{"-std=c11"};
-    const cxxflags = .{"-std=c++11"};
+    include_dirs: ArrayList([]const u8),
+    cflags: ArrayList([]const u8),
+    cxxflags: ArrayList([]const u8),
+    objs: ArrayList(*Compile),
 
-    fn init(builder: *std.build.Builder) Maker {
+    fn addInclude(m: *Maker, dir: []const u8) !void {
+        try m.include_dirs.append(dir);
+    }
+    fn addProjectInclude(m: *Maker, path: []const []const u8) !void {
+        try m.addInclude(try m.builder.build_root.join(m.builder.allocator, path));
+    }
+    fn addCFlag(m: *Maker, flag: []const u8) !void {
+        try m.cflags.append(flag);
+    }
+    fn addCxxFlag(m: *Maker, flag: []const u8) !void {
+        try m.cxxflags.append(flag);
+    }
+    fn addFlag(m: *Maker, flag: []const u8) !void {
+        try m.addCFlag(flag);
+        try m.addCxxFlag(flag);
+    }
+
+    fn init(builder: *std.build.Builder) !Maker {
         const commit_hash = @embedFile(".git/refs/heads/master");
         const config_header = builder.addConfigHeader(
             .{ .style = .blank, .include_path = "build-info.h" },
@@ -23,58 +44,71 @@ const Maker = struct {
                 .BUILD_COMMIT = commit_hash[0 .. commit_hash.len - 1], // omit newline
             },
         );
-        return Maker{
+        var m = Maker{
             .builder = builder,
             .target = builder.standardTargetOptions(.{}),
             .optimize = builder.standardOptimizeOption(.{}),
             .config_header = config_header,
+            .enable_lto = false,
+            .include_dirs = ArrayList([]const u8).init(builder.allocator),
+            .cflags = ArrayList([]const u8).init(builder.allocator),
+            .cxxflags = ArrayList([]const u8).init(builder.allocator),
+            .objs = ArrayList(*Compile).init(builder.allocator),
         };
+        try m.addCFlag("-std=c11");
+        try m.addCxxFlag("-std=c++11");
+        try m.addProjectInclude(&.{});
+        try m.addProjectInclude(&.{"examples"});
+        return m;
     }
 
     fn obj(m: *const Maker, name: []const u8, src: []const u8) *Compile {
         const o = m.builder.addObject(.{ .name = name, .target = m.target, .optimize = m.optimize });
         if (std.mem.endsWith(u8, src, ".c")) {
-            o.addCSourceFiles(&.{src}, &cflags);
+            o.addCSourceFiles(&.{src}, m.cflags.items);
             o.linkLibC();
         } else {
-            o.addCSourceFiles(&.{src}, &cxxflags);
+            o.addCSourceFiles(&.{src}, m.cxxflags.items);
             o.linkLibCpp();
         }
-        o.addIncludePath(.{ .path = "." });
-        o.addIncludePath(.{ .path = "./examples" });
+        for (m.include_dirs.items) |i| o.addIncludePath(.{ .path = i });
+        o.want_lto = m.enable_lto;
         return o;
     }
 
     fn exe(m: *const Maker, name: []const u8, src: []const u8, deps: []const *Compile) *Compile {
         const e = m.builder.addExecutable(.{ .name = name, .target = m.target, .optimize = m.optimize });
-        e.addIncludePath(.{ .path = "." });
-        e.addIncludePath(.{ .path = "./examples" });
-        e.addCSourceFiles(&.{src}, &cxxflags);
+        e.addCSourceFiles(&.{src}, m.cxxflags.items);
         for (deps) |d| e.addObject(d);
+        for (m.objs.items) |o| e.addObject(o);
+        for (m.include_dirs.items) |i| e.addIncludePath(.{ .path = i });
         e.linkLibC();
         e.linkLibCpp();
         e.addConfigHeader(m.config_header);
         m.builder.installArtifact(e);
-
-        // Currently a bug is preventing correct linking for optimized builds for Windows:
-        // https://github.com/ziglang/zig/issues/15958
-        if (e.target.isWindows()) {
-            e.want_lto = false;
-        }
+        e.want_lto = m.enable_lto;
         return e;
     }
 };
 
-pub fn build(b: *std.build.Builder) void {
-    const make = Maker.init(b);
+pub fn build(b: *std.build.Builder) !void {
+    var make = try Maker.init(b);
+    make.enable_lto = b.option(bool, "lto", "Enable LTO optimization, (default: false)") orelse false;
+
+    if (b.option(bool, "k-quants", "Enable K-quants, (default: true)") orelse true) {
+        try make.addFlag("-DGGML_USE_K_QUANTS");
+        const k_quants = make.obj("k_quants", "k_quants.c");
+        try make.objs.append(k_quants);
+    }
 
     const ggml = make.obj("ggml", "ggml.c");
     const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c");
     const llama = make.obj("llama", "llama.cpp");
     const common = make.obj("common", "examples/common.cpp");
+    const console = make.obj("common", "examples/console.cpp");
     const grammar_parser = make.obj("grammar-parser", "examples/grammar-parser.cpp");
 
-    _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, llama, common, grammar_parser });
+    _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, llama, common, console, grammar_parser });
     _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, llama });
     _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, llama, common });
     _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, llama, common });
diff --git a/ci/run.sh b/ci/run.sh
index 8dc394964..54ba6d710 100644
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -159,17 +159,17 @@ function gg_run_open_llama_3b_v2 {
 
     python3 ../convert.py ${path_models}
 
-    model_f16="${path_models}/ggml-model-f16.bin"
-    model_q8_0="${path_models}/ggml-model-q8_0.bin"
-    model_q4_0="${path_models}/ggml-model-q4_0.bin"
-    model_q4_1="${path_models}/ggml-model-q4_1.bin"
-    model_q5_0="${path_models}/ggml-model-q5_0.bin"
-    model_q5_1="${path_models}/ggml-model-q5_1.bin"
-    model_q2_k="${path_models}/ggml-model-q2_k.bin"
-    model_q3_k="${path_models}/ggml-model-q3_k.bin"
-    model_q4_k="${path_models}/ggml-model-q4_k.bin"
-    model_q5_k="${path_models}/ggml-model-q5_k.bin"
-    model_q6_k="${path_models}/ggml-model-q6_k.bin"
+    model_f16="${path_models}/ggml-model-f16.gguf"
+    model_q8_0="${path_models}/ggml-model-q8_0.gguf"
+    model_q4_0="${path_models}/ggml-model-q4_0.gguf"
+    model_q4_1="${path_models}/ggml-model-q4_1.gguf"
+    model_q5_0="${path_models}/ggml-model-q5_0.gguf"
+    model_q5_1="${path_models}/ggml-model-q5_1.gguf"
+    model_q2_k="${path_models}/ggml-model-q2_k.gguf"
+    model_q3_k="${path_models}/ggml-model-q3_k.gguf"
+    model_q4_k="${path_models}/ggml-model-q4_k.gguf"
+    model_q5_k="${path_models}/ggml-model-q5_k.gguf"
+    model_q6_k="${path_models}/ggml-model-q6_k.gguf"
 
     wiki_test_60="${path_wiki}/wiki.test-60.raw"
 
@@ -285,17 +285,17 @@ function gg_run_open_llama_7b_v2 {
 
     python3 ../convert.py ${path_models}
 
-    model_f16="${path_models}/ggml-model-f16.bin"
-    model_q8_0="${path_models}/ggml-model-q8_0.bin"
-    model_q4_0="${path_models}/ggml-model-q4_0.bin"
-    model_q4_1="${path_models}/ggml-model-q4_1.bin"
-    model_q5_0="${path_models}/ggml-model-q5_0.bin"
-    model_q5_1="${path_models}/ggml-model-q5_1.bin"
-    model_q2_k="${path_models}/ggml-model-q2_k.bin"
-    model_q3_k="${path_models}/ggml-model-q3_k.bin"
-    model_q4_k="${path_models}/ggml-model-q4_k.bin"
-    model_q5_k="${path_models}/ggml-model-q5_k.bin"
-    model_q6_k="${path_models}/ggml-model-q6_k.bin"
+    model_f16="${path_models}/ggml-model-f16.gguf"
+    model_q8_0="${path_models}/ggml-model-q8_0.gguf"
+    model_q4_0="${path_models}/ggml-model-q4_0.gguf"
+    model_q4_1="${path_models}/ggml-model-q4_1.gguf"
+    model_q5_0="${path_models}/ggml-model-q5_0.gguf"
+    model_q5_1="${path_models}/ggml-model-q5_1.gguf"
+    model_q2_k="${path_models}/ggml-model-q2_k.gguf"
+    model_q3_k="${path_models}/ggml-model-q3_k.gguf"
+    model_q4_k="${path_models}/ggml-model-q4_k.gguf"
+    model_q5_k="${path_models}/ggml-model-q5_k.gguf"
+    model_q6_k="${path_models}/ggml-model-q6_k.gguf"
 
     wiki_test="${path_wiki}/wiki.test.raw"
 
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
new file mode 100644
index 000000000..dead56118
--- /dev/null
+++ b/common/CMakeLists.txt
@@ -0,0 +1,20 @@
+# common
+
+set(TARGET common)
+
+add_library(${TARGET} OBJECT
+    common.h
+    common.cpp
+    console.h
+    console.cpp
+    grammar-parser.h
+    grammar-parser.cpp
+    )
+
+if (BUILD_SHARED_LIBS)
+    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif()
+
+target_include_directories(${TARGET} PUBLIC .)
+target_compile_features(${TARGET} PUBLIC cxx_std_11)
+target_link_libraries(${TARGET} PRIVATE llama)
diff --git a/examples/common.cpp b/common/common.cpp
similarity index 89%
rename from examples/common.cpp
rename to common/common.cpp
index 8beb63f36..d7e1a5725 100644
--- a/examples/common.cpp
+++ b/common/common.cpp
@@ -262,6 +262,21 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.cfg_negative_prompt = argv[i];
+        } else if (arg == "--cfg-negative-prompt-file") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            std::ifstream file(argv[i]);
+            if (!file) {
+                fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
+                invalid_param = true;
+                break;
+            }
+            std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.cfg_negative_prompt));
+            if (params.cfg_negative_prompt.back() == '\n') {
+                params.cfg_negative_prompt.pop_back();
+            }
         } else if (arg == "--cfg-scale") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -412,7 +427,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             }
             params.hellaswag_tasks = std::stoi(argv[i]);
         } else if (arg == "--ignore-eos") {
-            params.logit_bias[llama_token_eos()] = -INFINITY;
+            params.ignore_eos = true;
         } else if (arg == "--no-penalize-nl") {
             params.penalize_nl = false;
         } else if (arg == "-l" || arg == "--logit-bias") {
@@ -553,8 +568,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stdout, "                        or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
     fprintf(stdout, "  --grammar GRAMMAR     BNF-like grammar to constrain generations (see samples in grammars/ dir)\n");
     fprintf(stdout, "  --grammar-file FNAME  file to read grammar from\n");
-    fprintf(stdout, "  --cfg-negative-prompt PROMPT \n");
+    fprintf(stdout, "  --cfg-negative-prompt PROMPT\n");
     fprintf(stdout, "                        negative prompt to use for guidance. (default: empty)\n");
+    fprintf(stdout, "  --cfg-negative-prompt-file FNAME\n");
+    fprintf(stdout, "                        negative prompt file to use for guidance. (default: empty)\n");
     fprintf(stdout, "  --cfg-scale N         strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
     fprintf(stdout, "  --rope-scale N        RoPE context linear scaling factor, inverse of --rope-freq-scale (default: %g)\n", 1.0f/params.rope_freq_scale);
     fprintf(stdout, "  --rope-freq-base N    RoPE base frequency, used by NTK-aware scaling (default: %.1f)\n", params.rope_freq_base);
@@ -619,6 +636,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
     return "The";
 }
 
+//
+// Model utils
+//
+
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();
 
@@ -641,7 +662,7 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
     return lparams;
 }
 
-std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params) {
+std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
     auto lparams = llama_context_params_from_gpt_params(params);
 
     llama_model * model  = llama_load_model_from_file(params.model.c_str(), lparams);
@@ -670,5 +691,77 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         }
     }
 
+    if (params.ignore_eos) {
+        params.logit_bias[llama_token_eos(lctx)] = -INFINITY;
+    }
+
     return std::make_tuple(model, lctx);
 }
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos) {
+    // upper limit for the number of tokens
+    int n_tokens = text.length() + add_bos;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_str(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
+std::vector<llama_token> llama_tokenize_bpe(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos) {
+    int n_tokens = text.length() + add_bos;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
diff --git a/examples/common.h b/common/common.h
similarity index 87%
rename from examples/common.h
rename to common/common.h
index b8510dfe4..c50a6edfc 100644
--- a/examples/common.h
+++ b/common/common.h
@@ -2,7 +2,6 @@
 
 #pragma once
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
 
 #include <string>
@@ -33,7 +32,6 @@ struct gpt_params {
     float   rope_freq_scale                 = 1.0f;     // RoPE frequency scaling factor
 
     // sampling parameters
-    std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
     int32_t top_k             = 40;    // <= 0 to use vocab size
     float   top_p             = 0.95f; // 1.0 = disabled
     float   tfs_z             = 1.00f; // 1.0 = disabled
@@ -47,12 +45,14 @@ struct gpt_params {
     float   mirostat_tau      = 5.00f; // target entropy
     float   mirostat_eta      = 0.10f; // learning rate
 
+    std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
+
     // Classifier-Free Guidance
     // https://arxiv.org/abs/2306.17806
     std::string cfg_negative_prompt;       // string to help guidance
     float       cfg_scale         = 1.f;   // How strong is guidance
 
-    std::string model             = "models/7B/ggml-model.bin"; // model path
+    std::string model             = "models/7B/ggml-model-f16.gguf"; // model path
     std::string model_alias       = "unknown"; // model alias
     std::string prompt            = "";
     std::string path_prompt_cache = "";  // path to file for saving/loading prompt eval state
@@ -82,6 +82,7 @@ struct gpt_params {
     bool simple_io         = false; // improves compatibility with subprocesses and limited consoles
 
     bool input_prefix_bos  = false; // prefix BOS to user inputs, preceding input_prefix
+    bool ignore_eos        = false; // ignore generated EOS tokens
     bool instruct          = false; // instruction mode (used for Alpaca models)
     bool penalize_nl       = true;  // consider newlines as a repeatable token
     bool perplexity        = false; // compute perplexity over the prompt
@@ -103,5 +104,27 @@ std::string gpt_random_prompt(std::mt19937 & rng);
 // Model utils
 //
 
-std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
+std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params);
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos);
+
+std::vector<llama_token> llama_tokenize_bpe(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos);
+
+std::string llama_token_to_str(
+        const struct llama_context * ctx,
+                       llama_token   token);
+
+std::string llama_token_to_str_bpe(
+    const struct llama_context * ctx,
+                   llama_token   token);
diff --git a/examples/console.cpp b/common/console.cpp
similarity index 100%
rename from examples/console.cpp
rename to common/console.cpp
diff --git a/examples/console.h b/common/console.h
similarity index 100%
rename from examples/console.h
rename to common/console.h
diff --git a/examples/grammar-parser.cpp b/common/grammar-parser.cpp
similarity index 100%
rename from examples/grammar-parser.cpp
rename to common/grammar-parser.cpp
diff --git a/examples/grammar-parser.h b/common/grammar-parser.h
similarity index 100%
rename from examples/grammar-parser.h
rename to common/grammar-parser.h
diff --git a/convert-falcon-hf-to-gguf.py b/convert-falcon-hf-to-gguf.py
new file mode 100644
index 000000000..e0da3a04d
--- /dev/null
+++ b/convert-falcon-hf-to-gguf.py
@@ -0,0 +1,282 @@
+# HF falcon--> gguf conversion
+
+import gguf
+import os
+import sys
+import struct
+import json
+import numpy as np
+import torch
+
+from typing import Any, List
+from pathlib import Path
+from transformers import AutoTokenizer
+
+def bytes_to_unicode():
+    # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
+    """
+    Returns list of utf-8 byte and a corresponding list of unicode strings.
+    The reversible bpe codes work on unicode strings.
+    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
+    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
+    This is a significant percentage of your normal, say, 32K bpe vocab.
+    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
+    And avoids mapping to whitespace/control characters the bpe code barfs on.
+    """
+    bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
+    cs = bs[:]
+    n = 0
+    for b in range(2**8):
+        if b not in bs:
+            bs.append(b)
+            cs.append(2**8+n)
+            n += 1
+    cs = [chr(n) for n in cs]
+    return dict(zip(bs, cs))
+
+
+def count_model_parts(dir_model: str) -> int:
+    num_parts = 0
+    for filename in os.listdir(dir_model):
+        if filename.startswith("pytorch_model-"):
+            num_parts += 1
+
+    if num_parts > 0:
+        print("gguf: found " + str(num_parts) + " model parts")
+    return num_parts
+
+
+if len(sys.argv) < 3:
+    print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
+    print("  ftype == 0 -> float32")
+    print("  ftype == 1 -> float16")
+    sys.exit(1)
+
+
+# output in the same directory as the model
+dir_model = sys.argv[1]
+last_dir = os.path.basename(os.path.normpath(dir_model))
+
+# possible tensor data types
+#   ftype == 0 -> float32
+#   ftype == 1 -> float16
+
+# map from ftype to string
+ftype_str = ["f32", "f16"]
+
+ftype = 1
+if len(sys.argv) > 2:
+    ftype = int(sys.argv[2])
+    if ftype < 0 or ftype > 1:
+        print("Invalid ftype: " + str(ftype))
+
+        sys.exit(1)
+
+fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"
+
+print("gguf: loading model "+last_dir)
+
+with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
+    hparams = json.load(f)
+
+if hparams["architectures"][0] != "RWForCausalLM":
+    print("Model architecture not supported: " + hparams["architectures"][0])
+
+    sys.exit()
+
+# get number of model parts
+num_parts = count_model_parts(dir_model)
+
+ARCH=gguf.MODEL_ARCH.FALCON
+gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
+
+print("gguf: get model metadata")
+
+block_count = hparams["n_layer"]
+
+gguf_writer.add_name(last_dir)
+gguf_writer.add_context_length(2048) # not in config.json
+gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
+gguf_writer.add_embedding_length(hparams["hidden_size"])
+gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
+gguf_writer.add_block_count(block_count)
+gguf_writer.add_head_count(hparams["n_head"])
+if "n_head_kv" in hparams: gguf_writer.add_head_count_kv(hparams["n_head_kv"])
+gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
+
+# TOKENIZATION
+
+print("gguf: get tokenizer metadata")
+
+tokens: List[str] = []
+merges: List[str] = []
+
+
+if Path(dir_model + "/tokenizer.json").is_file():
+    # gpt2 tokenizer
+    gguf_writer.add_tokenizer_model("gpt2")
+
+    print("gguf: get gpt2 tokenizer merges")
+
+    with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
+        tokenizer_json = json.load(f)
+    merges = tokenizer_json["model"]["merges"]
+
+    gguf_writer.add_token_merges(merges)
+
+    print("gguf: get gpt2 tokenizer vocab")
+
+    vocab_size = len(tokenizer_json["model"]["vocab"])
+
+    # ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
+    tokenizer = AutoTokenizer.from_pretrained(dir_model)
+
+    reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
+    byte_encoder = bytes_to_unicode()
+    byte_decoder = {v: k for k, v in byte_encoder.items()}
+
+    for i in range(vocab_size):
+        if i in reverse_vocab:
+            try:
+                text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
+            except KeyError:
+                text = bytearray()
+                for c in reverse_vocab[i]:
+                    if ord(c) < 256:  # single byte character
+                        text.append(byte_decoder[ord(c)])
+                    else:  # multibyte special token character
+                        text.extend(c.encode('utf-8'))
+        else:
+            print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
+            pad_token = f"[PAD{i}]".encode("utf8")
+            text = bytearray(pad_token)
+
+        tokens.append(text)
+
+    gguf_writer.add_token_list(tokens)
+
+    if "added_tokens" in tokenizer_json and Path(dir_model + "/tokenizer_config.json").is_file():
+        print("gguf: get special token ids")
+
+        with open(dir_model + "/tokenizer_config.json", "r", encoding="utf-8") as f:
+            tokenizer_config = json.load(f)
+
+        # find special token ids
+
+        if "bos_token" in tokenizer_config:
+            for key in tokenizer_json["added_tokens"]:
+                if key["content"] == tokenizer_config["bos_token"]:
+                    gguf_writer.add_bos_token_id(key["id"])
+
+        if "eos_token" in tokenizer_config:
+            for key in tokenizer_json["added_tokens"]:
+                if key["content"] == tokenizer_config["eos_token"]:
+                    gguf_writer.add_eos_token_id(key["id"])
+
+        if "unk_token" in tokenizer_config:
+            for key in tokenizer_json["added_tokens"]:
+                if key["content"] == tokenizer_config["unk_token"]:
+                    gguf_writer.add_unk_token_id(key["id"])
+
+        if "sep_token" in tokenizer_config:
+            for key in tokenizer_json["added_tokens"]:
+                if key["content"] == tokenizer_config["sep_token"]:
+                    gguf_writer.add_sep_token_id(key["id"])
+
+        if "pad_token" in tokenizer_config:
+            for key in tokenizer_json["added_tokens"]:
+                if key["content"] == tokenizer_config["pad_token"]:
+                    gguf_writer.add_pad_token_id(key["id"])
+
+
+# TENSORS
+
+tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
+
+# params for qkv transform
+n_head = hparams["n_head"]
+n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
+head_dim = hparams["hidden_size"] // n_head
+
+# tensor info
+print("gguf: get tensor metadata")
+
+if num_parts == 0:
+    part_names = ("pytorch_model.bin",)
+else:
+    part_names = (
+        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
+    )
+
+for part_name in part_names:
+    print("gguf: loading model part '" + part_name + "'")
+    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
+
+    for name in model_part.keys():
+        data = model_part[name]
+
+        old_dtype = data.dtype
+
+        # convert any unsupported data types to float32
+        if data.dtype != torch.float16 and data.dtype != torch.float32:
+            data = data.to(torch.float32)
+
+        # QKV tensor transform
+        # The original query_key_value tensor contains n_head_kv "kv groups",
+        # each consisting of n_head/n_head_kv query weights followed by one key
+        # and one value weight (shared by all query heads in the kv group).
+        # This layout makes it a big pain to work with in GGML.
+        # So we rearrange them here,, so that we have n_head query weights
+        # followed by n_head_kv key weights followed by n_head_kv value weights,
+        # in contiguous fashion.
+        # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-ggml.py
+
+        if "query_key_value" in name:
+            qkv = data.view(n_head_kv, n_head // n_head_kv + 2, head_dim, head_dim * n_head)
+            q = qkv[:, :-2 ].reshape(n_head * head_dim, head_dim * n_head)
+            k = qkv[:, [-2]].reshape(n_head_kv * head_dim, head_dim * n_head)
+            v = qkv[:, [-1]].reshape(n_head_kv * head_dim, head_dim * n_head)
+            data = torch.cat((q,k,v)).reshape_as(data)
+
+        data = data.squeeze().numpy()
+
+        # map tensor names
+        if name.endswith(".weight") and name[:-7] in tensor_map:
+            name = tensor_map[name[:-7]] + ".weight"
+        elif name.endswith(".bias") and name[:-5] in tensor_map:
+            name = tensor_map[name[:-5]] + ".bias"
+        else:
+            print("Can not map tensor '" + name + "'")
+            sys.exit()
+
+        n_dims = len(data.shape)
+        data_dtype = data.dtype 
+
+        # if f32 desired, convert any float16 to float32
+        if ftype == 0 and data_dtype == np.float16:
+            data = data.astype(np.float32)
+
+        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
+        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+            data = data.astype(np.float32)
+
+        # if f16 desired, convert any float32 2-dim weight tensors to float16
+        if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+            data = data.astype(np.float16)
+
+        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+
+        gguf_writer.add_tensor(name, data)
+
+
+print("gguf: write header")
+gguf_writer.write_header_to_file()
+print("gguf: write metadata")
+gguf_writer.write_kv_data_to_file()
+print("gguf: write tensors")
+gguf_writer.write_tensors_to_file()
+
+gguf_writer.close()
+
+print("gguf: model successfully exported to '" + fname_out + "'")
+print("")
diff --git a/convert-gptneox-h5-to-gguf.py b/convert-gptneox-hf-to-gguf.py
similarity index 67%
rename from convert-gptneox-h5-to-gguf.py
rename to convert-gptneox-hf-to-gguf.py
index 79876eee3..a7cefc6f3 100644
--- a/convert-gptneox-h5-to-gguf.py
+++ b/convert-gptneox-hf-to-gguf.py
@@ -13,6 +13,8 @@ from pathlib import Path
 from transformers import AutoTokenizer
 
 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
+
+
 def bytes_to_unicode():
     """
     Returns list of utf-8 byte and a corresponding list of unicode strings.
@@ -34,6 +36,7 @@ def bytes_to_unicode():
     cs = [chr(n) for n in cs]
     return dict(zip(bs, cs))
 
+
 def count_model_parts(dir_model: str) -> int:
     num_parts = 0
     for filename in os.listdir(dir_model):
@@ -44,6 +47,7 @@ def count_model_parts(dir_model: str) -> int:
         print("gguf: found " + str(num_parts) + " model parts")
     return num_parts
 
+
 if len(sys.argv) < 3:
     print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
     print("  ftype == 0 -> float32")
@@ -58,7 +62,7 @@ last_dir = os.path.basename(os.path.normpath(dir_model))
 # possible tensor data types
 #   ftype == 0 -> float32
 #   ftype == 1 -> float16
-#
+
 # map from ftype to string
 ftype_str = ["f32", "f16"]
 
@@ -67,6 +71,7 @@ if len(sys.argv) > 2:
     ftype = int(sys.argv[2])
     if ftype < 0 or ftype > 1:
         print("Invalid ftype: " + str(ftype))
+
         sys.exit(1)
 
 fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"
@@ -77,30 +82,29 @@ with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 
 if hparams["architectures"][0] != "GPTNeoXForCausalLM":
-    print("Model architecture not supported: " + hparams["architectures"][0] )
+    print("Model architecture not supported: " + hparams["architectures"][0])
+
     sys.exit()
 
 # get number of model parts
 num_parts = count_model_parts(dir_model)
 
-gguf_writer = gguf.GGUFWriter.open(fname_out)
+ARCH=gguf.MODEL_ARCH.GPTNEOX
+gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
 print("gguf: get model metadata")
 
-llm_arch    = "gptneox"
 block_count = hparams["num_hidden_layers"]
 
-gguf_writer.add_architecture(llm_arch)
 gguf_writer.add_name(last_dir)
-gguf_writer.add_file_type( "All tensors F32" if ftype == 0 else "Most tensors F16, some F32")
-gguf_writer.add_context_length(llm_arch, hparams["max_position_embeddings"])
-gguf_writer.add_embedding_length(llm_arch, hparams["hidden_size"])
-gguf_writer.add_block_count(llm_arch, block_count)
-gguf_writer.add_feed_forward_length(llm_arch, hparams["intermediate_size"])
-gguf_writer.add_rope_dimension_count(llm_arch, int( hparams["rotary_pct"]*(hparams["hidden_size"]//hparams["num_attention_heads"])) )
-gguf_writer.add_head_count(llm_arch, hparams["num_attention_heads"])
-gguf_writer.add_parallel_residual(llm_arch, hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
-gguf_writer.add_layer_norm_eps(llm_arch, hparams["layer_norm_eps"])
+gguf_writer.add_context_length(hparams["max_position_embeddings"])
+gguf_writer.add_embedding_length(hparams["hidden_size"])
+gguf_writer.add_block_count(block_count)
+gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+gguf_writer.add_rope_dimension_count(int(hparams["rotary_pct"]*(hparams["hidden_size"]//hparams["num_attention_heads"])))
+gguf_writer.add_head_count(hparams["num_attention_heads"])
+gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
+gguf_writer.add_layer_norm_eps(hparams["layer_norm_eps"])
 
 # TOKENIZATION
 
@@ -124,14 +128,14 @@ if Path(dir_model + "/tokenizer.json").is_file():
 
     print("gguf: get gpt2 tokenizer vocab")
 
-    vocab_size = len( tokenizer_json["model"]["vocab"] )
+    vocab_size = len(tokenizer_json["model"]["vocab"])
 
     # ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
     tokenizer = AutoTokenizer.from_pretrained(dir_model)
 
     reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
     byte_encoder = bytes_to_unicode()
-    byte_decoder = {v:k for k, v in byte_encoder.items()}
+    byte_decoder = {v: k for k, v in byte_encoder.items()}
 
     for i in range(vocab_size):
         if i in reverse_vocab:
@@ -146,8 +150,9 @@ if Path(dir_model + "/tokenizer.json").is_file():
                         text.extend(c.encode('utf-8'))
         else:
             print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
-            padding_token = f"[PAD{i}]".encode("utf8")
-            text = bytearray(padding_token)
+            pad_token = f"[PAD{i}]".encode("utf8")
+            text = bytearray(pad_token)
+
         tokens.append(text)
 
     gguf_writer.add_token_list(tokens)
@@ -188,7 +193,7 @@ if Path(dir_model + "/tokenizer.json").is_file():
 
 # TENSORS
 
-tensor_map = gguf.get_tensor_name_map(block_count)
+tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
 
 # tensor info
 print("gguf: get tensor metadata")
@@ -201,7 +206,7 @@ else:
     )
 
 for part_name in part_names:
-    print("gguf: loading model part '"+ part_name + "'")
+    print("gguf: loading model part '" + part_name + "'")
     model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
 
     for name in model_part.keys():
@@ -211,71 +216,8 @@ for part_name in part_names:
         if name.endswith(".attention.masked_bias") or name.endswith(".attention.bias") or name.endswith(".attention.rotary_emb.inv_freq"):
             continue
 
-        # convert any unsupported data types to float32
-        if data.dtype != torch.float16 and data.dtype != torch.float32:
-            data = data.to(torch.float32)
-
-        data = data.squeeze().numpy()
-
-        # map tensor names
-        if name.endswith(".weight") and name[:-7] in tensor_map:
-            name = tensor_map[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tensor_map:
-            name = tensor_map[name[:-5]] + ".bias"
-        else:
-            print( "Can not map tensor '" + name + "'" )
-            sys.exit()
-
-        n_dims = len(data.shape)
-        data_dtype = data.dtype
-
-        # if f32 desired, convert any float16 to float32
-        if ftype == 0 and data.dtype == np.float16:
-            data_dtype = np.float32
-
-        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
-        if ftype == 1 and data.dtype == np.float16 and n_dims == 1:
-            data_dtype = np.float32
-
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-            data_dtype = np.float16
-
-        data_nbytes = data.size * 2 if data_dtype == np.float16 else data.size * 4
-
-        gguf_writer.add_tensor_info(name, data.shape, data_dtype, data_nbytes)
-
-
-print("gguf: write header")
-gguf_writer.write_header_to_file()
-print("gguf: write metadata")
-gguf_writer.write_kv_data_to_file()
-print("gguf: write tensor metadata")
-gguf_writer.write_ti_data_to_file()
-
-# tensor data
-print("gguf: convert and write tensor data")
-
-if num_parts == 0:
-    part_names = ("pytorch_model.bin",)
-else:
-    part_names = (
-        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
-    )
-
-for part_name in part_names:
-    print("gguf: loading model part '"+ part_name + "'")
-    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
-
-    for name in model_part.keys():
-        data = model_part[name]
-
         old_dtype = data.dtype
 
-        # we don't need these
-        if name.endswith(".attention.masked_bias") or name.endswith(".attention.bias") or name.endswith(".attention.rotary_emb.inv_freq"):
-            continue
-
         # convert any unsupported data types to float32
         if data.dtype != torch.float16 and data.dtype != torch.float32:
             data = data.to(torch.float32)
@@ -288,14 +230,14 @@ for part_name in part_names:
         elif name.endswith(".bias") and name[:-5] in tensor_map:
             name = tensor_map[name[:-5]] + ".bias"
         else:
-            print( "Can not map tensor '" + name + "'" )
+            print("Can not map tensor '" + name + "'")
             sys.exit()
 
         n_dims = len(data.shape)
         data_dtype = data.dtype
 
         # if f32 desired, convert any float16 to float32
-        if ftype == 0 and data.dtype == np.float16:
+        if ftype == 0 and data_dtype == np.float16:
             data = data.astype(np.float32)
 
         # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
@@ -306,12 +248,19 @@ for part_name in part_names:
         if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
             data = data.astype(np.float16)
 
-        print( name + ", shape " + str(len(data.shape)) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
 
-        gguf_writer.write_tensor_to_file(data)
+        gguf_writer.add_tensor(name, data)
+
+
+print("gguf: write header")
+gguf_writer.write_header_to_file()
+print("gguf: write metadata")
+gguf_writer.write_kv_data_to_file()
+print("gguf: write tensors")
+gguf_writer.write_tensors_to_file()
 
 gguf_writer.close()
 
-
-print("gguf: model successfully exported to '" + fname_out + "'" )
+print("gguf: model successfully exported to '" + fname_out + "'")
 print("")
diff --git a/convert-llama-7b-pth-to-gguf.py b/convert-llama-7b-pth-to-gguf.py
index c4e425ee3..7c767df60 100644
--- a/convert-llama-7b-pth-to-gguf.py
+++ b/convert-llama-7b-pth-to-gguf.py
@@ -18,6 +18,7 @@ from sentencepiece import SentencePieceProcessor
 # compatible with python < 3.9
 NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
 
+
 def count_model_parts(dir_model: str) -> int:
     num_parts = 0
     for filename in os.listdir(dir_model):
@@ -28,10 +29,12 @@ def count_model_parts(dir_model: str) -> int:
         print("gguf: found " + str(num_parts) + " model parts")
     return num_parts
 
+
 if len(sys.argv) < 3:
     print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
     print("  ftype == 0 -> float32")
     print("  ftype == 1 -> float16")
+
     sys.exit(1)
 
 
@@ -43,7 +46,7 @@ last_dir = os.path.basename(os.path.normpath(dir_model))
 # possible tensor data types
 #   ftype == 0 -> float32
 #   ftype == 1 -> float16
-#
+
 # map from ftype to string
 ftype_str = ["f32", "f16"]
 
@@ -52,6 +55,7 @@ if len(sys.argv) > 2:
     ftype = int(sys.argv[2])
     if ftype < 0 or ftype > 1:
         print("Invalid ftype: " + str(ftype))
+
         sys.exit(1)
 
 fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"
@@ -70,14 +74,15 @@ num_parts = count_model_parts(dir_model)
 
 if num_parts > 1:
     print("gguf: Only models with a single datafile are supported.")
+
     sys.exit()
 
-gguf_writer = gguf.GGUFWriter.open(fname_out)
+ARCH=gguf.MODEL_ARCH.LLAMA
+gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
 
 print("gguf: get model metadata")
 
-llm_arch = "llama"
 block_count = hparams["num_hidden_layers"]
 head_count = hparams["num_attention_heads"]
 
@@ -89,21 +94,19 @@ else:
 if "_name_or_path" in hparams:
     hf_repo = hparams["_name_or_path"]
 else:
-    hf_repo=""
+    hf_repo = ""
 
-gguf_writer.add_architecture(llm_arch)
 gguf_writer.add_name(last_dir)
-gguf_writer.add_file_type( "All tensors F32" if ftype == 0 else "Most tensors F16, some F32")
 gguf_writer.add_source_hf_repo(hf_repo)
-gguf_writer.add_tensor_data_layout(llm_arch, "Meta AI original pth")
-gguf_writer.add_context_length(llm_arch, hparams["max_position_embeddings"])
-gguf_writer.add_embedding_length(llm_arch, hparams["hidden_size"])
-gguf_writer.add_block_count(llm_arch, block_count)
-gguf_writer.add_feed_forward_length(llm_arch, hparams["intermediate_size"])
-gguf_writer.add_rope_dimension_count(llm_arch, hparams["hidden_size"] // hparams["num_attention_heads"])
-gguf_writer.add_head_count(llm_arch, head_count)
-gguf_writer.add_head_count_kv(llm_arch, head_count_kv)
-gguf_writer.add_layer_norm_rms_eps(llm_arch, hparams["rms_norm_eps"])
+gguf_writer.add_tensor_data_layout("Meta AI original pth")
+gguf_writer.add_context_length(hparams["max_position_embeddings"])
+gguf_writer.add_embedding_length(hparams["hidden_size"])
+gguf_writer.add_block_count(block_count)
+gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+gguf_writer.add_head_count(head_count)
+gguf_writer.add_head_count_kv(head_count_kv)
+gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
 
 
 # TOKENIZATION
@@ -125,19 +128,23 @@ if Path(dir_model + "/tokenizer.model").is_file():
         score: float
 
         piece = tokenizer.id_to_piece(i)
-        text  = piece.encode("utf-8")
+        text = piece.encode("utf-8")
         score = tokenizer.get_score(i)
 
-        toktype = 1 # defualt to normal token type
-        if tokenizer.is_unknown(i): toktype = 2
-        if tokenizer.is_control(i): toktype = 3
+        toktype = 1  # defualt to normal token type
+        if tokenizer.is_unknown(i):
+            toktype = 2
+        if tokenizer.is_control(i):
+            toktype = 3
 
         # TODO: How to determinate if a token is user defined?
         # ref: https://github.com/google/sentencepiece/blob/master/src/sentencepiece_model.proto
         # if tokenizer.is_user_defined(i): toktype = 4
 
-        if tokenizer.is_unused(i):  toktype = 5
-        if tokenizer.is_byte(i):    toktype = 6
+        if tokenizer.is_unused(i):
+            toktype = 5
+        if tokenizer.is_byte(i):
+            toktype = 6
 
         tokens.append(text)
         scores.append(score)
@@ -188,15 +195,15 @@ if Path(dir_model + "/tokenizer.json").is_file():
 
 # TENSORS
 
-tensor_map = gguf.get_tensor_name_map(block_count)
+tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
 
 # tensor info
 print("gguf: get tensor metadata")
 
-part_names = ( f"consolidated.{n:02}.pth" for n in range(0, num_parts) )
+part_names = (f"consolidated.{n:02}.pth" for n in range(0, num_parts))
 
 for part_name in part_names:
-    print("gguf: loading model part '"+ part_name + "'")
+    print("gguf: loading model part '" + part_name + "'")
     model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
 
     for name in model_part.keys():
@@ -206,66 +213,8 @@ for part_name in part_names:
         if name == "rope.freqs":
             continue
 
-        # convert any unsupported data types to float32
-        if data.dtype != torch.float16 and data.dtype != torch.float32:
-            data = data.to(torch.float32)
-
-        data = data.squeeze().numpy()
-
-        # map tensor names
-        if name.endswith(".weight") and name[:-7] in tensor_map:
-            name = tensor_map[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tensor_map:
-            name = tensor_map[name[:-5]] + ".bias"
-        else:
-            print( "Can not map tensor '" + name + "'" )
-            sys.exit()
-
-        n_dims = len(data.shape)
-        data_dtype = data.dtype
-
-        # if f32 desired, convert any float16 to float32
-        if ftype == 0 and data.dtype == np.float16:
-            data_dtype = np.float32
-
-        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
-        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
-            data_dtype = np.float32
-
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-            data_dtype = np.float16
-
-        data_nbytes = data.size * 2 if data_dtype == np.float16 else data.size * 4
-
-        gguf_writer.add_tensor_info(name, data.shape, data_dtype, data_nbytes)
-
-
-print("gguf: write header")
-gguf_writer.write_header_to_file()
-print("gguf: write metadata")
-gguf_writer.write_kv_data_to_file()
-print("gguf: write tensor metadata")
-gguf_writer.write_ti_data_to_file()
-
-# tensor data
-print("gguf: convert and write tensor data")
-
-part_names = ( f"consolidated.{n:02}.pth" for n in range(0, num_parts) )
-
-for part_name in part_names:
-    print("gguf: loading model part '"+ part_name + "'")
-    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
-
-    for name in model_part.keys():
-        data = model_part[name]
-
         old_dtype = data.dtype
 
-        # we don't need these
-        if name == "rope.freqs":
-            continue
-
         # convert any unsupported data types to float32
         if data.dtype != torch.float16 and data.dtype != torch.float32:
             data = data.to(torch.float32)
@@ -278,14 +227,14 @@ for part_name in part_names:
         elif name.endswith(".bias") and name[:-5] in tensor_map:
             name = tensor_map[name[:-5]] + ".bias"
         else:
-            print( "Can not map tensor '" + name + "'" )
+            print("Can not map tensor '" + name + "'")
             sys.exit()
 
         n_dims = len(data.shape)
         data_dtype = data.dtype
 
         # if f32 desired, convert any float16 to float32
-        if ftype == 0 and data.dtype == np.float16:
+        if ftype == 0 and data_dtype == np.float16:
             data = data.astype(np.float32)
 
         # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
@@ -296,9 +245,17 @@ for part_name in part_names:
         if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
             data = data.astype(np.float16)
 
-        print( name + ", shape " + str(len(data.shape)) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
 
-        gguf_writer.write_tensor_to_file(data)
+        gguf_writer.add_tensor(name, data)
+
+
+print("gguf: write header")
+gguf_writer.write_header_to_file()
+print("gguf: write metadata")
+gguf_writer.write_kv_data_to_file()
+print("gguf: write tensors")
+gguf_writer.write_tensors_to_file()
 
 gguf_writer.close()
 
diff --git a/convert-llama-h5-to-gguf.py b/convert-llama-hf-to-gguf.py
similarity index 66%
rename from convert-llama-h5-to-gguf.py
rename to convert-llama-hf-to-gguf.py
index a2b3f9a30..a998a6f49 100644
--- a/convert-llama-h5-to-gguf.py
+++ b/convert-llama-hf-to-gguf.py
@@ -18,26 +18,35 @@ NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
 
 # reverse HF permute back to original pth layout
 # https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py
+
+
 def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
-    if n_kv_head is not None and n_head != n_kv_head: n_head //= n_kv_head
+    if n_kv_head is not None and n_head != n_kv_head:
+        n_head //= n_kv_head
+
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape))
+            .swapaxes(1, 2)
+            .reshape(weights.shape))
+
 
 def count_model_parts(dir_model: str) -> int:
     num_parts = 0
+
     for filename in os.listdir(dir_model):
         if filename.startswith("pytorch_model-"):
             num_parts += 1
 
     if num_parts > 0:
         print("gguf: found " + str(num_parts) + " model parts")
+
     return num_parts
 
+
 if len(sys.argv) < 3:
     print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
     print("  ftype == 0 -> float32")
     print("  ftype == 1 -> float16")
+
     sys.exit(1)
 
 
@@ -49,7 +58,8 @@ last_dir = os.path.basename(os.path.normpath(dir_model))
 # possible tensor data types
 #   ftype == 0 -> float32
 #   ftype == 1 -> float16
-#
+
+
 # map from ftype to string
 ftype_str = ["f32", "f16"]
 
@@ -58,6 +68,7 @@ if len(sys.argv) > 2:
     ftype = int(sys.argv[2])
     if ftype < 0 or ftype > 1:
         print("Invalid ftype: " + str(ftype))
+
         sys.exit(1)
 
 fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"
@@ -69,17 +80,17 @@ with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
 
 if hparams["architectures"][0] != "LlamaForCausalLM":
     print("Model architecture not supported: " + hparams["architectures"][0])
+
     sys.exit()
 
 # get number of model parts
 num_parts = count_model_parts(dir_model)
 
-gguf_writer = gguf.GGUFWriter.open(fname_out)
-
+ARCH=gguf.MODEL_ARCH.LLAMA
+gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
 print("gguf: get model metadata")
 
-llm_arch = "llama"
 block_count = hparams["num_hidden_layers"]
 head_count = hparams["num_attention_heads"]
 
@@ -91,7 +102,7 @@ else:
 if "_name_or_path" in hparams:
     hf_repo = hparams["_name_or_path"]
 else:
-    hf_repo=""
+    hf_repo = ""
 
 if "max_sequence_length" in hparams:
     ctx_length = hparams["max_sequence_length"]
@@ -99,22 +110,21 @@ elif "max_position_embeddings" in hparams:
     ctx_length = hparams["max_position_embeddings"]
 else:
     print("gguf: can not find ctx length parameter.")
+
     sys.exit()
 
 
-gguf_writer.add_architecture(llm_arch)
 gguf_writer.add_name(last_dir)
-gguf_writer.add_file_type("All tensors F32" if ftype == 0 else "Most tensors F16, some F32")
 gguf_writer.add_source_hf_repo(hf_repo)
-gguf_writer.add_tensor_data_layout(llm_arch, "Meta AI original pth")
-gguf_writer.add_context_length(llm_arch, ctx_length)
-gguf_writer.add_embedding_length(llm_arch, hparams["hidden_size"])
-gguf_writer.add_block_count(llm_arch, block_count)
-gguf_writer.add_feed_forward_length(llm_arch, hparams["intermediate_size"])
-gguf_writer.add_rope_dimension_count(llm_arch, hparams["hidden_size"] // hparams["num_attention_heads"])
-gguf_writer.add_head_count(llm_arch, head_count)
-gguf_writer.add_head_count_kv(llm_arch, head_count_kv)
-gguf_writer.add_layer_norm_rms_eps(llm_arch, hparams["rms_norm_eps"])
+gguf_writer.add_tensor_data_layout("Meta AI original pth")
+gguf_writer.add_context_length(ctx_length)
+gguf_writer.add_embedding_length(hparams["hidden_size"])
+gguf_writer.add_block_count(block_count)
+gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+gguf_writer.add_head_count(head_count)
+gguf_writer.add_head_count_kv(head_count_kv)
+gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
 
 
 # TOKENIZATION
@@ -136,19 +146,23 @@ if Path(dir_model + "/tokenizer.model").is_file():
         score: float
 
         piece = tokenizer.id_to_piece(i)
-        text  = piece.encode("utf-8")
+        text = piece.encode("utf-8")
         score = tokenizer.get_score(i)
 
-        toktype = 1 # defualt to normal token type
-        if tokenizer.is_unknown(i): toktype = 2
-        if tokenizer.is_control(i): toktype = 3
+        toktype = 1  # defualt to normal token type
+        if tokenizer.is_unknown(i):
+            toktype = 2
+        if tokenizer.is_control(i):
+            toktype = 3
 
         # TODO: How to determinate if a token is user defined?
         # ref: https://github.com/google/sentencepiece/blob/master/src/sentencepiece_model.proto
         # if tokenizer.is_user_defined(i): toktype = 4
 
-        if tokenizer.is_unused(i):  toktype = 5
-        if tokenizer.is_byte(i):    toktype = 6
+        if tokenizer.is_unused(i):
+            toktype = 5
+        if tokenizer.is_byte(i):
+            toktype = 6
 
         tokens.append(text)
         scores.append(score)
@@ -199,7 +213,7 @@ if Path(dir_model + "/tokenizer.json").is_file():
 
 # TENSORS
 
-tensor_map = gguf.get_tensor_name_map(block_count)
+tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
 
 # tensor info
 print("gguf: get tensor metadata")
@@ -212,7 +226,7 @@ else:
     )
 
 for part_name in part_names:
-    print("gguf: loading model part '"+ part_name + "'")
+    print("gguf: loading model part '" + part_name + "'")
     model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
 
     for name in model_part.keys():
@@ -222,75 +236,8 @@ for part_name in part_names:
         if name.endswith(".rotary_emb.inv_freq"):
             continue
 
-        # convert any unsupported data types to float32
-        if data.dtype != torch.float16 and data.dtype != torch.float32:
-            data = data.to(torch.float32)
-
-        data = data.squeeze().numpy()
-
-        # reverse permute these
-        if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
-            data = reverse_hf_permute(data, head_count, head_count_kv)
-
-        # map tensor names
-        if name.endswith(".weight") and name[:-7] in tensor_map:
-            name = tensor_map[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tensor_map:
-            name = tensor_map[name[:-5]] + ".bias"
-        else:
-            print( "Can not map tensor '" + name + "'" )
-            sys.exit()
-
-        n_dims = len(data.shape)
-        data_dtype = data.dtype
-
-        # if f32 desired, convert any float16 to float32
-        if ftype == 0 and data.dtype == np.float16:
-            data_dtype = np.float32
-
-        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
-        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
-            data_dtype = np.float32
-
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-            data_dtype = np.float16
-
-        data_nbytes = data.size * 2 if data_dtype == np.float16 else data.size * 4
-
-        gguf_writer.add_tensor_info(name, data.shape, data_dtype, data_nbytes)
-
-
-print("gguf: write header")
-gguf_writer.write_header_to_file()
-print("gguf: write metadata")
-gguf_writer.write_kv_data_to_file()
-print("gguf: write tensor metadata")
-gguf_writer.write_ti_data_to_file()
-
-# tensor data
-print("gguf: convert and write tensor data")
-
-if num_parts == 0:
-    part_names = ("pytorch_model.bin",)
-else:
-    part_names = (
-        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
-    )
-
-for part_name in part_names:
-    print("gguf: loading model part '"+ part_name + "'")
-    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
-
-    for name in model_part.keys():
-        data = model_part[name]
-
         old_dtype = data.dtype
 
-        # we don't need these
-        if name.endswith(".rotary_emb.inv_freq"):
-            continue
-
         # convert any unsupported data types to float32
         if data.dtype != torch.float16 and data.dtype != torch.float32:
             data = data.to(torch.float32)
@@ -307,14 +254,14 @@ for part_name in part_names:
         elif name.endswith(".bias") and name[:-5] in tensor_map:
             name = tensor_map[name[:-5]] + ".bias"
         else:
-            print( "Can not map tensor '" + name + "'" )
+            print("Can not map tensor '" + name + "'")
             sys.exit()
 
         n_dims = len(data.shape)
         data_dtype = data.dtype
 
         # if f32 desired, convert any float16 to float32
-        if ftype == 0 and data.dtype == np.float16:
+        if ftype == 0 and data_dtype == np.float16:
             data = data.astype(np.float32)
 
         # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
@@ -325,9 +272,17 @@ for part_name in part_names:
         if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
             data = data.astype(np.float16)
 
-        print(name + ", shape " + str(len(data.shape)) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
 
-        gguf_writer.write_tensor_to_file(data)
+        gguf_writer.add_tensor(name, data)
+
+
+print("gguf: write header")
+gguf_writer.write_header_to_file()
+print("gguf: write metadata")
+gguf_writer.write_kv_data_to_file()
+print("gguf: write tensors")
+gguf_writer.write_tensors_to_file()
 
 gguf_writer.close()
 
diff --git a/convert-new.py b/convert-new.py
deleted file mode 100755
index 2c02ee73c..000000000
--- a/convert-new.py
+++ /dev/null
@@ -1,972 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-import concurrent.futures
-import copy
-import enum
-import faulthandler
-import functools
-import io
-import itertools
-import json
-import math
-import mmap
-import pickle
-import re
-import signal
-import struct
-import sys
-import zipfile
-import numpy as np
-
-from abc import ABCMeta, abstractmethod
-from dataclasses import dataclass
-from pathlib import Path
-from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, TypeVar, Union)
-from sentencepiece import SentencePieceProcessor  # type: ignore
-
-if TYPE_CHECKING:
-    from typing_extensions import TypeAlias
-
-if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'):
-    faulthandler.register(signal.SIGUSR1)
-
-NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
-
-@dataclass(frozen=True)
-class UnquantizedDataType:
-    name: str
-
-DT_F16  = UnquantizedDataType('F16')
-DT_F32  = UnquantizedDataType('F32')
-DT_I32  = UnquantizedDataType('I32')
-DT_BF16 = UnquantizedDataType('BF16')
-
-DataType = Union[UnquantizedDataType]
-
-DATA_TYPE_TO_FTYPE: Dict[DataType, int] = {
-    DT_F32: 0,
-    DT_F16: 1,
-}
-
-FTYPE_TO_DATA_TYPE: Dict[int, DataType] = \
-    {ftype: dtype for (dtype, ftype) in DATA_TYPE_TO_FTYPE.items()}
-
-DATA_TYPE_TO_NUMPY: Dict[DataType, 'np.dtype[Any]'] = {
-    DT_BF16: np.dtype(np.uint16),
-    DT_F16:  np.dtype(np.float16),
-    DT_F32:  np.dtype(np.float32),
-    DT_I32:  np.dtype(np.int32),
-}
-
-NUMPY_TYPE_TO_DATA_TYPE: Dict['np.dtype[Any]', DataType] = \
-    {dtype: data_type for (data_type, dtype) in DATA_TYPE_TO_NUMPY.items()}
-
-class GGMLFileType(enum.Enum):
-    AllF32    = 0
-    MostlyF16 = 1  # except 1d tensors
-
-    def type_for_tensor(self, name: str, tensor: 'LazyTensor') -> DataType:
-        if len(tensor.shape) == 1:
-            # 1D tensors are always F32.
-            return DT_F32
-        elif self == GGMLFileType.AllF32:
-            return DT_F32
-        elif self == GGMLFileType.MostlyF16:
-            return DT_F16
-        else:
-            raise ValueError(self)
-
-# TODO: this is LLaMA specific
-def make_tensors_list() -> List[str]:
-    ret = [
-        'tok_embeddings.weight',
-        'norm.weight',
-        'output.weight',
-    ]
-    for i in range(80):  # maximum number of layer
-        ret += [
-            f'layers.{i}.attention.wq.weight',
-            f'layers.{i}.attention.wk.weight',
-            f'layers.{i}.attention.wv.weight',
-            f'layers.{i}.attention.wo.weight',
-            f'layers.{i}.attention_norm.weight',
-            f'layers.{i}.feed_forward.w1.weight',
-            f'layers.{i}.feed_forward.w2.weight',
-            f'layers.{i}.feed_forward.w3.weight',
-            f'layers.{i}.ffn_norm.weight',
-        ]
-    return ret
-
-# TODO: this should be generalized for non-LLaMA models
-TENSORS_LIST = make_tensors_list()
-TENSORS_SET = set(TENSORS_LIST)
-
-def find_n_mult(n_ff: int, n_embd: int) -> int:
-    # hardcoded magic range
-    for n_mult in range(8192, 1, -1):
-        calc_ff = (((8*n_embd) // 3 + n_mult - 1) // n_mult)*n_mult
-        if calc_ff == n_ff:
-            return n_mult
-    raise Exception(f"failed to find n_mult for (n_ff={n_ff}, n_embd={n_embd}).")
-
-
-@dataclass
-class Params:
-    n_vocab:   int
-    n_embd:    int
-    n_mult:    int
-    n_head:    int
-    n_layer:   int
-    n_kv_head: Optional[int]  # This parameter is only used for Llama 2
-
-    @staticmethod
-    def guessed(model: 'LazyModel') -> 'Params':
-        # try transformer naming first
-        n_vocab, n_embd = model["model.embed_tokens.weight"].shape if "model.embed_tokens.weight" in model else model["tok_embeddings.weight"].shape
-
-        # try transformer naming first
-        if "model.layers.0.self_attn.q_proj.weight" in model:
-            n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model)
-        elif "model.layers.0.self_attn.W_pack.weight" in model:   # next: try baichuan naming
-            n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.W_pack.weight" not in model)
-        else:
-            n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
-
-        if n_layer < 1:
-            raise Exception("failed to guess 'n_layer'. This model is unknown or unsupported.\n"
-                            "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
-
-        n_head=n_embd // 128 # guessed
-
-        return Params(
-            n_vocab   = n_vocab,
-            n_embd    = n_embd,
-            n_mult    = 256,
-            n_head    = n_head,
-            n_layer   = n_layer,
-            n_kv_head = None,
-        )
-
-    @staticmethod
-    def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
-        config = json.load(open(config_path))
-
-        n_vocab   = config["vocab_size"];
-        n_embd    = config["hidden_size"];
-        n_head    = config["num_attention_heads"];
-        n_layer   = config["num_hidden_layers"];
-        n_ff      = config["intermediate_size"];
-        n_kv_head = config.get("num_key_value_heads")
-
-        n_mult = find_n_mult(n_ff, n_embd);
-
-        return Params(
-            n_vocab   = n_vocab,
-            n_embd    = n_embd,
-            n_mult    = n_mult,
-            n_head    = n_head,
-            n_layer   = n_layer,
-            n_kv_head = n_kv_head,
-        )
-
-    # LLaMA v2 70B params.json
-    # {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1
-    @staticmethod
-    def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
-        config = json.load(open(config_path))
-
-        n_vocab = config["vocab_size"];
-        n_embd  = config["dim"];
-        n_head  = config["n_heads"];
-        n_layer = config["n_layers"];
-        n_mult  = config["multiple_of"];
-
-        if n_vocab == -1:
-            n_vocab = model["tok_embeddings.weight"].shape[0]
-
-        return Params(
-            n_vocab   = n_vocab,
-            n_embd    = n_embd,
-            n_mult    = n_mult,
-            n_head    = n_head,
-            n_layer   = n_layer,
-            n_kv_head = None,
-        )
-
-    @staticmethod
-    def load(model_plus: 'ModelPlus') -> 'Params':
-        hf_config_path   = model_plus.paths[0].parent / "config.json"
-        orig_config_path = model_plus.paths[0].parent / "params.json"
-
-        if hf_config_path.exists():
-            params = Params.loadHFTransformerJson(model_plus.model, hf_config_path)
-        elif orig_config_path.exists():
-            params = Params.loadOriginalParamsJson(model_plus.model, orig_config_path)
-        else:
-            params = Params.guessed(model_plus.model)
-
-        print(f'params: n_vocab:{params.n_vocab} n_embd:{params.n_embd} n_mult:{params.n_mult} n_head:{params.n_head} n_layer:{params.n_layer}')
-        return params
-
-
-class BpeVocab:
-    def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path]) -> None:
-        self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read())
-        added_tokens: Dict[str, int]
-        if fname_added_tokens is not None:
-            added_tokens = json.load(open(fname_added_tokens, encoding="utf-8"))
-        else:
-            added_tokens = {}
-        vocab_size: int = len(self.bpe_tokenizer)
-        expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
-        actual_ids = sorted(added_tokens.values())
-        if expected_ids != actual_ids:
-            raise Exception(f"Expected added token IDs to be sequential and start at {len(added_tokens)}; got {actual_ids}")
-        items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
-        self.added_tokens_list = [text for (text, idx) in items]
-        self.vocab_size_base: int = vocab_size
-        self.vocab_size: int = self.vocab_size_base + len(self.added_tokens_list)
-        self.fname_tokenizer = fname_tokenizer
-        self.fname_added_tokens = fname_added_tokens
-
-    def bpe_tokens(self) -> Iterable[Tuple[bytes, float]]:
-        tokenizer = self.bpe_tokenizer
-        from transformers.models.gpt2 import tokenization_gpt2
-        byte_encoder = tokenization_gpt2.bytes_to_unicode()
-        byte_decoder = {v: k for k, v in byte_encoder.items()}
-        for i, item in enumerate(tokenizer):
-            text: bytes = item.encode("utf-8")
-            score: float = -i
-            yield text, score
-
-    def added_tokens(self) -> Iterable[Tuple[bytes, float]]:
-        for text in self.added_tokens_list:
-            score = -1000.0
-            yield text.encode("utf-8"), score
-
-    def all_tokens(self) -> Iterable[Tuple[bytes, float]]:
-        yield from self.bpe_tokens()
-        yield from self.added_tokens()
-
-    def __repr__(self) -> str:
-        return f"BpeVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
-
-
-class SentencePieceVocab:
-    def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path]) -> None:
-        self.sentencepiece_tokenizer = SentencePieceProcessor(str(fname_tokenizer))
-        added_tokens: Dict[str, int]
-        if fname_added_tokens is not None:
-            added_tokens = json.load(open(fname_added_tokens, encoding="utf-8"))
-        else:
-            added_tokens = {}
-        vocab_size: int = self.sentencepiece_tokenizer.vocab_size()
-        expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
-        actual_ids   = sorted(added_tokens.values())
-        if expected_ids != actual_ids:
-            raise Exception(f"Expected added token IDs to be sequential and start at {len(added_tokens)}; got {actual_ids}")
-
-        items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
-        self.added_tokens_list = [text for (text, idx) in items]
-        self.vocab_size_base: int = vocab_size
-        self.vocab_size: int = self.vocab_size_base + len(self.added_tokens_list)
-        self.fname_tokenizer = fname_tokenizer
-        self.fname_added_tokens = fname_added_tokens
-
-    def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]:
-        tokenizer = self.sentencepiece_tokenizer
-        for i in range(tokenizer.vocab_size()):
-            piece = tokenizer.id_to_piece(i)
-            text: bytes = piece.encode("utf-8")
-            score: float = tokenizer.get_score(i)
-            yield text, score
-
-    def added_tokens(self) -> Iterable[Tuple[bytes, float]]:
-        for text in self.added_tokens_list:
-            score = -1000.0
-            yield text.encode("utf-8"), score
-
-    def all_tokens(self) -> Iterable[Tuple[bytes, float]]:
-        yield from self.sentencepiece_tokens()
-        yield from self.added_tokens()
-
-    def __repr__(self) -> str:
-        return f"<SentencePieceVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
-
-
-Vocab = Union[BpeVocab, SentencePieceVocab]
-
-
-def permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
-    if n_kv_head is not None and n_head != n_kv_head:
-        n_head //= n_kv_head
-    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape))
-
-
-class Tensor(metaclass=ABCMeta):
-    data_type: DataType
-
-    @abstractmethod
-    def astype(self, data_type: DataType) -> 'Tensor': ...
-    @abstractmethod
-    def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> 'Tensor': ...
-    @abstractmethod
-    def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor': ...
-    @abstractmethod
-    def part(self, n_part: int) -> 'UnquantizedTensor': ...
-    @abstractmethod
-    def to_ggml(self) -> 'GGMLCompatibleTensor': ...
-
-
-def bf16_to_fp32(bf16_arr: np.ndarray) -> np.ndarray:
-    assert bf16_arr.dtype == np.uint16, f"Input array should be of dtype uint16, but got {bf16_arr.dtype}"
-    fp32_arr = bf16_arr.astype(np.uint32) << 16
-    return fp32_arr.view(np.float32)
-
-
-class UnquantizedTensor(Tensor):
-    def __init__(self, ndarray: NDArray) -> None:
-        assert isinstance(ndarray, np.ndarray)
-        self.ndarray = ndarray
-        self.data_type = NUMPY_TYPE_TO_DATA_TYPE[ndarray.dtype]
-
-    def astype(self, data_type: DataType) -> Tensor:
-        dtype = DATA_TYPE_TO_NUMPY[data_type]
-        if self.data_type == DT_BF16:
-            self.ndarray = bf16_to_fp32(self.ndarray)
-        return UnquantizedTensor(self.ndarray.astype(dtype))
-
-    def to_ggml(self) -> 'UnquantizedTensor':
-        return self
-
-    def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor':
-        r = self.ndarray.shape[0] // 3
-        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head))
-
-    def part(self, n_part: int) -> 'UnquantizedTensor':
-        r = self.ndarray.shape[0] // 3
-        return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])
-
-    def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> 'UnquantizedTensor':
-        return UnquantizedTensor(permute(self.ndarray, n_head, n_kv_head))
-
-
-def load_unquantized(lazy_tensor: 'LazyTensor', expected_dtype: Any = None, convert: bool = False) -> NDArray:
-    tensor = lazy_tensor.load()
-    assert isinstance(tensor, UnquantizedTensor)
-
-    # double-check:
-    actual_shape = list(tensor.ndarray.shape)
-    assert actual_shape == lazy_tensor.shape, (actual_shape, lazy_tensor.shape)
-    if expected_dtype is not None and expected_dtype != tensor.ndarray.dtype:
-        if convert:
-            tensor.ndarray = tensor.ndarray.astype(expected_dtype)
-        else:
-            raise ValueError(f'expected this tensor to have dtype {expected_dtype}, got {tensor.ndarray.dtype}')
-
-    return tensor.ndarray
-
-
-GGMLCompatibleTensor = Union[UnquantizedTensor]
-
-
-class DeferredPermutedTensor(Tensor):
-    def __init__(self, base: Tensor, n_head: int, n_kv_head: Optional[int] = None) -> None:
-        self.base = base
-        self.n_head = n_head
-        self.data_type = self.base.data_type
-
-    def astype(self, data_type: DataType) -> Tensor:
-        return self.base.astype(data_type).permute(self.n_head, self.n_kv_head)
-
-    def to_ggml(self) -> GGMLCompatibleTensor:
-        return self.base.to_ggml().permute(self.n_head, self.n_kv_head)
-
-    def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> Tensor:
-        raise Exception("shouldn't permute twice")
-
-
-@dataclass
-class LazyTensor:
-    _load: Callable[[], Tensor]
-    shape: List[int]
-    data_type: DataType
-    description: str
-
-    def load(self) -> Tensor:
-        ret = self._load()
-        assert ret.data_type == self.data_type, (self.data_type, ret.data_type, self.description)
-        return ret
-
-    def astype(self, data_type: DataType) -> 'LazyTensor':
-        self.validate_conversion_to(data_type)
-
-        def load() -> Tensor:
-            return self.load().astype(data_type)
-        return LazyTensor(load, self.shape, data_type, f'convert({data_type}) {self.description}')
-
-    def validate_conversion_to(self, data_type: DataType) -> None:
-        if data_type == self.data_type:
-            return
-
-
-LazyModel = Dict[str, LazyTensor]
-
-
-@dataclass
-class ModelPlus:
-    model: LazyModel
-    paths: List[Path]  # Where this was read from.
-    format: Literal['ggml', 'torch', 'safetensors']
-    vocab: Optional[Vocab]  # For GGML models (which have vocab built in), the vocab.
-
-
-def merge_sharded(models: List[LazyModel]) -> LazyModel:
-    # Original LLaMA models have each file contain one part of each tensor.
-    # Use a dict instead of a set to preserve order.
-    names = {name: None for model in models for name in model}
-
-    def convert(name: str) -> LazyTensor:
-        lazy_tensors: List[LazyTensor] = [model[name] for model in models]
-        if len(lazy_tensors) == 1:
-            # only one file; don't go through this procedure since there might
-            # be quantized tensors
-            return lazy_tensors[0]
-        if len(lazy_tensors[0].shape) == 1:
-            # the tensor is just duplicated in every file
-            return lazy_tensors[0]
-        if name.startswith('tok_embeddings.') or \
-           name.endswith('.attention.wo.weight') or \
-           name.endswith('.feed_forward.w2.weight'):
-            # split by columns
-            axis = 1
-        else:
-            # split by rows
-            axis = 0
-        concatenated_shape = list(lazy_tensors[0].shape)
-        concatenated_shape[axis] = sum(tensor.shape[axis] for tensor in lazy_tensors)
-
-        def load() -> UnquantizedTensor:
-            ndarrays = [load_unquantized(tensor) for tensor in lazy_tensors]
-            concatenated: NDArray = np.concatenate(ndarrays, axis=axis)
-            return UnquantizedTensor(concatenated)
-        description = 'concatenated[[' + '] | ['.join(lt.description for lt in lazy_tensors) + ']]'
-        return LazyTensor(load, concatenated_shape, lazy_tensors[0].data_type, description)
-    return {name: convert(name) for name in names}
-
-
-def merge_multifile_models(models_plus: List[ModelPlus]) -> ModelPlus:
-    formats = set(mp.format for mp in models_plus)
-    assert len(formats) == 1, "different formats?"
-    format = formats.pop()
-    paths = [path for mp in models_plus for path in mp.paths]
-    # Use the first non-None vocab, if any.
-    try:
-        vocab = next(mp.vocab for mp in models_plus if mp.vocab is not None)
-    except StopIteration:
-        vocab = None
-
-    if any("model.embed_tokens.weight" in mp.model for mp in models_plus):
-        # Transformers models put different tensors in different files, but
-        # don't split indivdual tensors between files.
-        model: LazyModel = {}
-        for mp in models_plus:
-            model.update(mp.model)
-    else:
-        model = merge_sharded([mp.model for mp in models_plus])
-
-    return ModelPlus(model, paths, format, vocab)
-
-
-def permute_lazy(lazy_tensor: LazyTensor, n_head: int, n_kv_head: Optional[int] = None) -> LazyTensor:
-    def load() -> Tensor:
-        return lazy_tensor.load().permute(n_head, n_kv_head)
-    return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}, {n_kv_head}) ' + lazy_tensor.description)
-
-def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int) -> LazyTensor:
-    def load() -> Tensor:
-        return lazy_tensor.load().permute_part(n_part, n_head)
-    s = lazy_tensor.shape.copy()
-    s[0] = s[0] // 3
-    return LazyTensor(load, s, lazy_tensor.data_type, f'permute({n_head}) ' + lazy_tensor.description)
-
-def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
-    def load() -> Tensor:
-        return lazy_tensor.load().part(n_part)
-    s = lazy_tensor.shape.copy()
-    s[0] = s[0] // 3
-    return LazyTensor(load, s, lazy_tensor.data_type, 'part ' + lazy_tensor.description)
-
-def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
-    out: LazyModel = {}
-    out["tok_embeddings.weight"] = model["model.embed_tokens.weight"]
-    out["norm.weight"]           = model["model.norm.weight"]
-    out["output.weight"]         = model["lm_head.weight"]
-
-    for i in itertools.count():
-        if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
-            out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_kv_head)
-            out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
-        elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
-            out[f"layers.{i}.attention.wq.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head)
-            out[f"layers.{i}.attention.wk.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head)
-            out[f"layers.{i}.attention.wv.weight"] = part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
-        else:
-            break
-
-        out[f"layers.{i}.attention.wo.weight"] = model[f"model.layers.{i}.self_attn.o_proj.weight"]
-
-        out[f"layers.{i}.feed_forward.w1.weight"] = model[f"model.layers.{i}.mlp.gate_proj.weight"]
-        out[f"layers.{i}.feed_forward.w2.weight"] = model[f"model.layers.{i}.mlp.down_proj.weight"]
-        out[f"layers.{i}.feed_forward.w3.weight"] = model[f"model.layers.{i}.mlp.up_proj.weight"]
-
-        out[f"layers.{i}.attention_norm.weight"] = model[f"model.layers.{i}.input_layernorm.weight"]
-        out[f"layers.{i}.ffn_norm.weight"]       = model[f"model.layers.{i}.post_attention_layernorm.weight"]
-    return out
-
-
-# Functionality that simulates `torch.load` but where individual tensors are
-# only loaded into memory on demand, not all at once.
-# PyTorch can't do this natively as of time of writing:
-# - https://github.com/pytorch/pytorch/issues/64327
-# This allows us to de-shard without multiplying RAM usage, and also
-# conveniently drops the PyTorch dependency (though we still need numpy).
-
-
-@dataclass
-class LazyStorageKind:
-    data_type: DataType
-
-
-@dataclass
-class LazyStorage:
-    load: Callable[[int, int], NDArray]
-    kind: LazyStorageKind
-    description: str
-
-
-class LazyUnpickler(pickle.Unpickler):
-    def __init__(self, fp: IO[bytes], data_base_path: str, zip_file: zipfile.ZipFile):
-        super().__init__(fp)
-        self.data_base_path = data_base_path
-        self.zip_file = zip_file
-
-    def persistent_load(self, pid: Any) -> Any:
-        assert pid[0] == 'storage'
-        assert isinstance(pid[1], LazyStorageKind)
-        data_type = pid[1].data_type
-        filename_stem = pid[2]
-        filename = self.data_base_path + '/' + filename_stem
-        info = self.zip_file.getinfo(filename)
-
-        def load(offset: int, elm_count: int) -> NDArray:
-            dtype = DATA_TYPE_TO_NUMPY.get(data_type)
-            if dtype is None:
-                raise Exception("tensor stored in unsupported format")
-            fp = self.zip_file.open(info)
-            fp.seek(offset * dtype.itemsize)
-            size = elm_count * dtype.itemsize
-            data = fp.read(size)
-            assert len(data) == size
-            return np.frombuffer(data, dtype)
-        description = f'storage data_type={data_type} path-in-zip={filename} path={self.zip_file.filename}'
-        return LazyStorage(load=load, kind=pid[1], description=description)
-
-    # @staticmethod
-    def lazy_rebuild_tensor_v2(storage: Any, storage_offset: Any, size: Any, stride: Any,
-                               # pyright: ignore[reportSelfClsParameterName]
-                               requires_grad: Any, backward_hooks: Any, metadata: Any = None) -> LazyTensor:
-        assert isinstance(storage, LazyStorage)
-
-        def load() -> UnquantizedTensor:
-            elm_count = stride[0] * size[0]
-            return UnquantizedTensor(storage.load(storage_offset, elm_count).reshape(size))
-        description = f'pickled storage_offset={storage_offset} in {storage.description}'
-        return LazyTensor(load, list(size), storage.kind.data_type, description)
-
-    # @staticmethod
-    def rebuild_from_type_v2(func, new_type, args, state):
-        return func(*args)
-
-    CLASSES: Dict[Any, Any] = {
-        ('torch._tensor', '_rebuild_from_type_v2'): rebuild_from_type_v2,
-        ('torch._utils', '_rebuild_tensor_v2'): lazy_rebuild_tensor_v2,
-        ('torch', 'BFloat16Storage'): LazyStorageKind(DT_BF16),
-        ('torch', 'HalfStorage'): LazyStorageKind(DT_F16),
-        ('torch', 'FloatStorage'): LazyStorageKind(DT_F32),
-        ('torch', 'IntStorage'): LazyStorageKind(DT_I32),
-        ('torch', 'Tensor'): LazyTensor,
-    }
-
-    def find_class(self, module: str, name: str) -> Any:
-        if not module.startswith('torch'):
-            return super().find_class(module, name)
-        return self.CLASSES[(module, name)]
-
-
-def lazy_load_torch_file(outer_fp: IO[bytes], path: Path) -> ModelPlus:
-    zf = zipfile.ZipFile(outer_fp)
-    pickle_paths = [name for name in zf.namelist() if name.endswith('.pkl')]
-    assert len(pickle_paths) == 1, pickle_paths
-    pickle_fp = zf.open(pickle_paths[0], 'r')
-    unpickler = LazyUnpickler(pickle_fp,
-                              data_base_path=pickle_paths[0][:-4],
-                              zip_file=zf)
-    model = unpickler.load()
-    as_dict = dict(model.items())
-    return ModelPlus(model=as_dict, paths=[path], format='torch', vocab=None)
-
-
-SAFETENSORS_DATA_TYPES: Dict[str, DataType] = {
-    'BF16': DT_BF16,
-    'F16': DT_F16,
-    'F32': DT_F32,
-    'I32': DT_I32,
-}
-
-
-def lazy_load_safetensors_file(fp: IO[bytes], path: Path) -> ModelPlus:
-    header_size, = struct.unpack('<Q', fp.read(8))
-    header: Dict[str, Dict[str, Any]] = json.loads(fp.read(header_size))
-    # Use mmap for the actual data to avoid race conditions with the file offset.
-    mapped = memoryview(mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ))
-    byte_buf = mapped[8 + header_size:]
-
-    def convert(info: Dict[str, Any]) -> LazyTensor:
-        data_type = SAFETENSORS_DATA_TYPES[info['dtype']]
-        numpy_dtype = DATA_TYPE_TO_NUMPY[data_type]
-        shape: List[int] = info['shape']
-        begin, end = info['data_offsets']
-        assert 0 <= begin <= end <= len(byte_buf)
-        assert end - begin == math.prod(shape) * numpy_dtype.itemsize
-        buf = byte_buf[begin:end]
-
-        def load() -> UnquantizedTensor:
-            return UnquantizedTensor(np.frombuffer(buf, dtype=numpy_dtype).reshape(shape))
-        description = f'safetensors begin={begin} end={end} type={data_type} path={path}'
-        return LazyTensor(load, shape, data_type, description)
-    model = {name: convert(info) for (name, info) in header.items() if name != '__metadata__'}
-    return ModelPlus(model=model, paths=[path], format='safetensors', vocab=None)
-
-
-def must_read(fp: IO[bytes], length: int) -> bytes:
-    ret = fp.read(length)
-    if len(ret) < length:
-        raise Exception("unexpectedly reached end of file")
-    return ret
-
-
-@functools.lru_cache(maxsize=None)
-def lazy_load_file(path: Path) -> ModelPlus:
-    fp = open(path, 'rb')
-    first8 = fp.read(8)
-    fp.seek(0)
-    if first8[:2] == b'PK':
-        # A zip file, i.e. PyTorch format
-        return lazy_load_torch_file(fp, path)
-    elif struct.unpack('<Q', first8)[0] < 16 * 1024 * 1024:
-        # Probably safetensors
-        return lazy_load_safetensors_file(fp, path)
-    else:
-        raise ValueError(f"unknown format: {path}")
-
-
-In = TypeVar('In')
-Out = TypeVar('Out')
-
-
-def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], concurrency: int) -> Iterable[Out]:
-    '''Parallel map, but with backpressure.  If the caller doesn't call `next`
-    fast enough, this will stop calling `func` at some point rather than
-    letting results pile up in memory.  Specifically, there is a max of one
-    output value buffered per thread.'''
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        futures: List[concurrent.futures.Future[Out]] = []
-        items_rev = list(iterable)[::-1]
-        for i in range(min(concurrency, len(items_rev))):
-            futures.append(executor.submit(func, items_rev.pop()))
-        while futures:
-            result = futures.pop(0).result()
-            if items_rev:
-                futures.append(executor.submit(func, items_rev.pop()))
-            yield result
-
-
-def check_vocab_size(params: Params, vocab: Vocab) -> None:
-    if params.n_vocab != vocab.vocab_size:
-        assert isinstance(vocab, BpeVocab) or isinstance(vocab, SentencePieceVocab)
-        if params.n_vocab == vocab.vocab_size_base:
-            print("Ignoring added_tokens.json since model matches vocab size without it.")
-            vocab.added_tokens_list = []
-            vocab.vocab_size = vocab.vocab_size_base
-            return
-        msg = f"Vocab size mismatch (model has {params.n_vocab}, but {vocab.fname_tokenizer}"
-        if vocab.fname_added_tokens is not None:
-            msg += f" combined with {vocab.fname_added_tokens}"
-        msg += f" has {vocab.vocab_size})."
-        if vocab.vocab_size < params.n_vocab < vocab.vocab_size + 20 and vocab.fname_added_tokens is None:
-            msg += f"  Most likely you are missing added_tokens.json (should be in {vocab.fname_tokenizer.parent})."
-        raise Exception(msg)
-
-
-class OutputFile:
-    def __init__(self, fname_out: Path) -> None:
-        self.fout = open(fname_out, "wb")
-
-    def write_file_header(self, params: Params, file_type: GGMLFileType) -> None:
-        self.fout.write(b"ggjt"[::-1])  # magic
-        values = [
-            1,  # file version
-            params.n_vocab,
-            params.n_embd,
-            params.n_mult,
-            params.n_head,
-            params.n_layer,
-            params.n_embd // params.n_head,  # rot (obsolete)
-            file_type.value,
-        ]
-        self.fout.write(struct.pack("i" * len(values), *values))
-
-    def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None:
-        sname = name.encode('utf-8')
-        self.fout.write(struct.pack("iii", len(shape), len(sname), DATA_TYPE_TO_FTYPE[data_type]))
-        self.fout.write(struct.pack("i" * len(shape), *shape[::-1]))
-        self.fout.write(sname)
-        self.fout.seek((self.fout.tell() + 31) & -32)
-
-    def write_vocab(self, vocab: Vocab) -> None:
-        for text, score in vocab.all_tokens():
-            self.fout.write(struct.pack("i", len(text)))
-            self.fout.write(text)
-            self.fout.write(struct.pack("f", score))
-
-    @staticmethod
-    def write_vocab_only(fname_out: Path, vocab: Vocab) -> None:
-        of = OutputFile(fname_out)
-        params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0)
-        of = OutputFile(fname_out)
-        of.write_file_header(params, file_type=GGMLFileType.AllF32)
-        of.write_vocab(vocab)
-        of.fout.close()
-
-    @staticmethod
-    def write_all(fname_out: Path, params: Params, file_type: GGMLFileType, model: LazyModel, vocab: Vocab) -> None:
-        check_vocab_size(params, vocab)
-        of = OutputFile(fname_out)
-        of.write_file_header(params, file_type)
-        print("Writing vocab...")
-        of.write_vocab(vocab)
-
-        def do_item(item: Tuple[str, LazyTensor]) -> NDArray:
-            name, lazy_tensor = item
-            return lazy_tensor.load().to_ggml().ndarray
-
-        ndarrays = bounded_parallel_map(do_item, model.items(), concurrency=8)
-        for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
-            size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
-            padi = len(str(len(model)))
-            print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}")
-            of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
-            ndarray.tofile(of.fout)
-        of.fout.close()
-
-
-def pick_output_type(model: LazyModel, output_type_str: Optional[str]) -> GGMLFileType:
-    wq_type = model["layers.0.attention.wq.weight"].data_type
-    if output_type_str == "f32" or (output_type_str is None and wq_type in (DT_F32, DT_BF16)):
-        return GGMLFileType.AllF32
-    if output_type_str == "f16" or (output_type_str is None and wq_type == DT_F16):
-        return GGMLFileType.MostlyF16
-    name_to_type = {name: lazy_tensor.data_type for (name, lazy_tensor) in model.items()}
-    raise Exception(f"Unexpected combination of types: {name_to_type}")
-
-
-def do_necessary_conversions(model: LazyModel, params: Params) -> LazyModel:
-    if "lm_head.weight" in model:
-        model = convert_transformers_to_orig(model, params)
-    model = filter_and_sort_tensors(model)
-
-    return model
-
-
-def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
-    return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
-            for (name, tensor) in model.items()}
-
-
-def nth_multifile_path(path: Path, n: int) -> Optional[Path]:
-    '''Given any path belonging to a multi-file model (e.g. foo.bin.1), return
-    the nth path in the model.
-    '''
-    # Support the following patterns:
-    patterns: List[Tuple[str, str]] = [
-        # - x.00.pth, x.01.pth, etc.
-        (r'\.[0-9]{2}\.pth$', f'.{n:02}.pth'),
-        # - x-00001-of-00002.bin, x-00002-of-00002.bin, etc.
-        (r'-[0-9]{5}-of-(.*)$', fr'-{n:05}-of-\1'),
-        # x.bin, x.bin.1, etc.
-        (r'(\.[0-9]+)?$', r'\1' if n == 0 else fr'\1.{n}')
-    ]
-    for regex, replacement in patterns:
-        if re.search(regex, path.name):
-            new_path = path.with_name(re.sub(regex, replacement, path.name))
-            if new_path.exists():
-                return new_path
-    return None
-
-
-def find_multifile_paths(path: Path) -> List[Path]:
-    '''Given any path belonging to a multi-file model (e.g. foo.bin.1), return
-    the whole list of paths in the model.
-    '''
-    ret: List[Path] = []
-    for i in itertools.count():
-        nth_path = nth_multifile_path(path, i)
-        if nth_path is None:
-            break
-        ret.append(nth_path)
-    if not ret:
-        # No matches.  This should only happen if the file was named, e.g.,
-        # foo.0, and there was no file named foo.  Oh well, try to process it
-        # as a single file.
-        return [path]
-    return ret
-
-
-def load_some_model(path: Path) -> ModelPlus:
-    '''Load a model of any supported format.'''
-    # Be extra-friendly and accept either a file or a directory:
-    if path.is_dir():
-        # Check if it's a set of safetensors files first
-        files = list(path.glob("model-00001-of-*.safetensors"))
-        if not files:
-            # Try the PyTorch patterns too, with lower priority
-            globs = ["consolidated.00.pth", "pytorch_model-00001-of-*.bin", "*.pt", "pytorch_model.bin"]
-            files = [file for glob in globs for file in path.glob(glob)]
-        if not files:
-            # Try GGML too, but with lower priority, since if both a non-GGML
-            # model and a GGML model exist in the same directory, we assume the
-            # latter was converted from the former.
-            files = list(path.glob("ggml-model*.bin*"))
-        if not files:
-            raise Exception(f"Can't find model in directory {path}")
-        if len(files) > 1:
-            raise Exception(f"Found multiple models in {path}, not sure which to pick: {files}")
-        path = files[0]
-
-    paths = find_multifile_paths(path)
-    models_plus: List[ModelPlus] = []
-    for path in paths:
-        print(f"Loading model file {path}")
-        models_plus.append(lazy_load_file(path))
-
-    model_plus = merge_multifile_models(models_plus)
-    return model_plus
-
-
-def filter_and_sort_tensors(model: LazyModel) -> LazyModel:
-    return {name: model[name] for name in TENSORS_LIST if name in model}
-
-
-def load_vocab(path: Path, vocabtype: Optional[str]) -> Union[BpeVocab, SentencePieceVocab]:
-    print(f"vocabtype: {vocabtype}")
-    # Be extra-friendly and accept either a file or a directory.  Also, if it's
-    # a directory, it might be the model directory, and tokenizer.model might
-    # be in the parent of that.
-    if path.is_dir():
-        vocab_file = "tokenizer.model"
-        if vocabtype == 'bpe':
-            vocab_file = "vocab.json"
-        path2 = path / vocab_file
-        # Use `.parent` instead of /.. to handle the symlink case better.
-        path3 = path.parent / vocab_file
-        if path2.exists():
-            path = path2
-        elif path3.exists():
-            path = path3
-        else:
-            raise FileNotFoundError(
-                f"Could not find tokenizer.model in {path} or its parent; "
-                "if it's in another directory, pass the directory as --vocab-dir")
-    added_tokens_path = path.parent / "added_tokens.json"
-    print(f"Loading vocab file {path}")
-    if vocabtype == "bpe":
-        return BpeVocab(path, added_tokens_path if added_tokens_path.exists() else None)
-    elif vocabtype == "spm":
-        return SentencePieceVocab(path, added_tokens_path if added_tokens_path.exists() else None)
-    else:
-        raise ValueError(f"Unsupported vocabulary type {vocabtype}")
-
-
-def default_outfile(model_paths: List[Path], file_type: GGMLFileType) -> Path:
-    namestr = {
-        GGMLFileType.AllF32:    "f32",
-        GGMLFileType.MostlyF16: "f16",
-    }[file_type]
-    ret = model_paths[0].parent / f"ggml-model-{namestr}.bin"
-    if ret in model_paths:
-        sys.stderr.write(
-            f"Error: Default output path ({ret}) would overwrite the input. "
-            "Please explicitly specify a path using --outfile.\n")
-        sys.exit(1)
-    return ret
-
-
-def do_dump_model(model_plus: ModelPlus) -> None:
-    print(f"model_plus.paths = {model_plus.paths!r}")
-    print(f"model_plus.format = {model_plus.format!r}")
-    print(f"model_plus.vocab = {model_plus.vocab!r}")
-    for name, lazy_tensor in model_plus.model.items():
-        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")
-
-
-def main(args_in: Optional[List[str]] = None) -> None:
-    parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
-    parser.add_argument("--dump",        action="store_true",    help="don't convert, just show what's in the model")
-    parser.add_argument("--dump-single", action="store_true",    help="don't convert, just show what's in a single model file")
-    parser.add_argument("--vocab-only",  action="store_true",    help="extract only the vocab")
-    parser.add_argument("--outtype",     choices=["f32", "f16"], help="output format (default: based on input)")
-    parser.add_argument("--vocab-dir",   type=Path,              help="directory containing tokenizer.model, if separate from model file")
-    parser.add_argument("--outfile",     type=Path,              help="path to write to; default: based on input")
-    parser.add_argument("model",         type=Path,              help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
-    parser.add_argument("--vocabtype",   choices=["spm", "bpe"], help="vocab format (default: spm)")
-    args = parser.parse_args(args_in)
-
-    vocab: Vocab
-    if args.dump_single:
-        model_plus = lazy_load_file(args.model)
-        do_dump_model(model_plus)
-    elif args.vocab_only:
-        vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
-        assert args.outfile, "need --outfile if using --vocab-only"
-        outfile = args.outfile
-        OutputFile.write_vocab_only(outfile, vocab)
-        print(f"Wrote {outfile}")
-    else:
-        model_plus = load_some_model(args.model)
-        if args.dump:
-            do_dump_model(model_plus)
-            return
-        if model_plus.vocab is not None and args.vocab_dir is None:
-            vocab = model_plus.vocab
-        else:
-            vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent
-            vocab = load_vocab(vocab_dir, args.vocabtype)
-
-        params      = Params.load(model_plus)
-        model       = model_plus.model
-        model       = do_necessary_conversions(model, params)
-        output_type = pick_output_type(model, args.outtype)
-        model       = convert_to_output_type(model, output_type)
-        outfile     = args.outfile or default_outfile(model_plus.paths, output_type)
-
-        OutputFile.write_all(outfile, params, output_type, model, vocab)
-        print(f"Wrote {outfile}")
-
-
-if __name__ == '__main__':
-    main()
diff --git a/convert.py b/convert.py
old mode 100644
new mode 100755
index d9d0c0b38..f6237579d
--- a/convert.py
+++ b/convert.py
@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+
+import gguf
 import argparse
 import concurrent.futures
 import copy
@@ -16,13 +18,12 @@ import signal
 import struct
 import sys
 import zipfile
+import numpy as np
+
 from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
 from pathlib import Path
-from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Iterable, List,
-                    Literal, Optional, Sequence, Tuple, TypeVar, Union)
-
-import numpy as np
+from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, TypeVar, Union)
 from sentencepiece import SentencePieceProcessor  # type: ignore
 
 if TYPE_CHECKING:
@@ -33,57 +34,44 @@ if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'):
 
 NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
 
+ARCH=gguf.MODEL_ARCH.LLAMA
+NAMES=gguf.MODEL_TENSOR_NAMES[ARCH]
+
+#
+# data types
+#
 
 @dataclass(frozen=True)
 class UnquantizedDataType:
     name: str
 
-
-DT_F16 = UnquantizedDataType('F16')
-DT_F32 = UnquantizedDataType('F32')
-DT_I32 = UnquantizedDataType('I32')
+DT_F16  = UnquantizedDataType('F16')
+DT_F32  = UnquantizedDataType('F32')
+DT_I32  = UnquantizedDataType('I32')
 DT_BF16 = UnquantizedDataType('BF16')
 
-
-@dataclass(frozen=True)
-class QuantizedDataType:
-    groupsize: int
-    have_addends: bool
-    have_g_idx: bool
-
-
-DT_Q4_0 = QuantizedDataType(groupsize=32, have_addends=False, have_g_idx=False)
-DT_Q4_1 = QuantizedDataType(groupsize=32, have_addends=True, have_g_idx=False)
-
-DataType = Union[UnquantizedDataType, QuantizedDataType]
-
-DATA_TYPE_TO_FTYPE: Dict[DataType, int] = {
-    DT_F32: 0,
-    DT_F16: 1,
-    DT_Q4_0: 2,
-    DT_Q4_1: 3,
-}
-
-FTYPE_TO_DATA_TYPE: Dict[int, DataType] = \
-    {ftype: dtype for (dtype, ftype) in DATA_TYPE_TO_FTYPE.items()}
+DataType = Union[UnquantizedDataType]
 
 DATA_TYPE_TO_NUMPY: Dict[DataType, 'np.dtype[Any]'] = {
     DT_BF16: np.dtype(np.uint16),
-    DT_F16: np.dtype(np.float16),
-    DT_F32: np.dtype(np.float32),
-    DT_I32: np.dtype(np.int32),
+    DT_F16:  np.dtype(np.float16),
+    DT_F32:  np.dtype(np.float32),
+    DT_I32:  np.dtype(np.int32),
 }
 
 NUMPY_TYPE_TO_DATA_TYPE: Dict['np.dtype[Any]', DataType] = \
     {dtype: data_type for (data_type, dtype) in DATA_TYPE_TO_NUMPY.items()}
 
+SAFETENSORS_DATA_TYPES: Dict[str, DataType] = {
+    'BF16': DT_BF16,
+    'F16': DT_F16,
+    'F32': DT_F32,
+    'I32': DT_I32,
+}
 
 class GGMLFileType(enum.Enum):
-    AllF32 = 0
+    AllF32    = 0
     MostlyF16 = 1  # except 1d tensors
-    MostlyQ4_0 = 2  # except 1d tensors
-    MostlyQ4_1 = 3  # except 1d tensors
-    PerLayerIsQ4_1 = 4  # but tok_embeddings.weight and output.weight are F16
 
     def type_for_tensor(self, name: str, tensor: 'LazyTensor') -> DataType:
         if len(tensor.shape) == 1:
@@ -93,60 +81,34 @@ class GGMLFileType(enum.Enum):
             return DT_F32
         elif self == GGMLFileType.MostlyF16:
             return DT_F16
-        elif self == GGMLFileType.MostlyQ4_0:
-            return DT_Q4_0
-        elif self == GGMLFileType.MostlyQ4_1:
-            return DT_Q4_1
-        elif self == GGMLFileType.PerLayerIsQ4_1:
-            if name in ('output.weight', 'tok_embeddings.weight'):
-                return DT_F16
-            else:
-                return DT_Q4_1
         else:
             raise ValueError(self)
 
 
-def make_tensors_list() -> List[str]:
-    ret = [
-        'tok_embeddings.weight',
-        'norm.weight',
-        'output.weight',
-    ]
-    for i in range(80):  # maximum number of layer
-        ret += [
-            f'layers.{i}.attention.wq.weight',
-            f'layers.{i}.attention.wk.weight',
-            f'layers.{i}.attention.wv.weight',
-            f'layers.{i}.attention.wo.weight',
-            f'layers.{i}.attention_norm.weight',
-            f'layers.{i}.feed_forward.w1.weight',
-            f'layers.{i}.feed_forward.w2.weight',
-            f'layers.{i}.feed_forward.w3.weight',
-            f'layers.{i}.ffn_norm.weight',
-        ]
-    return ret
-
-
-TENSORS_LIST = make_tensors_list()
-TENSORS_SET = set(TENSORS_LIST)
-
-
-def find_n_mult(n_ff: int, n_embd: int) -> int:
-    # hardcoded magic range
-    for n_mult in range(8192, 1, -1):
-        calc_ff = (((8*n_embd) // 3 + n_mult - 1) // n_mult)*n_mult
-        if calc_ff == n_ff:
-            return n_mult
-    raise Exception(f"failed to find n_mult for (n_ff={n_ff}, n_embd={n_embd}).")
+#
+# hparams loading
+#
 
 @dataclass
 class Params:
-    n_vocab:   int
-    n_embd:    int
-    n_mult:    int
-    n_head:    int
-    n_layer:   int
-    n_kv_head: Optional[int]  # This parameter is only used for Llama 2
+    n_vocab:    int
+    n_embd:     int
+    n_mult:     int
+    n_layer:    int
+    n_ctx:      int
+    n_ff:       int
+    n_head:     int
+    n_head_kv:  int
+    f_norm_eps: float
+
+    @staticmethod
+    def find_n_mult(n_ff: int, n_embd: int) -> int:
+        # hardcoded magic range
+        for n_mult in range(8192, 1, -1):
+            calc_ff = (((8*n_embd) // 3 + n_mult - 1) // n_mult)*n_mult
+            if calc_ff == n_ff:
+                return n_mult
+        raise Exception(f"failed to find n_mult for (n_ff={n_ff}, n_embd={n_embd}).")
 
     @staticmethod
     def guessed(model: 'LazyModel') -> 'Params':
@@ -165,37 +127,57 @@ class Params:
             raise Exception("failed to guess 'n_layer'. This model is unknown or unsupported.\n"
                             "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
 
-        n_head=n_embd // 128 # guessed
+        n_head = n_embd // 128 # guessed
+        n_mult = 256           # guessed
+
+        # TODO: verify this
+        n_ff = int(2 * (4 * n_embd) / 3)
+        n_ff = n_mult * ((n_ff + n_mult - 1) // n_mult)
 
         return Params(
-            n_vocab   = n_vocab,
-            n_embd    = n_embd,
-            n_mult    = 256,
-            n_head    = n_head,
-            n_layer   = n_layer,
-            n_kv_head = None,
+            n_vocab    = n_vocab,
+            n_embd     = n_embd,
+            n_mult     = n_mult,
+            n_layer    = n_layer,
+            n_ctx      = -1,
+            n_ff       = n_ff,
+            n_head     = n_head,
+            n_head_kv  = n_head,
+            f_norm_eps = 1e-5,
         )
 
     @staticmethod
     def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
         config = json.load(open(config_path))
 
-        n_vocab = config["vocab_size"];
-        n_embd  = config["hidden_size"];
-        n_head  = config["num_attention_heads"];
-        n_layer = config["num_hidden_layers"];
-        n_ff    = config["intermediate_size"];
-        n_kv_head = config.get("num_key_value_heads")
+        n_vocab    = config["vocab_size"]
+        n_embd     = config["hidden_size"]
+        n_layer    = config["num_hidden_layers"]
+        n_ff       = config["intermediate_size"]
+        n_head     = config["num_attention_heads"]
+        n_head_kv  = config["num_key_value_heads"] if "num_key_value_heads" in config else n_head
+        f_norm_eps = config["rms_norm_eps"]
 
-        n_mult = find_n_mult(n_ff, n_embd);
+        n_mult = Params.find_n_mult(n_ff, n_embd)
+
+        if "max_sequence_length" in config:
+            n_ctx = config["max_sequence_length"]
+        elif "max_position_embeddings" in config:
+            n_ctx = config["max_position_embeddings"]
+        else:
+            raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.\n"
+                            "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
 
         return Params(
-            n_vocab   = n_vocab,
-            n_embd    = n_embd,
-            n_mult    = n_mult,
-            n_head    = n_head,
-            n_layer   = n_layer,
-            n_kv_head = n_kv_head,
+            n_vocab    = n_vocab,
+            n_embd     = n_embd,
+            n_mult     = n_mult,
+            n_layer    = n_layer,
+            n_ctx      = n_ctx,
+            n_ff       = n_ff,
+            n_head     = n_head,
+            n_head_kv  = n_head_kv,
+            f_norm_eps = f_norm_eps,
         )
 
     # LLaMA v2 70B params.json
@@ -204,22 +186,32 @@ class Params:
     def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
         config = json.load(open(config_path))
 
-        n_vocab   = config["vocab_size"];
-        n_embd    = config["dim"];
-        n_head    = config["n_heads"];
-        n_layer   = config["n_layers"];
-        n_mult    = config["multiple_of"];
+        n_vocab    = config["vocab_size"]
+        n_embd     = config["dim"]
+        n_layer    = config["n_layers"]
+        n_mult     = config["multiple_of"]
+        n_ctx      = 2048 if config["norm_eps"] == 1e-06 else 4096 # hack to determine LLaMA v1 vs v2
+        n_ff       = -1
+        n_head     = config["n_heads"]
+        n_head_kv  = config["n_kv_heads"] if "n_kv_heads" in config else n_head
+        f_norm_eps = config["norm_eps"]
 
         if n_vocab == -1:
             n_vocab = model["tok_embeddings.weight"].shape[0]
 
+        if n_ff == -1:
+            n_ff = model["layers.0.feed_forward.w1.weight"].shape[0]
+
         return Params(
-            n_vocab   = n_vocab,
-            n_embd    = n_embd,
-            n_mult    = n_mult,
-            n_head    = n_head,
-            n_layer   = n_layer,
-            n_kv_head = None,
+            n_vocab    = n_vocab,
+            n_embd     = n_embd,
+            n_mult     = n_mult,
+            n_layer    = n_layer,
+            n_ctx      = n_ctx,
+            n_ff       = n_ff,
+            n_head     = n_head,
+            n_head_kv  = n_head_kv,
+            f_norm_eps = f_norm_eps,
         )
 
     @staticmethod
@@ -234,10 +226,13 @@ class Params:
         else:
             params = Params.guessed(model_plus.model)
 
-        print(f'params: n_vocab:{params.n_vocab} n_embd:{params.n_embd} n_mult:{params.n_mult} n_head:{params.n_head} n_layer:{params.n_layer}')
         return params
 
 
+#
+# vocab
+#
+
 class BpeVocab:
     def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path]) -> None:
         self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read())
@@ -291,9 +286,10 @@ class SentencePieceVocab:
             added_tokens = {}
         vocab_size: int = self.sentencepiece_tokenizer.vocab_size()
         expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
-        actual_ids = sorted(added_tokens.values())
+        actual_ids   = sorted(added_tokens.values())
         if expected_ids != actual_ids:
             raise Exception(f"Expected added token IDs to be sequential and start at {len(added_tokens)}; got {actual_ids}")
+
         items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
         self.added_tokens_list = [text for (text, idx) in items]
         self.vocab_size_base: int = vocab_size
@@ -321,76 +317,29 @@ class SentencePieceVocab:
     def __repr__(self) -> str:
         return f"<SentencePieceVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
 
-
-class GGMLVocab:
-    def __init__(self, tokens: List[Tuple[bytes, float]]):
-        self.tokens = tokens
-        self.vocab_size = len(tokens)
-
-    def all_tokens(self) -> Iterable[Tuple[bytes, float]]:
-        return self.tokens
-
-    def __repr__(self) -> str:
-        return f"<GGMLVocab with {self.vocab_size} tokens>"
+Vocab = Union[BpeVocab, SentencePieceVocab]
 
 
-Vocab = Union[BpeVocab, SentencePieceVocab, GGMLVocab]
+#
+# data loading
+# TODO: reuse (probably move to gguf.py?)
+#
 
-
-def permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
-    if n_kv_head is not None and n_head != n_kv_head:
-        n_head //= n_kv_head
+def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
+    if n_head_kv is not None and n_head != n_head_kv:
+        n_head //= n_head_kv
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                 .swapaxes(1, 2)
                 .reshape(weights.shape))
 
 
-def dequantize_q4(qvalues_pack32: NDArray, scales: NDArray, addends: Optional[NDArray], g_idx: Optional[NDArray]) -> NDArray:
-    # First reinterpret each row from a list of int32s containing 8 values each
-    # to a list of uint8s containing 2 values each.
-    qvalues_pack8 = qvalues_pack32.view(np.uint8)
-
-    # Then split out the two values per int8 (which requires an actual
-    # conversion because numpy doesn't natively support int4s).
-    qvalues = np.zeros([qvalues_pack8.shape[0], qvalues_pack8.shape[1] * 2], dtype=np.uint8)
-    qvalues[:, 0::2] = qvalues_pack8 & 0xf
-    qvalues[:, 1::2] = qvalues_pack8 >> 4
-
-    assert addends is None or addends.shape == scales.shape
-    assert qvalues.shape[0] == scales.shape[0]
-    assert qvalues.shape[1] % scales.shape[1] == 0
-    if g_idx is None:
-        repeat_count = qvalues.shape[1] // scales.shape[1]
-        scales = scales[:, :, np.newaxis]
-        if addends is not None:
-            addends = addends[:, :, np.newaxis]
-        # Reshape so that the below computation broadcasts over scales and addends:
-        qvalues.shape = (qvalues.shape[0], scales.shape[1], int(repeat_count))
-    else:
-        # In this case the scale and addend is selected for each column by g_idx:
-        assert addends is not None
-        scales = scales[:, g_idx]
-        addends = addends[:, g_idx]
-    if addends is None:
-        # Q4_0
-        qvalues = qvalues.view(np.int8)
-        qvalues -= 8
-    # And do the actual 'value = scale * qvalue + addend' computation.
-    values = scales * qvalues
-    if addends is not None:
-        values += addends
-    if g_idx is None:
-        values.shape = (values.shape[0], values.shape[1] * values.shape[2])
-    return values
-
-
 class Tensor(metaclass=ABCMeta):
     data_type: DataType
 
     @abstractmethod
     def astype(self, data_type: DataType) -> 'Tensor': ...
     @abstractmethod
-    def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> 'Tensor': ...
+    def permute(self, n_head: int, n_head_kv: int) -> 'Tensor': ...
     @abstractmethod
     def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor': ...
     @abstractmethod
@@ -428,8 +377,8 @@ class UnquantizedTensor(Tensor):
         r = self.ndarray.shape[0] // 3
         return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])
 
-    def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> 'UnquantizedTensor':
-        return UnquantizedTensor(permute(self.ndarray, n_head, n_kv_head))
+    def permute(self, n_head: int, n_head_kv: int) -> 'UnquantizedTensor':
+        return UnquantizedTensor(permute(self.ndarray, n_head, n_head_kv))
 
 
 def load_unquantized(lazy_tensor: 'LazyTensor', expected_dtype: Any = None, convert: bool = False) -> NDArray:
@@ -448,183 +397,25 @@ def load_unquantized(lazy_tensor: 'LazyTensor', expected_dtype: Any = None, conv
     return tensor.ndarray
 
 
-class GGMLQuantizedTensor(Tensor):
-    data_type: QuantizedDataType
-
-    def __init__(self, ndarray: NDArray, shape: List[int], data_type: DataType) -> None:
-        rows, columns = shape
-        assert data_type in (DT_Q4_1, DT_Q4_0)  # for now
-        assert isinstance(data_type, QuantizedDataType)  # redundant, but mypy complains without this
-        assert columns % data_type.groupsize == 0
-        words_in_block = 6 if data_type == DT_Q4_1 else 5
-        self.ndarray = ndarray.view(dtype=np.uint32).reshape((rows, columns // data_type.groupsize, words_in_block))
-        self.shape = shape[:]
-        self.data_type = data_type
-
-    def astype(self, data_type: DataType) -> Tensor:
-        if data_type == self.data_type:
-            return self
-        scales = self.ndarray[:, :, 0].view(np.float32)
-        if self.data_type.have_addends:
-            addends = self.ndarray[:, :, 1].view(np.float32)
-        else:
-            addends = None
-        qweights = self.ndarray[:, :, -4:].reshape([self.shape[0], self.shape[1] // 8])
-
-        dq = dequantize_q4(qweights, scales, addends, g_idx=None)
-        return UnquantizedTensor(dq).astype(data_type)
-
-    def to_ggml(self) -> 'GGMLQuantizedTensor':
-        return self
-
-    def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> 'GGMLQuantizedTensor':
-        return GGMLQuantizedTensor(permute(self.ndarray, n_head, n_kv_head), self.shape, self.data_type)
-
-    def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor':
-        r = self.ndarray.shape[0] // 3
-        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head))
-
-    def part(self, n_part: int) -> 'UnquantizedTensor':
-        r = self.ndarray.shape[0] // 3
-        return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])
-
-GGMLCompatibleTensor = Union[UnquantizedTensor, GGMLQuantizedTensor]
+GGMLCompatibleTensor = Union[UnquantizedTensor]
 
 
 class DeferredPermutedTensor(Tensor):
-    def __init__(self, base: Tensor, n_head: int, n_kv_head: Optional[int] = None) -> None:
+    def __init__(self, base: Tensor, n_head: int, n_head_kv: int) -> None:
         self.base = base
         self.n_head = n_head
-        self.n_kv_head = n_kv_head
         self.data_type = self.base.data_type
 
     def astype(self, data_type: DataType) -> Tensor:
-        return self.base.astype(data_type).permute(self.n_head, self.n_kv_head)
+        return self.base.astype(data_type).permute(self.n_head, self.n_head_kv)
 
     def to_ggml(self) -> GGMLCompatibleTensor:
-        return self.base.to_ggml().permute(self.n_head, self.n_kv_head)
+        return self.base.to_ggml().permute(self.n_head, self.n_head_kv)
 
-    def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> Tensor:
+    def permute(self, n_head: int, n_head_kv: int) -> Tensor:
         raise Exception("shouldn't permute twice")
 
 
-class GPTQForLLaMaQuantizedTensor(Tensor):
-    def __init__(self, model: 'LazyModel', namebase: str) -> None:
-        qweight = load_unquantized(model[f"{namebase}.qweight"], np.int32)
-        scales = load_unquantized(model[f"{namebase}.scales"], np.float32, convert=True)
-
-        bias = model.get(f"{namebase}.bias")
-        if bias is not None:
-            # Q4_1 does not support bias; good thing the bias is always all zeros.
-            assert not np.any(load_unquantized(bias))
-
-        if f"{namebase}.zeros" in model:
-            zeros = load_unquantized(model[f"{namebase}.zeros"], np.float32)
-        else:
-            qzeros = load_unquantized(model[f"{namebase}.qzeros"], np.int32)
-            assert qzeros.dtype == np.int32
-            zeros = dequantize_q4(qzeros, scales, scales, g_idx=None)
-            assert zeros.dtype == np.float32
-
-        assert zeros.shape == scales.shape
-
-        # Output is transposed compared to the input, and addends have their sign flipped.
-        # Scales and zeros similarly must be transposed but only for newer
-        # versions of GPTQ-for-LLaMa; the older versions can be identified by
-        # having shape (n_embd, 1).
-        qweight = qweight.T
-        if scales.shape[1] != 1:
-            scales = scales.T
-            zeros = zeros.T
-
-        # Output also has signs flipped for the addends.
-        self.qweight = qweight
-        self.scales = scales
-        self.addends = -zeros
-
-        self.g_idx: Optional[NDArray]
-        if f"{namebase}.g_idx" in model:
-            self.g_idx = load_unquantized(model[f"{namebase}.g_idx"], np.int32)
-            assert self.g_idx.shape == (qweight.shape[1] * 8,)
-        else:
-            self.g_idx = None
-
-        self.shape = [self.qweight.shape[0], self.qweight.shape[1] * 8]
-        self.data_type = QuantizedDataType(groupsize=self.groupsize(), have_addends=True,
-                                           have_g_idx=(self.g_idx is not None))
-
-    def inspect(self, row: int, col: int) -> None:
-        '''For debugging.'''
-        qweight = (self.qweight[row, col // 8] >> (4 * (col & 7))) & 0xf
-        if self.g_idx is not None:
-            group = self.g_idx[col]
-        else:
-            group = int(col // self.groupsize())
-        scale = self.scales[row, group]
-        addend = self.addends[row, group]
-        with np.printoptions(precision=None, suppress=True):
-            print(f'scale:{scale} addend:{addend} qweight:{qweight}')
-            print('possible values:', np.arange(16) * scale + addend)
-            print('actual value:', qweight * scale + addend)
-
-    def astype(self, data_type: DataType) -> Tensor:
-        if isinstance(data_type, QuantizedDataType):
-            assert self.g_idx is None and data_type.have_addends is True and data_type.have_g_idx is False
-            return self.regroup(data_type.groupsize)
-
-        dequantized = dequantize_q4(np.ascontiguousarray(self.qweight), self.scales, self.addends, self.g_idx)
-        return UnquantizedTensor(dequantized).astype(data_type)
-
-    def groupsize(self) -> int:
-        assert self.addends.shape == self.scales.shape
-        assert self.shape[1] % self.scales.shape[1] == 0
-        return self.shape[1] // self.scales.shape[1]
-
-    def regroup(self, new_groupsize: int = 32) -> 'GPTQForLLaMaQuantizedTensor':
-        # Old versions of GPTQ-for-LLaMa shared scales and addends between all the
-        # columns in a row.  Newer versions share them between every set of N
-        # columns in a row, where N is the `groupsize` parameter, usually 128.  The
-        # output format shares them between every set of 32 columns.  To handle
-        # this, duplicate scales and addends for every smaller group.
-        # (In the above, 'row' and 'column' are in the sense of the output.)
-        assert self.g_idx is None
-        old_groupsize = self.groupsize()
-        assert old_groupsize >= new_groupsize and old_groupsize % new_groupsize == 0, old_groupsize
-        ret = copy.copy(self)
-        ret.addends = self.addends.repeat(old_groupsize // new_groupsize, axis=1)
-        ret.scales = self.scales.repeat(old_groupsize // new_groupsize, axis=1)
-        ret.data_type = QuantizedDataType(groupsize=new_groupsize, have_addends=True, have_g_idx=False)
-        return ret
-
-    def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> Tensor:
-        return DeferredPermutedTensor(self, n_head, n_kv_head)
-
-    def to_ggml(self) -> GGMLQuantizedTensor:
-        # The output format looks like this:
-        # For each row:
-        #   For each group of 32 columns:
-        #     - addend (float32, 4 bytes)
-        #     - scale (float32, 4 bytes)
-        #     - weights (int4 * 32, 16 bytes)
-
-        if self.groupsize() != 32:
-            raise Exception("should have been regrouped before converting to ggml")
-
-        # Since the output format is mixed between integers and floats, we have
-        # to hackily view the floats as int32s just so numpy will let us
-        # concatenate them.
-        addends_view = self.addends.view(dtype=np.int32)[:, :, np.newaxis]
-        scales_view = self.scales.view(dtype=np.int32)[:, :, np.newaxis]
-
-        # Split into groups of 4 columns (i.e. 32 columns of quantized data):
-        grouped = self.qweight.reshape([self.qweight.shape[0], self.qweight.shape[1] // 4, 4])
-
-        # And concatenate:
-        grouped = np.concatenate([scales_view, addends_view, grouped], axis=2, casting='no')
-
-        return GGMLQuantizedTensor(grouped, self.shape, DT_Q4_1)
-
-
 @dataclass
 class LazyTensor:
     _load: Callable[[], Tensor]
@@ -647,17 +438,6 @@ class LazyTensor:
     def validate_conversion_to(self, data_type: DataType) -> None:
         if data_type == self.data_type:
             return
-        if isinstance(data_type, QuantizedDataType):
-            if not isinstance(self.data_type, QuantizedDataType):
-                raise Exception(f"Can't turn an unquantized tensor into a quantized type ({data_type})")
-            if self.data_type.have_g_idx:
-                sys.stderr.write(
-                    "Error: Input uses the newer GPTQ-for-LLaMa format (using g_idx), "
-                    "which is not yet natively supported by GGML. "
-                    "For now you can still convert this model by passing `--outtype f16` to dequantize, "
-                    "but that will result in a much larger output file for no quality benefit.\n")
-                sys.exit(1)
-            assert not data_type.have_g_idx and self.data_type.have_addends and data_type.have_addends
 
 
 LazyModel = Dict[str, LazyTensor]
@@ -728,10 +508,10 @@ def merge_multifile_models(models_plus: List[ModelPlus]) -> ModelPlus:
     return ModelPlus(model, paths, format, vocab)
 
 
-def permute_lazy(lazy_tensor: LazyTensor, n_head: int, n_kv_head: Optional[int] = None) -> LazyTensor:
+def permute_lazy(lazy_tensor: LazyTensor, n_head: int, n_head_kv: int) -> LazyTensor:
     def load() -> Tensor:
-        return lazy_tensor.load().permute(n_head, n_kv_head)
-    return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}, {n_kv_head}) ' + lazy_tensor.description)
+        return lazy_tensor.load().permute(n_head, n_head_kv)
+    return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}, {n_head_kv}) ' + lazy_tensor.description)
 
 def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int) -> LazyTensor:
     def load() -> Tensor:
@@ -747,66 +527,6 @@ def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
     s[0] = s[0] // 3
     return LazyTensor(load, s, lazy_tensor.data_type, 'part ' + lazy_tensor.description)
 
-def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
-    out: LazyModel = {}
-    out["tok_embeddings.weight"] = model["model.embed_tokens.weight"]
-    out["norm.weight"] = model["model.norm.weight"]
-    out["output.weight"] = model["lm_head.weight"]
-
-    for i in itertools.count():
-        if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
-            out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_kv_head)
-            out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
-        elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
-            out[f"layers.{i}.attention.wq.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head)
-            out[f"layers.{i}.attention.wk.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head)
-            out[f"layers.{i}.attention.wv.weight"] = part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
-        else:
-            break
-
-        out[f"layers.{i}.attention.wo.weight"] = model[f"model.layers.{i}.self_attn.o_proj.weight"]
-
-        out[f"layers.{i}.feed_forward.w1.weight"] = model[f"model.layers.{i}.mlp.gate_proj.weight"]
-        out[f"layers.{i}.feed_forward.w2.weight"] = model[f"model.layers.{i}.mlp.down_proj.weight"]
-        out[f"layers.{i}.feed_forward.w3.weight"] = model[f"model.layers.{i}.mlp.up_proj.weight"]
-
-        out[f"layers.{i}.attention_norm.weight"] = model[f"model.layers.{i}.input_layernorm.weight"]
-        out[f"layers.{i}.ffn_norm.weight"] = model[f"model.layers.{i}.post_attention_layernorm.weight"]
-    return out
-
-
-def handle_quantization(model: LazyModel) -> LazyModel:
-    '''Convert a model with entries for 'foo.qweight', 'foo.scales', etc.
-    (which resolve to UnquantizedTensors with the raw data) to one with entries
-    for 'foo.weight' (which resolve to QuantizedTensors).
-    '''
-    def convert(name: str) -> Tuple[str, LazyTensor]:
-        if name.endswith(".qweight"):
-            namebase = name.rsplit('.', 1)[0]
-            orig_name = namebase + ".weight"
-
-            lazy_tensor = model[name]
-            assert len(lazy_tensor.shape) == 2
-            real_shape = [lazy_tensor.shape[1], lazy_tensor.shape[0] * 8]
-
-            # Calculate type.  This replicates the logic in
-            # GPTQForLLaMaQuantizedTensor (which is executed when the modelis
-            # actually loaded).
-            lazy_scales = model[f"{namebase}.scales"]
-            scales_width = 1 if lazy_scales.shape[1] == 1 else lazy_scales.shape[0]
-            assert real_shape[1] % scales_width == 0
-            groupsize = real_shape[1] // scales_width
-            have_g_idx = f"{namebase}.g_idx" in model
-            data_type = QuantizedDataType(groupsize=groupsize, have_addends=True, have_g_idx=have_g_idx)
-
-            def load() -> Tensor:
-                return GPTQForLLaMaQuantizedTensor(model, namebase)
-
-            return (orig_name, LazyTensor(load, real_shape, data_type, '[quantized]'))
-        else:
-            return (name, model[name])
-    return dict(convert(name) for name in model)
 
 # Functionality that simulates `torch.load` but where individual tensors are
 # only loaded into memory on demand, not all at once.
@@ -900,14 +620,6 @@ def lazy_load_torch_file(outer_fp: IO[bytes], path: Path) -> ModelPlus:
     return ModelPlus(model=as_dict, paths=[path], format='torch', vocab=None)
 
 
-SAFETENSORS_DATA_TYPES: Dict[str, DataType] = {
-    'BF16': DT_BF16,
-    'F16': DT_F16,
-    'F32': DT_F32,
-    'I32': DT_I32,
-}
-
-
 def lazy_load_safetensors_file(fp: IO[bytes], path: Path) -> ModelPlus:
     header_size, = struct.unpack('<Q', fp.read(8))
     header: Dict[str, Dict[str, Any]] = json.loads(fp.read(header_size))
@@ -939,84 +651,6 @@ def must_read(fp: IO[bytes], length: int) -> bytes:
     return ret
 
 
-def lazy_load_ggml_file(fp: io.BufferedReader, path: Path) -> ModelPlus:
-    magic = must_read(fp, 4)[::-1]
-    if magic in (b'ggmf', b'ggjt'):
-        version, = struct.unpack("i", must_read(fp, 4))
-        assert version == 1
-    else:
-        assert magic == b'ggml'
-        version = None
-    n_vocab, n_embd, n_mult, n_head, n_layer, rot, file_type = struct.unpack('<7i', must_read(fp, 28))
-
-    tokens: List[Tuple[bytes, float]] = []
-    for i in range(n_vocab):
-        if i == 32000:
-            # HACK: GPT4All messed with the format without changing the magic
-            # number.  Specifically, they changed the vocab section to contain
-            # `n_vocab - 1` tokens instead of `n_vocab` (i.e. omitting the
-            # extra pad token).  Try to detect if we're reading a file like
-            # this.
-            orig_pos = fp.tell()
-            fp.seek(20, io.SEEK_CUR)
-            is_gpt4all = fp.read(21) == b'tok_embeddings.weight'
-            fp.seek(orig_pos)
-            if is_gpt4all:
-                break
-
-        length, = struct.unpack("i", must_read(fp, 4))
-        text = must_read(fp, length)
-        if magic != b'ggml':
-            score, = struct.unpack("f", must_read(fp, 4))
-            tokens.append((text, score))
-    vocab = GGMLVocab(tokens) if magic != b'ggml' else None
-
-    model: LazyModel = {}
-    # Use mmap for the actual data to avoid race conditions with the file offset.
-    off = fp.raw.tell()
-    mapped = memoryview(mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ))
-    fp.raw.seek(off)  # needed on Windows
-
-    def read_tensor() -> None:  # this is a function so that variables captured in `load` don't change
-        shape_len, name_len, ftype = struct.unpack("iii", must_read(fp, 12))
-        assert 0 <= shape_len <= 3
-        shape: List[int] = list(struct.unpack(f"{shape_len}i", must_read(fp, 4 * shape_len)))
-        shape = shape[::-1]
-        name = must_read(fp, name_len).decode('utf-8')
-        data_type = FTYPE_TO_DATA_TYPE[ftype]
-
-        if magic == b'ggjt':
-            fp.seek((fp.tell() + 31) & -32)
-
-        if data_type == DT_Q4_1:
-            # See GPTQForLLaMaQuantizedTensor.ggml_ndarray()
-            size = 24 * (shape[1] // 32) * shape[0]
-        elif data_type == DT_Q4_0:
-            size = 20 * (shape[1] // 32) * shape[0]
-        else:
-            numpy_dtype = DATA_TYPE_TO_NUMPY[data_type]
-            elm_count = math.prod(shape)
-            size = elm_count * numpy_dtype.itemsize
-        offset = fp.tell()
-        buf = mapped[offset:offset+size]
-        fp.seek(size, io.SEEK_CUR)
-
-        def load() -> Tensor:
-            if isinstance(data_type, QuantizedDataType):
-                ndarray = np.frombuffer(buf, dtype=np.uint32)
-                return GGMLQuantizedTensor(ndarray, shape, data_type)
-            else:
-                return UnquantizedTensor(np.frombuffer(buf, dtype=numpy_dtype).reshape(shape))
-        description = f'ggml offset={offset} type={data_type} path={path}'
-        model[name] = LazyTensor(load, shape, data_type, description)
-
-    while fp.read(1) != b'':
-        fp.seek(-1, io.SEEK_CUR)
-        read_tensor()
-
-    return ModelPlus(model=model, paths=[path], format='ggml', vocab=vocab)
-
-
 @functools.lru_cache(maxsize=None)
 def lazy_load_file(path: Path) -> ModelPlus:
     fp = open(path, 'rb')
@@ -1025,9 +659,6 @@ def lazy_load_file(path: Path) -> ModelPlus:
     if first8[:2] == b'PK':
         # A zip file, i.e. PyTorch format
         return lazy_load_torch_file(fp, path)
-    elif first8[2:4] == b'gg':
-        # GGML format
-        return lazy_load_ggml_file(fp, path)
     elif struct.unpack('<Q', first8)[0] < 16 * 1024 * 1024:
         # Probably safetensors
         return lazy_load_safetensors_file(fp, path)
@@ -1038,7 +669,6 @@ def lazy_load_file(path: Path) -> ModelPlus:
 In = TypeVar('In')
 Out = TypeVar('Out')
 
-
 def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], concurrency: int) -> Iterable[Out]:
     '''Parallel map, but with backpressure.  If the caller doesn't call `next`
     fast enough, this will stop calling `func` at some point rather than
@@ -1058,7 +688,6 @@ def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], conc
 
 def check_vocab_size(params: Params, vocab: Vocab) -> None:
     if params.n_vocab != vocab.vocab_size:
-        # GGMLVocab comes from the same file as the model so shouldn't mismatch:
         assert isinstance(vocab, BpeVocab) or isinstance(vocab, SentencePieceVocab)
         if params.n_vocab == vocab.vocab_size_base:
             print("Ignoring added_tokens.json since model matches vocab size without it.")
@@ -1076,98 +705,151 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:
 
 class OutputFile:
     def __init__(self, fname_out: Path) -> None:
-        self.fout = open(fname_out, "wb")
+        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
-    def write_file_header(self, params: Params, file_type: GGMLFileType) -> None:
-        self.fout.write(b"ggjt"[::-1])  # magic
-        values = [
-            1,  # file version
-            params.n_vocab,
-            params.n_embd,
-            params.n_mult,
-            params.n_head,
-            params.n_layer,
-            params.n_embd // params.n_head,  # rot (obsolete)
-            file_type.value,
-        ]
-        self.fout.write(struct.pack("i" * len(values), *values))
+    def add_meta_arch(self, params: Params) -> None:
+        self.gguf.add_context_length      (params.n_ctx)
+        self.gguf.add_embedding_length    (params.n_embd)
+        self.gguf.add_block_count         (params.n_layer)
+        self.gguf.add_feed_forward_length (params.n_ff)
+        self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
+        self.gguf.add_head_count          (params.n_head)
+        self.gguf.add_head_count_kv       (params.n_head_kv)
+        self.gguf.add_layer_norm_rms_eps  (params.f_norm_eps)
 
-    def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None:
-        sname = name.encode('utf-8')
-        self.fout.write(struct.pack("iii", len(shape), len(sname), DATA_TYPE_TO_FTYPE[data_type]))
-        self.fout.write(struct.pack("i" * len(shape), *shape[::-1]))
-        self.fout.write(sname)
-        self.fout.seek((self.fout.tell() + 31) & -32)
-
-    def write_vocab(self, vocab: Vocab) -> None:
+    def add_meta_vocab(self, vocab: Vocab) -> None:
+        tokens = []
+        scores = []
         for text, score in vocab.all_tokens():
-            self.fout.write(struct.pack("i", len(text)))
-            self.fout.write(text)
-            self.fout.write(struct.pack("f", score))
+            tokens.append(text)
+            scores.append(score)
+
+        self.gguf.add_tokenizer_model("llama")
+        self.gguf.add_token_list(tokens)
+        self.gguf.add_token_scores(scores)
+        #self.gguf.add_token_types(toktypes) # TODO: add this
+
+        # TODO: added / special tokens
+
+    def add_tensor_info(self, name: str, tensor: LazyTensor) -> None:
+        n_elements = 1
+        for dim in tensor.shape:
+            n_elements *= dim
+        data_type = DATA_TYPE_TO_NUMPY[tensor.data_type]
+        data_nbytes = n_elements * data_type.itemsize
+        self.gguf.add_tensor_info(name, tensor.shape, data_type, data_nbytes)
+
+    def write_meta(self) -> None:
+        self.gguf.write_header_to_file()
+        self.gguf.write_kv_data_to_file()
+
+    def write_tensor_info(self) -> None:
+        self.gguf.write_ti_data_to_file()
+
+    def close(self) -> None:
+        self.gguf.close()
 
     @staticmethod
-    def write_vocab_only(fname_out: Path, vocab: Vocab) -> None:
-        of = OutputFile(fname_out)
-        params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0, n_kv_head=None)
-        of = OutputFile(fname_out)
-        of.write_file_header(params, file_type=GGMLFileType.AllF32)
-        of.write_vocab(vocab)
-        of.fout.close()
-
-    @staticmethod
-    def write_all(fname_out: Path, params: Params, file_type: GGMLFileType, model: LazyModel, vocab: Vocab) -> None:
+    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab) -> None:
         check_vocab_size(params, vocab)
+
         of = OutputFile(fname_out)
-        of.write_file_header(params, file_type)
-        print("Writing vocab...")
-        of.write_vocab(vocab)
+
+        # meta data
+        of.add_meta_arch(params)
+        of.add_meta_vocab(vocab)
+        of.write_meta()
+
+        of.close()
+
+    @staticmethod
+    def write_all(fname_out: Path, params: Params, model: LazyModel, vocab: Vocab) -> None:
+        check_vocab_size(params, vocab)
+
+        of = OutputFile(fname_out)
+
+        # meta data
+        of.add_meta_arch(params)
+        of.add_meta_vocab(vocab)
+
+        # tensor info
+        for name, lazy_tensor in model.items():
+            of.add_tensor_info(name, lazy_tensor)
+
+        of.write_meta()
+        of.write_tensor_info()
 
         def do_item(item: Tuple[str, LazyTensor]) -> NDArray:
             name, lazy_tensor = item
             return lazy_tensor.load().to_ggml().ndarray
 
+        # tensor data
         ndarrays = bounded_parallel_map(do_item, model.items(), concurrency=8)
         for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
             print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}")
-            of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
-            ndarray.tofile(of.fout)
-        of.fout.close()
+            of.gguf.write_tensor_data(ndarray)
 
+        of.close()
 
 def pick_output_type(model: LazyModel, output_type_str: Optional[str]) -> GGMLFileType:
-    wq_type = model["layers.0.attention.wq.weight"].data_type
-    if output_type_str == "f32" or (output_type_str is None and wq_type in (DT_F32, DT_BF16)):
+    wq_type = model[NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0)+".weight"].data_type
+
+    if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
         return GGMLFileType.AllF32
-    if output_type_str == "f16" or (output_type_str is None and wq_type == DT_F16):
+    if output_type_str == "f16" or (output_type_str is None and wq_type in (DT_F16, DT_BF16)):
         return GGMLFileType.MostlyF16
-    if output_type_str == "q4_1" or (output_type_str is None and isinstance(wq_type, QuantizedDataType) and
-                                     wq_type.have_addends):
-        if isinstance(model["output.weight"].data_type, QuantizedDataType):
-            return GGMLFileType.MostlyQ4_1
-        else:
-            return GGMLFileType.PerLayerIsQ4_1
-    if output_type_str == "q4_0" or (output_type_str is None and isinstance(wq_type, QuantizedDataType)):
-        return GGMLFileType.MostlyQ4_0
+
     name_to_type = {name: lazy_tensor.data_type for (name, lazy_tensor) in model.items()}
+
     raise Exception(f"Unexpected combination of types: {name_to_type}")
 
-
-def do_necessary_conversions(model: LazyModel, params: Params) -> LazyModel:
-    model = handle_quantization(model)
-
-    if "lm_head.weight" in model:
-        model = convert_transformers_to_orig(model, params)
-    model = filter_and_sort_tensors(model)
-
-    return model
-
-
 def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
     return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
             for (name, tensor) in model.items()}
 
+def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
+    tmap = gguf.get_tensor_name_map(ARCH, params.n_layer)
+
+    tmp = model
+
+    # HF models permut or pack some of the tensors, so we need to undo that
+    for i in itertools.count():
+        if f"model.layers.{i}.self_attn.q_proj.weight" in model:
+            print(f"Permuting layer {i}")
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
+           #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] =              model[f"model.layers.{i}.self_attn.v_proj.weight"]
+        elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
+            print(f"Unpacking and permuting layer {i}")
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy        (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
+        else:
+            break
+
+    out: LazyModel = {}
+    for name, lazy_tensor in model.items():
+        name_new = name
+
+        if name in tmap:
+            name_new = tmap[name]
+        elif name.endswith(".weight") and name[:-7] in tmap:
+            name_new = tmap[name[:-7]] + ".weight"
+        elif name.endswith(".bias") and name[:-5] in tmap:
+            name_new = tmap[name[:-5]] + ".bias"
+        else:
+            raise Exception(f"Unexpected tensor name: {name}")
+
+        if gguf.should_skip_tensor_TMP(ARCH, params.n_layer, name_new):
+            print(f"skipping tensor {name_new}")
+            continue
+        else:
+            print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type} | {lazy_tensor.shape}")
+            out[name_new] = lazy_tensor
+
+    return out
 
 def nth_multifile_path(path: Path, n: int) -> Optional[Path]:
     '''Given any path belonging to a multi-file model (e.g. foo.bin.1), return
@@ -1218,11 +900,6 @@ def load_some_model(path: Path) -> ModelPlus:
             # Try the PyTorch patterns too, with lower priority
             globs = ["consolidated.00.pth", "pytorch_model-00001-of-*.bin", "*.pt", "pytorch_model.bin"]
             files = [file for glob in globs for file in path.glob(glob)]
-        if not files:
-            # Try GGML too, but with lower priority, since if both a non-GGML
-            # model and a GGML model exist in the same directory, we assume the
-            # latter was converted from the former.
-            files = list(path.glob("ggml-model*.bin*"))
         if not files:
             raise Exception(f"Can't find model in directory {path}")
         if len(files) > 1:
@@ -1239,19 +916,14 @@ def load_some_model(path: Path) -> ModelPlus:
     return model_plus
 
 
-def filter_and_sort_tensors(model: LazyModel) -> LazyModel:
-    return {name: model[name] for name in TENSORS_LIST if name in model}
-
-
 def load_vocab(path: Path, vocabtype: Optional[str]) -> Union[BpeVocab, SentencePieceVocab]:
-    print(f"vocabtype: {vocabtype}")
     # Be extra-friendly and accept either a file or a directory.  Also, if it's
     # a directory, it might be the model directory, and tokenizer.model might
     # be in the parent of that.
     if path.is_dir():
         vocab_file = "tokenizer.model"
         if vocabtype == 'bpe':
-          vocab_file = "vocab.json"
+            vocab_file = "vocab.json"
         path2 = path / vocab_file
         # Use `.parent` instead of /.. to handle the symlink case better.
         path3 = path.parent / vocab_file
@@ -1263,8 +935,10 @@ def load_vocab(path: Path, vocabtype: Optional[str]) -> Union[BpeVocab, Sentence
             raise FileNotFoundError(
                 f"Could not find tokenizer.model in {path} or its parent; "
                 "if it's in another directory, pass the directory as --vocab-dir")
+
+    print(f"Loading vocab file '{path}', type '{vocabtype}'")
+
     added_tokens_path = path.parent / "added_tokens.json"
-    print(f"Loading vocab file {path}")
     if vocabtype == "bpe":
         return BpeVocab(path, added_tokens_path if added_tokens_path.exists() else None)
     elif vocabtype == "spm":
@@ -1275,13 +949,10 @@ def load_vocab(path: Path, vocabtype: Optional[str]) -> Union[BpeVocab, Sentence
 
 def default_outfile(model_paths: List[Path], file_type: GGMLFileType) -> Path:
     namestr = {
-        GGMLFileType.AllF32: "f32",
+        GGMLFileType.AllF32:    "f32",
         GGMLFileType.MostlyF16: "f16",
-        GGMLFileType.MostlyQ4_0: "q4_0",
-        GGMLFileType.MostlyQ4_1: "q4_1",
-        GGMLFileType.PerLayerIsQ4_1: "q4_1",
     }[file_type]
-    ret = model_paths[0].parent / f"ggml-model-{namestr}.bin"
+    ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
     if ret in model_paths:
         sys.stderr.write(
             f"Error: Default output path ({ret}) would overwrite the input. "
@@ -1300,44 +971,59 @@ def do_dump_model(model_plus: ModelPlus) -> None:
 
 def main(args_in: Optional[List[str]] = None) -> None:
     parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
-    parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
-    parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
-    parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
-    parser.add_argument("--outtype", choices=["f32", "f16", "q4_1", "q4_0"], help="output format (default: based on input)")
-    parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
-    parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
-    parser.add_argument("model", type=Path,
-                        help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
-    parser.add_argument("--vocabtype", default='spm', choices=["spm", "bpe"], help="vocab format (default: spm)")
+    parser.add_argument("--dump",        action="store_true",    help="don't convert, just show what's in the model")
+    parser.add_argument("--dump-single", action="store_true",    help="don't convert, just show what's in a single model file")
+    parser.add_argument("--vocab-only",  action="store_true",    help="extract only the vocab")
+    parser.add_argument("--outtype",     choices=["f32", "f16"], help="output format (default: based on input)")
+    parser.add_argument("--vocab-dir",   type=Path,              help="directory containing tokenizer.model, if separate from model file")
+    parser.add_argument("--outfile",     type=Path,              help="path to write to; default: based on input")
+    parser.add_argument("model",         type=Path,              help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
+    parser.add_argument("--vocabtype",   choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
+    parser.add_argument("--ctx",         type=int,               help="model training context (default: based on input)")
     args = parser.parse_args(args_in)
 
-    vocab: Vocab
     if args.dump_single:
         model_plus = lazy_load_file(args.model)
         do_dump_model(model_plus)
-    elif args.vocab_only:
+
+    model_plus = load_some_model(args.model)
+
+    params = Params.load(model_plus)
+    if params.n_ctx == -1:
+        if args.ctx is None:
+            raise Exception("The model doesn't have a context size, and you didn't specify one with --ctx\n"
+                            "Please specify one with --ctx:\n"
+                            " - LLaMA v1: --ctx 2048\n"
+                            " - LLaMA v2: --ctx 4096\n")
+        params.n_ctx = args.ctx
+
+    print(f"params = {params}")
+
+    vocab: Vocab
+    if args.vocab_only:
         vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
         assert args.outfile, "need --outfile if using --vocab-only"
         outfile = args.outfile
-        OutputFile.write_vocab_only(outfile, vocab)
+        OutputFile.write_vocab_only(outfile, params, vocab)
         print(f"Wrote {outfile}")
     else:
-        model_plus = load_some_model(args.model)
         if args.dump:
             do_dump_model(model_plus)
             return
+
         if model_plus.vocab is not None and args.vocab_dir is None:
             vocab = model_plus.vocab
         else:
             vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent
             vocab = load_vocab(vocab_dir, args.vocabtype)
-        params = Params.load(model_plus)
-        model = model_plus.model
-        model = do_necessary_conversions(model, params)
+
+        model       = model_plus.model
+        model       = convert_model_names(model, params)
         output_type = pick_output_type(model, args.outtype)
-        model = convert_to_output_type(model, output_type)
-        outfile = args.outfile or default_outfile(model_plus.paths, output_type)
-        OutputFile.write_all(outfile, params, output_type, model, vocab)
+        model       = convert_to_output_type(model, output_type)
+        outfile     = args.outfile or default_outfile(model_plus.paths, output_type)
+
+        OutputFile.write_all(outfile, params, model, vocab)
         print(f"Wrote {outfile}")
 
 
diff --git a/docs/token_generation_performance_tips.md b/docs/token_generation_performance_tips.md
index 69ba6173c..c9acff7d4 100644
--- a/docs/token_generation_performance_tips.md
+++ b/docs/token_generation_performance_tips.md
@@ -3,7 +3,7 @@
 ## Verifying that the model is running on the GPU with cuBLAS
 Make sure you compiled llama with the correct env variables according to [this guide](../README.md#cublas), so that llama accepts the `-ngl N` (or `--n-gpu-layers N`) flag. When running llama, you may configure `N` to be very large, and llama will offload the maximum possible number of layers to the GPU, even if it's less than the number you configured. For example:
 ```shell
-./main -m "path/to/model.bin" -ngl 200000 -p "Please sir, may I have some "
+./main -m "path/to/model.gguf" -ngl 200000 -p "Please sir, may I have some "
 ```
 
 When running llama, before it starts the inference work, it will output diagnostic information that shows whether cuBLAS is offloading work to the GPU. Look for these lines:
@@ -25,9 +25,9 @@ GPU: A6000 (48GB VRAM)
 CPU: 7 physical cores
 RAM: 32GB
 
-Model: `TheBloke_Wizard-Vicuna-30B-Uncensored-GGML/Wizard-Vicuna-30B-Uncensored.ggmlv3.q4_0.bin` (30B parameters, 4bit quantization, GGML)
+Model: `TheBloke_Wizard-Vicuna-30B-Uncensored-GGML/Wizard-Vicuna-30B-Uncensored.q4_0.gguf` (30B parameters, 4bit quantization, GGML)
 
-Run command: `./main -m "path/to/model.bin" -p "-p "An extremely detailed description of the 10 best ethnic dishes will follow, with recipes: " -n 1000 [additional benchmark flags]`
+Run command: `./main -m "path/to/model.gguf" -p "An extremely detailed description of the 10 best ethnic dishes will follow, with recipes: " -n 1000 [additional benchmark flags]`
 
 Result:
 
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index b5d9bb29e..d2176c910 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -6,27 +6,6 @@ find_package(Threads REQUIRED)
 
 # ...
 
-# common
-
-set(TARGET common)
-
-add_library(${TARGET} OBJECT
-    common.h
-    common.cpp
-    console.h
-    console.cpp
-    grammar-parser.h
-    grammar-parser.cpp
-    )
-
-if (BUILD_SHARED_LIBS)
-    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif()
-
-target_include_directories(${TARGET} PUBLIC .)
-target_compile_features(${TARGET} PUBLIC cxx_std_11)
-target_link_libraries(${TARGET} PRIVATE llama)
-
 # examples
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
@@ -45,6 +24,7 @@ else()
     add_subdirectory(convert-llama2c-to-ggml)
     add_subdirectory(simple)
     add_subdirectory(embd-input)
+    add_subdirectory(llama-bench)
     if (LLAMA_METAL)
         add_subdirectory(metal)
     endif()
diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp
index 2185b9b0e..8a6ad882e 100644
--- a/examples/embd-input/embd-input-lib.cpp
+++ b/examples/embd-input/embd-input-lib.cpp
@@ -167,7 +167,7 @@ llama_token sampling_id(struct MyModel* mymodel) {
         llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
 
         // TODO: Apply penalties
-        // float nl_logit = logits[llama_token_nl()];
+        // float nl_logit = logits[llama_token_nl(ctx)];
         // auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx);
         // llama_sample_repetition_penalty(ctx, &candidates_p,
         //      last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
@@ -176,7 +176,7 @@ llama_token sampling_id(struct MyModel* mymodel) {
         // last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
         // last_n_repeat, alpha_frequency, alpha_presence);
         // if (!penalize_nl) {
-        //     logits[llama_token_nl()] = nl_logit;
+        //     logits[llama_token_nl(ctx)] = nl_logit;
         // }
 
         if (temp <= 0) {
@@ -211,7 +211,7 @@ const char * sampling(struct MyModel * mymodel) {
     llama_context * ctx = mymodel->ctx;
     int id = sampling_id(mymodel);
     static std::string ret;
-    if (id == llama_token_eos()) {
+    if (id == llama_token_eos(ctx)) {
         ret = "</s>";
     } else {
         ret = llama_token_to_str(ctx, id);
diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp
index d742dce17..dee00df87 100644
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@@ -233,16 +233,13 @@ int main(int argc, char ** argv) {
     const std::string fname(argv[1]);
     const std::string mode (argv[2]);
 
-    GGML_ASSERT((mode == "r" || mode == "w" || mode == "q") && "mode must be r, w or q");
+    GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");
 
     if (mode == "w") {
         GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
     } else if (mode == "r") {
         GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
         GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
-    } else if (mode == "q") {
-        llama_model_quantize_params params = llama_model_quantize_default_params();
-        llama_model_quantize(fname.c_str(), "quant.gguf", &params);
     }
 
     return 0;
diff --git a/cmpnct_gpt2bpe.hpp b/examples/gptneox-wip/cmpnct_gpt2bpe.hpp
similarity index 94%
rename from cmpnct_gpt2bpe.hpp
rename to examples/gptneox-wip/cmpnct_gpt2bpe.hpp
index ac5f8c672..81ea443b2 100644
--- a/cmpnct_gpt2bpe.hpp
+++ b/examples/gptneox-wip/cmpnct_gpt2bpe.hpp
@@ -14,6 +14,7 @@
 //-----
 // Unicode GPT2 Byte Pair Encoding Tokenizer
 // Adapted from https://github.com/cmp-nct/ggllm.cpp
+// Removed loading of merges from HF json and parts made for a specific vocab
 //-----
 
 // Unicode library (from cmpnct_unicode.cpp)
@@ -439,23 +440,18 @@ private:
 struct gpt2bpe_vocab {
     using id = int32_t;
     using token = std::string;
-    std::map<std::string, uint32_t> max_token_length; // max length, for each 2byte prefix
 
+    std::map<std::string, uint32_t> max_token_length; // max length, for each 2byte prefix
     std::map<std::pair<std::string,std::string>, int> bpe_ranks;
     std::vector<std::pair<std::string, std::string>> bpe_merges;
-    std::map<std::string, int> special_tokens;
 
-    id special_bos_id = 0;
-    id special_eos_id = 0;
-    id special_unk_id = 0;
-    id special_sep_id = 0;
-    id special_pad_id = 0;
+    id special_bos_id = -1;
+    id special_eos_id = -1;
+    id special_unk_id = -1;
+    id special_sep_id = -1;
+    id special_pad_id = -1;
 
-    bool special_have_bos = false;
-    bool special_have_eos = false;
-    bool special_have_unk = false;
-    bool special_have_sep = false;
-    bool special_have_pad = false;
+    id linefeed_id = -1;
 
     std::unordered_map<token, id> token_to_id;
     std::unordered_map<id, token> id_to_token;
@@ -480,22 +476,6 @@ struct gpt2bpe_vocab {
             bpe_ranks.emplace(bpe_merges_[i], i);
         }
         bpe_merges = bpe_merges_;
-
-        // populate special tokens too (0-11 and if available 65024++)
-
-        #if 0
-        for (int i = 0; i < 12; i++) {
-            special_tokens[id_to_token[i].tok] = i;
-        }
-        for (int i = 65024; i < (int)id_to_token.size(); i++) {
-            special_tokens[id_to_token[i].tok] = i;
-        }
-        #endif
-
-        // token_to_id["</s>"] = 11; // bugfix for TII instruct training (blocks stopwords)
-        // special_tokens["</s>"] = 11; // bugfix for TII instruct training (blocks stopwords)
-
-
         return bpe_merges_.size();
     }
 
@@ -512,10 +492,6 @@ struct gpt2bpe_vocab {
         }).base(), str.end());
     }
 
-    // removed, merges loaded from gguf model file:
-    // requires the standard HF type tokenizer.json (pretty printed)
-    // std::vector<std::pair<std::string, std::string>> parse_json_to_bpe_merges(const std::string& filename) {
-
     // get max token length available for a prefix of 2 bytes (string at least 2 bytes long)
     int get_max_token_length(const std::string& string) const {
         if (string.size() < 2)
@@ -613,45 +589,27 @@ struct gpt2bpe_tokenizer {
         {
             work_queue_ = ggllm_bpe_bigram::queue();
             symbols_.clear();
-            bool is_special = false;
-            for (auto it = vocab_.special_tokens.begin(); it != vocab_.special_tokens.end(); ++it)
-            {
-                std::string special_token = it->first;
-                if (word.compare(special_token) == 0)
-                {
-                    ggllm_bpe_symbol sym;
-                    sym.text = word.c_str();
-                    sym.n = word.size();
-                    sym.prev = -1;
-                    sym.next = -1;
-                    symbols_.emplace_back(sym);
-                    is_special = true;
-                    break;
-                }
-            }
 
             int index = 0;
             size_t offset = 0;
-            if (!is_special)
-            {
 
-                while (offset < word.size())
-                {
-                    ggllm_bpe_symbol sym;
-                    size_t char_len = std::min(word.size() - offset, (size_t) CNCTUnicode::utf8_len(word[offset]));
-                    sym.text = word.c_str() + offset;
-                    sym.n = 1;
-                    sym.n = char_len;
-                    offset += sym.n;
-                    sym.prev = index - 1;
-                    sym.next = offset == word.size() ? -1 : index + 1;
-                    index++;
-                    symbols_.emplace_back(sym);
-                }
-                for (size_t i = 1; i < symbols_.size(); ++i) {
-                    add_new_bigram(i - 1, i);
-                }
+            while (offset < word.size())
+            {
+                ggllm_bpe_symbol sym;
+                size_t char_len = std::min(word.size() - offset, (size_t) CNCTUnicode::utf8_len(word[offset]));
+                sym.text = word.c_str() + offset;
+                sym.n = 1;
+                sym.n = char_len;
+                offset += sym.n;
+                sym.prev = index - 1;
+                sym.next = offset == word.size() ? -1 : index + 1;
+                index++;
+                symbols_.emplace_back(sym);
             }
+            for (size_t i = 1; i < symbols_.size(); ++i) {
+                add_new_bigram(i - 1, i);
+            }
+
             // build token(s)
             while (!work_queue_.empty())
             {
@@ -794,17 +752,6 @@ private:
         bpe_encoded_words.reserve(text.size());
 
         text_utf = CNCTUnicode::split_utf8_enhanced(text);
-        std::map<std::string, int> special_tokens = vocab_.special_tokens;
-        int smallest_len_special_tokens = 0;
-        if (special_tokens.size())
-        {
-            smallest_len_special_tokens = special_tokens.begin()->first.size();
-            for (auto it = special_tokens.begin(); it != special_tokens.end(); ++it)
-            {
-                if (it->first.size() < (size_t)smallest_len_special_tokens)
-                    smallest_len_special_tokens = it->first.size();
-            }
-        }
 
         for (int i = 0; i < (int)text_utf.size(); i++)
         {
@@ -817,41 +764,6 @@ private:
             const CNCTString &utf_char_next_next = (i+2 < (int)text_utf.size()) ? text_utf[i+2] : CNCTString();
             // const CNCTString &utf_char_prev = (i > 0) ? text_utf[i-1] : CNCTString();
 
-        // handling special tokens
-            bool special_token_found = false;
-            if (bytes_remain >= (int)smallest_len_special_tokens)
-            for (auto it = special_tokens.begin(); it != special_tokens.end(); ++it)
-            {
-                if ((bytes_remain) < (int)it->first.size())
-                    continue;
-
-                if (str_is_equal(text_pos, it->first.c_str(), it->first.size()))
-                {
-                    if (token.size())
-                    {
-                        bpe_words.emplace_back(token); // push previous content as token
-                        token.clear();
-                        collecting = false;
-                        collecting_letter = false;
-                        collecting_numeric = false;
-                        collecting_special = false;
-                        collecting_whitespace_lookahead = false;
-                    }
-
-                    bpe_words.emplace_back(it->first); // push special token as token
-
-                    // we now advance i until the token is fulfilled by the utf_chars
-                    int st_bytes = (int)it->first.size();
-                    for (;st_bytes;st_bytes -= text_utf[i++].str.size());
-                    i--;
-                    special_token_found = true;
-                    break;
-                }
-            }
-
-            if (special_token_found)   continue;
-
-
             // handling contractions
             if (!split_condition && bytes_remain >= 2)
             {
@@ -1002,7 +914,7 @@ static std::vector<gpt2bpe_vocab::id> gpt2bpe_tokenize(const gpt2bpe_vocab & voc
         return output;
     }
 
-    if (bos && vocab.special_have_bos) {
+    if (bos && vocab.special_bos_id != -1) {
         output.push_back(vocab.special_bos_id);
     }
 
diff --git a/examples/gptneox-wip/falcon-main.cpp b/examples/gptneox-wip/falcon-main.cpp
new file mode 100644
index 000000000..43b6a29f3
--- /dev/null
+++ b/examples/gptneox-wip/falcon-main.cpp
@@ -0,0 +1,1111 @@
+#include "ggml.h"
+#include "cmpnct_gpt2bpe.hpp"
+
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <cinttypes>
+#include <fstream>
+#include <map>
+#include <string>
+#include <vector>
+#include <thread>
+#include <random>
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
+// default hparams
+struct falcon_hparams {
+    size_t n_merges = 0;
+    size_t n_vocab  = 0;
+    uint32_t n_ctx    = 0;
+    uint32_t n_embd   = 0;
+    uint32_t n_head   = 0;
+    uint32_t n_head_kv = 1; // Needs to be 1 for 7B model
+    uint32_t n_ff = 0;
+    uint32_t n_block  = 0;
+    float norm_eps = 1e-5;
+};
+struct falcon_block {
+    // normalization
+    struct ggml_tensor* input_layernorm;
+    struct ggml_tensor* input_layernorm_b;
+    struct ggml_tensor* attention_norm;    // Falcon-40B only
+    struct ggml_tensor* attention_norm_b;  // Falcon-40B only
+
+    // attention
+    struct ggml_tensor* query_key_value;
+    struct ggml_tensor* wo;
+
+    // ff
+    struct ggml_tensor* ffn_up;
+    struct ggml_tensor* ffn_down;
+};
+
+struct falcon_model {
+    falcon_hparams hparams;
+
+    struct ggml_tensor* tok_embeddings;
+    struct ggml_tensor* output_norm;
+    struct ggml_tensor* output_norm_b;
+    struct ggml_tensor* lm_head;
+
+    std::vector<falcon_block> blocks;
+
+    // key + value memory
+    struct ggml_tensor* memory_k;
+    struct ggml_tensor* memory_v;
+
+    struct gguf_context * ggufctx;
+    struct ggml_context * ctx;
+    struct ggml_context * kvctx;
+
+    std::map<std::string, struct ggml_tensor*> tensors;
+};
+
+struct gpt_params {
+    int32_t seed      = -1;  // RNG seed
+    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    uint32_t n_predict = 200; // new tokens to predict
+    uint32_t n_batch   = 512;   // batch size for prompt processing
+
+    // sampling parameters
+    int32_t top_k          = 40;
+    float top_p            = 1.0f;
+    float temp             = 0.8f;
+    int32_t repeat_last_n  = 64;
+    float repeat_penalty   = 1.02f;
+
+    std::string model      = ""; // model path
+    std::string prompt     = "";
+
+    std::string token_test = "";
+    bool    interactive      = false;
+    int32_t interactive_port = -1;
+    int32_t n_gpu_layers     = 0;
+};
+
+void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
+    fprintf(stderr, "usage: %s [options]\n", argv[0]);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "options:\n");
+    fprintf(stderr, "  -h, --help            show this help message and exit\n");
+    fprintf(stderr, "  -s SEED, --seed SEED  RNG seed (default: -1)\n");
+    fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
+    fprintf(stderr, "  -ngl N, --gpu-layers N  number of layers to offload to GPU on supported models (default: %d)\n", params.n_gpu_layers);
+    fprintf(stderr, "  -p PROMPT, --prompt PROMPT\n");
+    fprintf(stderr, "                        prompt to start generation with (default: random)\n");
+    fprintf(stderr, "  -f FNAME, --file FNAME\n");
+    fprintf(stderr, "                        load prompt from a file\n");
+    fprintf(stderr, "  -tt TOKEN_TEST, --token_test TOKEN_TEST\n");
+    fprintf(stderr, "                        test tokenization\n");
+    fprintf(stderr, "  -n N, --n_predict N   number of tokens to predict (default: %d)\n", params.n_predict);
+    fprintf(stderr, "  --top_k N             top-k sampling, 0 = n_vocab (default: %d)\n", params.top_k);
+    fprintf(stderr, "  --top_p N             top-p sampling (default: %.1f)\n", params.top_p);
+    fprintf(stderr, "  --temp N              temperature (default: %.1f)\n", params.temp);
+    fprintf(stderr, "  --repeat-last-n N     last n tokens to consider for penalize (default: %d, 0 = disabled)\n", params.repeat_last_n);
+    fprintf(stderr, "  --repeat-penalty N    penalize repeat sequence of tokens (default: %.2f, 1.0 = disabled)\n", (double)params.repeat_penalty);
+    fprintf(stderr, "  -b N, --batch_size N  batch size for prompt processing (default: %d)\n", params.n_batch);
+    fprintf(stderr, "  -m FNAME, --model FNAME\n");
+    fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
+    fprintf(stderr, "\n");
+}
+
+// Function to check if the next argument exists
+std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
+    if (i + 1 < argc && argv[i + 1][0] != '-') {
+        return argv[++i];
+    } else {
+        fprintf(stderr, "error: %s requires one argument.\n", flag.c_str());
+        gpt_print_usage(argc, argv, params);
+        exit(0);
+    }
+}
+
+bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+
+        if (arg == "-s" || arg == "--seed") {
+            params.seed = std::stoi(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "-t" || arg == "--threads") {
+            params.n_threads = std::stoi(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "-ngl" || arg == "--gpu-layers" || arg == "--n-gpu-layers") {
+            params.n_gpu_layers = std::stoi(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "-p" || arg == "--prompt") {
+            params.prompt = get_next_arg(i, argc, argv, arg, params);
+        } else if (arg == "-n" || arg == "--n_predict") {
+            params.n_predict = std::stoi(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "--top_k") {
+            params.top_k = std::stoi(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "--top_p") {
+            params.top_p = std::stof(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "--temp") {
+            params.temp = std::stof(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "--repeat-last-n") {
+            params.repeat_last_n = std::stoi(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "--repeat-penalty") {
+            params.repeat_penalty = std::stof(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "-b" || arg == "--batch_size") {
+            params.n_batch= std::stoi(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "-m" || arg == "--model") {
+            params.model = get_next_arg(i, argc, argv, arg, params);
+        } else if (arg == "-i" || arg == "--interactive") {
+            params.interactive = true;
+        } else if (arg == "-ip" || arg == "--interactive-port") {
+            params.interactive = true;
+            params.interactive_port = std::stoi(get_next_arg(i, argc, argv, arg, params));
+        } else if (arg == "-h" || arg == "--help") {
+            gpt_print_usage(argc, argv, params);
+            exit(0);
+        } else if (arg == "-f" || arg == "--file") {
+            get_next_arg(i, argc, argv, arg, params);
+            std::ifstream file(argv[i]);
+            if (!file) {
+                fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
+                break;
+            }
+            std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
+            if (params.prompt.back() == '\n') {
+                params.prompt.pop_back();
+            }
+        } else if (arg == "-tt" || arg == "--token_test") {
+            params.token_test = get_next_arg(i, argc, argv, arg, params);
+        }
+        else {
+            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
+            gpt_print_usage(argc, argv, params);
+            exit(0);
+        }
+    }
+
+    return true;
+}
+
+gpt2bpe_vocab::id sample_top_k_top_p_repeat(
+        const gpt2bpe_vocab & vocab,
+        const float * logits,
+        const int32_t * last_n_tokens_data,
+        size_t last_n_tokens_data_size,
+        int    top_k,
+        double top_p,
+        double temp,
+        int repeat_last_n,
+        float repeat_penalty,
+        std::mt19937 & rng) {
+
+    int n_logits = vocab.id_to_token.size();
+
+    const auto * plogits = logits;
+
+    const auto last_n_tokens = std::vector<int32_t>(last_n_tokens_data, last_n_tokens_data + last_n_tokens_data_size);
+
+    if (temp <= 0) {
+        // select the token with the highest logit directly
+        float max_logit = plogits[0];
+        gpt2bpe_vocab::id max_id = 0;
+
+        for (int i = 1; i < n_logits; ++i) {
+            if (plogits[i] > max_logit) {
+                max_logit = plogits[i];
+                max_id = i;
+            }
+        }
+        return max_id;
+    }
+
+
+    std::vector<std::pair<double, gpt2bpe_vocab::id>> logits_id;
+    logits_id.reserve(n_logits);
+
+    {
+        const float scale = 1.0f/temp;
+        for (int i = 0; i < n_logits; ++i) {
+            // repetition penalty from ctrl paper (https://arxiv.org/abs/1909.05858)
+            // credit https://github.com/facebookresearch/llama/compare/main...shawwn:llama:main
+            if (repeat_last_n > 0 && std::find(last_n_tokens.end()-repeat_last_n, last_n_tokens.end(), i) != last_n_tokens.end()) {
+                // if score < 0 then repetition penalty has to multiplied to reduce the previous token probability
+                if (plogits[i] < 0.0f) {
+                    logits_id.push_back(std::make_pair(plogits[i]*scale*repeat_penalty, i));
+                } else {
+                    logits_id.push_back(std::make_pair(plogits[i]*scale/repeat_penalty, i));
+                }
+            } else {
+                logits_id.push_back(std::make_pair(plogits[i]*scale, i));
+            }
+        }
+    }
+
+    // find the top K tokens
+    std::partial_sort(
+            logits_id.begin(),
+            logits_id.begin() + top_k, logits_id.end(),
+            [](const std::pair<double, gpt2bpe_vocab::id> & a, const std::pair<double, gpt2bpe_vocab::id> & b) {
+        return a.first > b.first;
+    });
+
+    logits_id.resize(top_k);
+
+    double maxl = -INFINITY;
+    for (const auto & kv : logits_id) {
+        maxl = std::max(maxl, kv.first);
+    }
+
+    // compute probs for the top K tokens
+    std::vector<double> probs;
+    probs.reserve(logits_id.size());
+
+    double sum = 0.0;
+    for (const auto & kv : logits_id) {
+        double p = exp(kv.first - maxl);
+        probs.push_back(p);
+        sum += p;
+    }
+
+    // normalize the probs
+    for (auto & p : probs) {
+        p /= sum;
+    }
+
+    if (top_p < 1.0f) {
+        double cumsum = 0.0f;
+        for (int i = 0; i < top_k; i++) {
+            cumsum += probs[i];
+            if (cumsum >= top_p) {
+                top_k = i + 1;
+                probs.resize(top_k);
+                logits_id.resize(top_k);
+                break;
+            }
+        }
+
+        cumsum = 1.0/cumsum;
+        for (int i = 0; i < (int) probs.size(); i++) {
+            probs[i] *= cumsum;
+        }
+    }
+
+//    printf("\n");
+//    for (int i = 0; i < (int) probs.size(); i++) {
+//    for (int i = 0; i < 10; i++) {
+//        printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
+//    }
+
+    std::discrete_distribution<> dist(probs.begin(), probs.end());
+    int idx = dist(rng);
+
+    return logits_id[idx].second;
+
+}
+
+struct ggml_tensor * get_tensor_ex( struct ggml_context * ctx, std::string name){
+
+    struct ggml_tensor * cur = ggml_get_tensor(ctx, name.c_str());
+    if( cur == NULL ) {
+        fprintf(stdout, "%s: tensor '%s' not found!\n", __func__, name.c_str());
+    } else {
+//        fprintf(stdout, "%s: n_dims = %d, name = '%s'\n", __func__, cur->n_dims, cur->name);
+    }
+
+    return cur;
+}
+
+// load the model's weights from a file
+bool falcon_model_load(const std::string & fname, falcon_model & model, gpt2bpe_vocab & vocab) {
+    printf("%s: loading model from '%s'..\n", __func__, fname.c_str());
+
+    model.ctx = NULL;
+
+    struct gguf_init_params ggufparams = {
+        /*.no_alloc = */ false,
+        /*.ctx      = */ &model.ctx,
+    };
+
+    auto & ggufctx = model.ggufctx;
+
+    ggufctx  = gguf_init_from_file(fname.c_str(), ggufparams);
+
+    if (!ggufctx) {
+        fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
+        return false;
+    }
+
+    fprintf(stdout, "%s: gguf version     = %d\n", __func__, gguf_get_version(ggufctx));
+    fprintf(stdout, "%s: gguf alignment   = %zu\n", __func__, gguf_get_alignment(ggufctx));
+    fprintf(stdout, "%s: gguf data offset = %zu\n", __func__, gguf_get_data_offset(ggufctx));
+
+    // print all kv
+    #if 0
+    {
+        const int n_kv = gguf_get_n_kv(ggufctx);
+
+        fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv);
+
+        for (int i = 0; i < n_kv; ++i) {
+            const char * key = gguf_get_key(ggufctx, i);
+
+            fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key);
+        }
+    }
+    #endif
+
+    // print some standard metadata
+    {
+        int keyidx;
+
+        keyidx = gguf_find_key(ggufctx, "general.name");
+        if (keyidx != -1) { fprintf(stdout, "%s: model name           = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
+        keyidx = gguf_find_key(ggufctx, "general.description");
+        if (keyidx != -1) { fprintf(stdout, "%s: model description    = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
+        keyidx = gguf_find_key(ggufctx, "general.author");
+        if (keyidx != -1) { fprintf(stdout, "%s: model author         = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
+        keyidx = gguf_find_key(ggufctx, "general.license");
+        if (keyidx != -1) { fprintf(stdout, "%s: model license        = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
+        keyidx = gguf_find_key(ggufctx, "general.architecture");
+        if (keyidx != -1) { fprintf(stdout, "%s: model architecture   = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
+        keyidx = gguf_find_key(ggufctx, "general.file_type");
+        if (keyidx != -1) { fprintf(stdout, "%s: model file type      = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
+        keyidx = gguf_find_key(ggufctx, "gptneox.tensor_data_layout");
+        if (keyidx != -1) { fprintf(stdout, "%s: model data layout    = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
+        keyidx = gguf_find_key(ggufctx, "general.source.hugginface.repository");
+        if (keyidx != -1) { fprintf(stdout, "%s: model source HF repo = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
+    }
+
+    // check required metadata
+    {
+        int keyidx;
+
+        // check model architecture kv
+        keyidx = gguf_find_key(ggufctx, "general.architecture");
+        if (keyidx != -1) {
+            if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "falcon") != 0) {
+                fprintf(stdout, "%s: model architecture not supported!\n", __func__);
+                return false;
+            }
+        } else {
+            fprintf(stdout, "%s: gguf model architecture not found!\n", __func__);
+            return false;
+        }
+
+        // check model tensor data layout kv
+        keyidx = gguf_find_key(ggufctx, "falcon.tensor_data_layout");
+        if (keyidx != -1) {
+            if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "jploski") != 0) {
+                fprintf(stdout, "%s: model tensor data layout not supported!\n", __func__);
+                return false;
+            }
+        } else {
+            fprintf(stdout, "%s: gguf model tensor data layout not found!\n", __func__);
+            return false;
+        }
+
+    }
+
+    // load hparams
+    {
+        auto & hparams = model.hparams;
+
+        bool ok = true;
+        int keyidx;
+
+        if (ok) { keyidx = gguf_find_key(ggufctx, "falcon.context_length");
+                  if (keyidx != -1) { hparams.n_ctx = gguf_get_val_u32(ggufctx, keyidx); } else { ok = false; }  }
+
+        if (ok) { keyidx = gguf_find_key(ggufctx, "falcon.embedding_length");
+                  if (keyidx != -1) { hparams.n_embd = gguf_get_val_u32(ggufctx, keyidx); } else { ok = false; }  }
+
+        if (ok) { keyidx = gguf_find_key(ggufctx, "falcon.attention.head_count");
+                  if (keyidx != -1) { hparams.n_head = gguf_get_val_u32(ggufctx, keyidx); } else { ok = false; }  }
+
+        if (ok) { keyidx = gguf_find_key(ggufctx, "falcon.feed_forward_length");
+                  if (keyidx != -1) { hparams.n_ff = gguf_get_val_u32(ggufctx, keyidx); } else { ok = false; }  }
+
+        if (ok) { keyidx = gguf_find_key(ggufctx, "falcon.block_count");
+                  if (keyidx != -1) { hparams.n_block = gguf_get_val_u32(ggufctx, keyidx); } else { ok = false; }  }
+
+        if (ok) { keyidx = gguf_find_key(ggufctx, "falcon.attention.layer_norm_epsilon");
+                  if (keyidx != -1) { hparams.norm_eps= gguf_get_val_f32(ggufctx, keyidx); } else { ok = false; }  }
+
+        if (!ok) {
+            fprintf(stderr, "%s: required hparam missing!\n", __func__);
+            return false;
+        }
+
+        keyidx = gguf_find_key(ggufctx, "falcon.attention.head_count_kv");
+        if (keyidx != -1) { hparams.n_head_kv = gguf_get_val_u32(ggufctx, keyidx); }
+
+
+        printf("%s: n_ctx      = %d\n", __func__, hparams.n_ctx);
+        printf("%s: n_embd     = %d\n", __func__, hparams.n_embd);
+        printf("%s: n_head     = %d\n", __func__, hparams.n_head);
+        printf("%s: n_head_kv  = %d\n", __func__, hparams.n_head_kv);
+        printf("%s: n_block    = %d\n", __func__, hparams.n_block);
+        printf("%s: norm_eps   = %g\n", __func__, hparams.norm_eps);
+
+    }
+
+    // load vocab
+    {
+        auto & hparams = model.hparams;
+
+        int keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.model");
+
+        if (keyidx != -1) {
+            if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "gpt2") != 0) {
+                fprintf(stdout, "%s: tokenizer model not supported!\n", __func__);
+                return false;
+            }
+        } else {
+            fprintf(stdout, "%s: tokenizer model not found!\n", __func__);
+            return false;
+        }
+
+
+        int tokens_keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.tokens");
+
+        if (tokens_keyidx == -1) {
+            fprintf(stdout, "%s: gpt2 tokenizer vocab not found!\n", __func__);
+            return false;
+        }
+
+        int merges_keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.merges");
+
+        if (merges_keyidx == -1) {
+            fprintf(stdout, "%s: gpt2 tokenizer merges not found!\n", __func__);
+            return false;
+        }
+
+        hparams.n_vocab = gguf_get_arr_n(ggufctx,tokens_keyidx);
+        hparams.n_merges = gguf_get_arr_n(ggufctx,merges_keyidx);
+
+        fprintf(stdout, "%s: gpt2 tokenizer vocab  = %zu\n", __func__, hparams.n_vocab);
+        fprintf(stdout, "%s: gpt2 tokenizer merges = %zu\n", __func__, hparams.n_merges);
+
+        for (size_t i = 0; i < hparams.n_vocab; i++) {
+            std::string word = gguf_get_arr_str(ggufctx, tokens_keyidx, i);
+
+//            printf("token %d = '%s'\n",i,word.c_str() );
+
+            vocab.token_to_id[word] = i;
+            vocab.id_to_token[i] = word;
+
+            if( vocab.id_to_token[i] == "\n" ) {
+                vocab.linefeed_id = i;
+            }
+        }
+
+        std::vector<std::pair<std::string, std::string>> bpe_merges;
+
+        for (size_t i = 0; i < hparams.n_merges; i++) {
+
+            std::string word = gguf_get_arr_str(ggufctx, merges_keyidx, i);
+
+            // Split the merges
+            std::string first, second;
+            size_t pos = word.find(' ', 1); // Start the search from the second character
+            if (pos != std::string::npos) {
+                first = word.substr(0, pos);
+                second = word.substr(pos + 1);
+            }
+
+            bpe_merges.push_back(std::make_pair(first, second));
+        }
+
+        vocab.populate_bpe_ranks(bpe_merges);
+
+
+        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.bos_token_id"); if( keyidx != -1 ) {       vocab.special_bos_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); }
+        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.eos_token_id"); if( keyidx != -1 ) {       vocab.special_eos_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); }
+        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.unknown_token_id"); if( keyidx != -1 ) {   vocab.special_unk_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); }
+        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.separator_token_id"); if( keyidx != -1 ) { vocab.special_sep_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); }
+        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.padding_token_id"); if( keyidx != -1 ) {   vocab.special_pad_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); }
+
+        if( vocab.special_bos_id != -1 ) { fprintf(stdout, "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].c_str() ); }
+        if( vocab.special_eos_id != -1 ) { fprintf(stdout, "%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].c_str() ); }
+        if( vocab.special_unk_id != -1 ) { fprintf(stdout, "%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].c_str() ); }
+        if( vocab.special_sep_id != -1 ) { fprintf(stdout, "%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].c_str() ); }
+        if( vocab.special_pad_id != -1 ) { fprintf(stdout, "%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].c_str() ); }
+        if( vocab.linefeed_id    != -1 ) { fprintf(stdout, "%s: LF token  = %d\n",      __func__, vocab.linefeed_id ); }
+
+    }
+
+
+    auto & ctx = model.ctx;
+    size_t ctx_size = ggml_get_mem_size(ctx);
+
+    printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
+
+    // print tensor info
+    #if 0
+    {
+        const int n_tensors = gguf_get_n_tensors(ggufctx);
+
+        fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
+
+        for (int i = 0; i < n_tensors; ++i) {
+            const char * name   = gguf_get_tensor_name  (ggufctx, i);
+            const size_t offset = gguf_get_tensor_offset(ggufctx, i);
+
+            fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+        }
+    }
+    #endif
+
+    // prepare memory for the weights
+    {
+
+        auto & hparams = model.hparams;
+
+        const int n_block = hparams.n_block;
+
+        model.blocks.resize(n_block);
+
+        model.tok_embeddings = ggml_get_tensor(ctx, "token_embd.weight");
+
+        model.output_norm = ggml_get_tensor(ctx, "output_norm.weight");
+        model.output_norm_b = ggml_get_tensor(ctx, "output_norm.bias");
+        model.lm_head = ggml_get_tensor(ctx, "output.weight");
+
+        // map by name
+        model.tensors["token_embd.weight"] = model.tok_embeddings;
+        model.tensors["output_norm.weight"] = model.output_norm;
+        model.tensors["output_norm.bias"] = model.output_norm_b;
+        model.tensors["output.weight"] = model.lm_head;
+
+        for (int i = 0; i < n_block; ++i) {
+
+            auto& block = model.blocks[i];
+            std::string blocknamestart = "blk." + std::to_string(i) + ".";
+
+            block.input_layernorm   =  get_tensor_ex(ctx, blocknamestart + "attn_norm.weight" );
+            block.input_layernorm_b =  get_tensor_ex(ctx, blocknamestart + "attn_norm.bias" );
+
+            if ( hparams.n_head_kv == 8 ) { // Falcon-40B
+                block.attention_norm   =  get_tensor_ex(ctx, blocknamestart + "attn_norm_2.weight" );
+                block.attention_norm_b =  get_tensor_ex(ctx, blocknamestart + "attn_norm_2.bias" );
+            }
+
+            // query_key_value shape for config.multi_query == True:
+            block.query_key_value = get_tensor_ex(ctx, blocknamestart + "attn_qkv.weight" );
+            block.wo = get_tensor_ex(ctx, blocknamestart + "attn_output.weight" );
+
+            block.ffn_up = get_tensor_ex(ctx, blocknamestart + "ffn_up.weight" );
+            block.ffn_down = get_tensor_ex(ctx, blocknamestart + "ffn_down.weight" );
+
+            // map by name
+            if ( hparams.n_head_kv == 8 ) { // Falcon-40B
+                // Falcon-40B:
+                model.tensors[blocknamestart + "attn_norm.weight"] = block.input_layernorm;
+                model.tensors[blocknamestart + "attn_norm.bias"] = block.input_layernorm_b;
+                model.tensors[blocknamestart + "attn_norm_2.weight"] = block.attention_norm;
+                model.tensors[blocknamestart + "attn_norm_2.bias"] = block.attention_norm_b;
+            } else {
+                // Falcon-7B:
+                model.tensors[blocknamestart + "attn_norm.weight"] = block.input_layernorm;
+                model.tensors[blocknamestart + "attn_norm.bias"] = block.input_layernorm_b;
+            }
+
+            model.tensors[blocknamestart + "attn_qkv.weight"] = block.query_key_value;
+            model.tensors[blocknamestart + "attn_output.weight"] = block.wo;
+
+            model.tensors[blocknamestart + "ffn_up.weight"] = block.ffn_up;
+            model.tensors[blocknamestart + "ffn_down.weight"] = block.ffn_down;
+        }
+    }
+
+    // key + value memory
+    {
+        const auto & kvctx = model.kvctx;
+        const auto & hparams = model.hparams;
+
+        const int n_block = hparams.n_block;
+        const int n_ctx   = hparams.n_ctx;
+        const int n_embd = hparams.n_embd;
+
+        const int64_t n_mem      = n_block*n_ctx;
+        const int64_t n_elements = n_embd*n_mem;
+
+        // create the ggml context
+        {
+            struct ggml_init_params params = {
+                /*.mem_size   =*/ size_t(n_elements*4+ggml_tensor_overhead()*2),
+                /*.mem_buffer =*/ NULL,
+                /*.no_alloc   =*/ false,
+            };
+
+            model.kvctx = ggml_init(params);
+            if (!model.kvctx) {
+                fprintf(stderr, "%s: kv ggml_init() failed\n", __func__);
+                return false;
+            }
+
+        }
+
+
+        model.memory_k = ggml_new_tensor_1d(kvctx, GGML_TYPE_F16, n_elements);
+        model.memory_v = ggml_new_tensor_1d(kvctx, GGML_TYPE_F16, n_elements);
+
+        const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v);
+
+        printf("%s: memory_size = %8.2f MB, n_mem = %" PRId64 "\n", __func__, memory_size/1024.0/1024.0, n_mem);
+    }
+
+    return true;
+}
+
+
+// evaluate the transformer
+//
+//   - model:     the model
+//   - n_threads: number of threads to use
+//   - n_past:    the context size so far
+//   - embd_inp:  the embeddings of the tokens in the context
+//   - embd_w:    the predicted logits for the next token
+//
+bool falcon_eval(
+        const falcon_model & model,
+        const int n_threads,
+        const int n_past,
+        const std::vector<gpt2bpe_vocab::id> & embd_inp,
+              std::vector<float>         & embd_w,
+              size_t                     & mem_per_token) {
+
+
+    const int N = embd_inp.size();
+
+    const auto & hparams = model.hparams;
+
+    const int n_embd  = hparams.n_embd;
+    const int n_block = hparams.n_block;
+    const int n_ctx   = hparams.n_ctx;
+    const int n_head  = hparams.n_head;
+    const int n_head_kv = hparams.n_head_kv;
+    const int n_vocab = hparams.n_vocab;
+    const size_t head_dim = n_embd / n_head;
+
+    static size_t buf_size = 256u*1024*1024;
+    static void * buf = malloc(buf_size);
+
+    // use 2 scratch buffers
+    // TODO: very hacky solution - reimplement in a more elegant way
+    static size_t scr0_size = 256u*1024*1024;
+    static void * scr0 = malloc(scr0_size);
+
+    static size_t scr1_size = 256u*1024*1024;
+    static void * scr1 = malloc(scr1_size);
+
+    if (mem_per_token > 0 && mem_per_token*N > buf_size) {
+        const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
+        //printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
+
+        // reallocate
+        buf_size = buf_size_new;
+        buf = realloc(buf, buf_size);
+        if (buf == nullptr) {
+            fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
+            return false;
+        }
+    }
+
+    struct ggml_init_params params = {
+        /*.mem_size   =*/ buf_size,
+        /*.mem_buffer =*/ buf,
+        /*.no_alloc   =*/ false,
+    };
+
+    struct ggml_context * ctx0 = ggml_init(params);
+    struct ggml_cgraph gf = {};
+//    gf.n_threads = n_threads;
+
+    struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
+    memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
+
+    // wte
+    struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
+//    struct ggml_tensor* repeat_dummy = ggml_new_tensor_3d(ctx0, inpL->type, head_dim, N + n_past, n_head);
+
+    ggml_type wtype = GGML_TYPE_F32;
+    const int sizeof_wtype = ggml_type_sizef(wtype);
+
+    for (int il = 0; il < n_block; ++il) {
+        struct ggml_tensor * cur;
+        struct ggml_tensor * layernorm_output;
+
+        ggml_set_scratch(ctx0, { 0, scr0_size, scr0, });
+
+        // self-attention
+        {
+            layernorm_output = ggml_norm(ctx0, inpL);
+
+            layernorm_output = ggml_add(ctx0,
+                    ggml_mul(ctx0,
+                        ggml_repeat(ctx0, model.blocks[il].input_layernorm, layernorm_output),
+                        layernorm_output),
+                    ggml_repeat(ctx0, model.blocks[il].input_layernorm_b, layernorm_output));
+
+            if ( hparams.n_head_kv == 8 ) { // Falcon-40B
+                cur = ggml_norm(ctx0, inpL);
+
+                cur = ggml_add(ctx0,
+                        ggml_mul(ctx0,
+                            ggml_repeat(ctx0, model.blocks[il].attention_norm, cur),
+                            cur),
+                        ggml_repeat(ctx0, model.blocks[il].attention_norm_b, cur));
+            }
+            else { // Falcon 7B
+                cur = layernorm_output;
+            }
+
+            // compute QKV
+
+            cur = ggml_mul_mat(ctx0, model.blocks[il].query_key_value, cur);
+
+            // Note that the strides for Kcur, Vcur are set up so that the
+            // resulting views are misaligned with the tensor's storage
+            // (by applying the K/V offset we shift the tensor's original
+            // view to stick out behind the viewed QKV tensor's allocated
+            // memory, so to say). This is ok because no actual accesses
+            // happen to that out-of-range memory, but it can require some
+            // trickery when trying to accurately dump these views for
+            // debugging.
+
+            struct ggml_tensor * Qcur = ggml_view_3d(
+                ctx0, cur, head_dim, n_head, N,
+                head_dim * sizeof_wtype,
+                head_dim * (n_head + 2 * n_head_kv) * sizeof_wtype,
+                0);
+
+            struct ggml_tensor * Kcur = ggml_view_3d(
+                ctx0, cur, head_dim, n_head_kv, N,
+                head_dim * sizeof_wtype,
+                head_dim * (n_head + 2 * n_head_kv) * sizeof_wtype,
+                head_dim * n_head * sizeof_wtype);
+
+            struct ggml_tensor * Vcur = ggml_view_3d(
+                ctx0, cur, head_dim, n_head_kv, N,
+                head_dim * sizeof_wtype,
+                head_dim * (n_head + 2 * n_head_kv) * sizeof_wtype,
+                head_dim * (n_head + n_head_kv) * sizeof_wtype);
+
+            // using mode = 2 for neox mode
+            Qcur = ggml_rope_inplace(ctx0, Qcur, n_past, head_dim, 2, 0);
+            Kcur = ggml_rope_inplace(ctx0, Kcur, n_past, head_dim, 2, 0);
+
+            // store key and value to memory
+            {
+                struct ggml_tensor* k = ggml_view_1d(
+                    ctx0, model.memory_k, N * n_head_kv * head_dim,
+                    (ggml_element_size(model.memory_k) * n_head_kv * head_dim) *
+                        (il * n_ctx + n_past));
+                struct ggml_tensor* v = ggml_view_1d(
+                    ctx0, model.memory_v, N * n_head_kv * head_dim,
+                    (ggml_element_size(model.memory_v) * n_head_kv * head_dim) *
+                        (il * n_ctx + n_past));
+
+                ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
+                ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+            }
+
+            struct ggml_tensor * K = ggml_permute(
+                ctx0,
+                ggml_reshape_3d(
+                    ctx0,
+                    ggml_view_1d(ctx0, model.memory_k, (n_past + N) * n_head_kv * head_dim,
+                                 il * n_ctx *
+                                     ggml_element_size(model.memory_k) *
+                                     n_head_kv *
+                                     head_dim),
+                    head_dim, n_head_kv, n_past + N),
+                0, 2, 1, 3);
+
+            // K * Q
+
+//            K = ggml_cont(ctx0, ggml_repeat2(ctx0, K, repeat_dummy));
+
+            struct ggml_tensor * Q = ggml_permute(ctx0, Qcur, 0, 2, 1, 3);
+            struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q);
+
+            // KQ_scaled = KQ / sqrt(n_embd/n_head)
+            struct ggml_tensor * KQ_scaled =
+                ggml_scale_inplace(ctx0,
+                        KQ,
+                        ggml_new_f32(ctx0, 1.0f/sqrt(float(head_dim)))
+                        );
+
+            // KQ_masked = mask_past(KQ_scaled)
+            struct ggml_tensor * KQ_masked = ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past);
+
+            // KQ = soft_max(KQ_masked)
+            struct ggml_tensor * KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked);
+
+            // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous()
+            struct ggml_tensor* V = ggml_permute(
+                ctx0,
+                ggml_reshape_3d(
+                    ctx0,
+                    ggml_view_1d(ctx0, model.memory_v, (n_past + N) * n_head_kv * head_dim,
+                                 il * n_ctx *
+                                     ggml_element_size(model.memory_v) *
+                                     n_head_kv *
+                                     head_dim),
+                    head_dim, n_head_kv, n_past + N),
+                0, 2, 1, 3);
+
+//            V = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_repeat2(ctx0, V, repeat_dummy)));
+            V = ggml_cont(ctx0, ggml_transpose(ctx0, V));
+
+            // KQV = transpose(V) * KQ_soft_max
+            struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max);
+
+            // KQV_merged = KQV.permute(0, 2, 1, 3)
+            struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
+
+            // cur = KQV_merged.contiguous().view(n_embd, N)
+            cur = ggml_cpy(ctx0,
+                    KQV_merged,
+                    ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
+
+            // projection
+            {
+                cur = ggml_mul_mat(ctx0,
+                        model.blocks[il].wo,
+                        cur);
+            }
+        }
+
+        ggml_set_scratch(ctx0, { 0, scr1_size, scr1, });
+
+        struct ggml_tensor* inpFF = layernorm_output;
+        struct ggml_tensor* attn_out = ggml_cpy(
+            ctx0, cur, ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
+
+        {
+            cur = ggml_mul_mat(ctx0, model.blocks[il].ffn_up, inpFF);
+            cur = ggml_gelu(ctx0, cur);
+            cur = ggml_mul_mat(ctx0, model.blocks[il].ffn_down, cur);
+        }
+
+        cur = ggml_add(ctx0, cur, attn_out);
+        cur = ggml_add(ctx0, cur, inpL);
+        // input for next layer
+        inpL = cur;
+    }
+
+    ggml_set_scratch(ctx0, { 0, scr0_size, scr0, });
+
+    // norm
+    {
+        inpL = ggml_norm(ctx0, inpL);
+
+        // inpL = ln_f_g*inpL + ln_f_b
+        inpL = ggml_add(ctx0,
+                ggml_mul(ctx0,
+                    ggml_repeat(ctx0, model.output_norm, inpL),
+                    inpL),
+                ggml_repeat(ctx0, model.output_norm_b, inpL));
+    }
+
+    ggml_set_scratch(ctx0, { 0, 0, nullptr, });
+
+    // lm_head
+    {
+        inpL = ggml_mul_mat(ctx0, model.lm_head, inpL);
+
+        //inpL = ggml_add(ctx0,
+        //        ggml_repeat(ctx0, model.lmh_b, inpL),
+        //        inpL);
+    }
+
+    // logits -> probs
+    //inpL = ggml_soft_max_inplace(ctx0, inpL);
+
+    // run the computation
+    ggml_build_forward_expand(&gf, inpL);
+//    ggml_graph_compute       (ctx0, &gf);
+    ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+
+    //if (n_past%100 == 0) {
+    //    ggml_graph_print   (&gf);
+    //    ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot");
+    //}
+
+    // return result for just the last token
+    embd_w.resize(n_vocab);
+    memcpy(embd_w.data(), (float *)ggml_get_data(inpL) + (n_vocab * (N - 1)), sizeof(float) * n_vocab);
+
+    if (mem_per_token == 0) {
+        mem_per_token = ggml_used_mem(ctx0)/N;
+    }
+    //printf("used_mem = %zu\n", ggml_used_mem(ctx0));
+
+    ggml_free(ctx0);
+
+    return true;
+}
+
+int main(int argc, char ** argv) {
+    ggml_time_init();
+
+    const int64_t t_main_start_us = ggml_time_us();
+
+    gpt_params params;
+
+    if (gpt_params_parse(argc, argv, params) == false) {
+        return 1;
+    }
+
+    int64_t t_load_us = 0;
+
+    gpt2bpe_vocab vocab;
+    falcon_model model;
+
+    // load the model
+    {
+        const int64_t t_start_us = ggml_time_us();
+
+        if (!falcon_model_load(params.model, model, vocab)) {
+            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
+            return 1;
+        }
+
+        t_load_us = ggml_time_us() - t_start_us;
+
+    }
+
+    if (params.seed < 0) {
+        params.seed = time(NULL);
+    }
+
+    if (params.top_k == 0) {
+        params.top_k = model.hparams.n_vocab;
+    }
+
+    printf("%s: seed           = %d\n",   __func__, params.seed);
+    printf("%s: temp           = %.3f\n", __func__, params.temp);
+    printf("%s: top_k          = %d\n",   __func__, params.top_k);
+    printf("%s: top_p          = %.3f\n", __func__, params.top_p);
+    printf("%s: repeat_last_n  = %d\n",   __func__, params.repeat_last_n);
+    printf("%s: repeat_penalty = %.3f\n", __func__, params.repeat_penalty);
+
+    std::mt19937 rng(params.seed);
+
+    if (params.prompt.empty()) {
+        params.prompt = "Once upon";
+    }
+
+    std::vector<int32_t> last_n_tokens(model.hparams.n_ctx);
+    std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);
+
+    int n_past = 0;
+
+    int64_t t_sample_us  = 0;
+    int64_t t_predict_us = 0;
+
+    std::vector<float> logits;
+
+    // tokenize the prompt
+    std::vector<gpt2bpe_vocab::id> embd_inp = gpt2bpe_tokenize(vocab, params.prompt,false, false);
+
+    params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
+
+    printf("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
+//    for (size_t i = 0; i < embd_inp.size(); i++) {
+//        printf("%s: token[%zu] = %6d, %s\n", __func__, i, embd_inp[i], vocab.id_to_token[embd_inp[i]].c_str());
+//    }
+
+    if( model.hparams.n_ctx < params.n_predict+embd_inp.size() ) {
+        params.n_predict = model.hparams.n_ctx-embd_inp.size();
+    }
+
+    printf("%s: n_predict = %d\n", __func__, params.n_predict);
+    printf("\n");
+
+    std::vector<gpt2bpe_vocab::id> embd;
+
+    // determine the required inference memory per token:
+    size_t mem_per_token = 0;
+    falcon_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+
+    for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+        // predict
+        if (embd.size() > 0) {
+            const int64_t t_start_us = ggml_time_us();
+
+            if (!falcon_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
+                printf("Failed to predict\n");
+                return 1;
+            }
+
+            t_predict_us += ggml_time_us() - t_start_us;
+        }
+
+        n_past += embd.size();
+        embd.clear();
+
+        if (i >= embd_inp.size()) {
+            // sample next token
+            const int   top_k = params.top_k;
+            const float top_p = params.top_p;
+            const float temp  = params.temp;
+            const int repeat_last_n = params.repeat_last_n;
+            const float repeat_penalty = params.repeat_penalty;
+
+            const int n_vocab = model.hparams.n_vocab;
+
+            gpt2bpe_vocab::id id = 0;
+
+            {
+                const int64_t t_start_sample_us = ggml_time_us();
+
+                id = sample_top_k_top_p_repeat(vocab, logits.data() + (logits.size() - n_vocab), last_n_tokens.data(), last_n_tokens.size(), top_k, top_p, temp, repeat_last_n, repeat_penalty, rng);
+
+                last_n_tokens.erase(last_n_tokens.begin());
+                last_n_tokens.push_back(id);
+
+                t_sample_us += ggml_time_us() - t_start_sample_us;
+            }
+
+            // add it to the context
+            embd.push_back(id);
+        } else {
+            // if here, it means we are still processing the input prompt
+            for (size_t k = i; k < embd_inp.size(); k++) {
+                embd.push_back(embd_inp[k]);
+                if (embd.size() > params.n_batch) {
+                    break;
+                }
+            }
+            i += embd.size() - 1;
+        }
+
+        // display text
+        for (auto id : embd) {
+            printf("%s", vocab.id_to_token[id].c_str()  );
+        }
+        fflush(stdout);
+
+        // end of text token
+        if (vocab.special_eos_id != -1 && embd.back() == vocab.special_eos_id) {
+            break;
+        }
+    }
+
+    // report timing
+    {
+        const int64_t t_main_end_us = ggml_time_us();
+
+        printf("\n\n");
+        printf("%s: mem per token = %8zu bytes\n", __func__, mem_per_token);
+        printf("%s:     load time = %8.2f ms\n", __func__, t_load_us/1000.0f);
+        printf("%s:   sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f);
+        printf("%s:  predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past);
+        printf("%s:    total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
+    }
+
+    ggml_free(model.ctx);
+
+    return 0;
+}
diff --git a/gptneox-main.cpp b/examples/gptneox-wip/gptneox-main.cpp
similarity index 96%
rename from gptneox-main.cpp
rename to examples/gptneox-wip/gptneox-main.cpp
index aca8a5b0a..04af50245 100644
--- a/gptneox-main.cpp
+++ b/examples/gptneox-wip/gptneox-main.cpp
@@ -391,6 +391,7 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2
     {
         int keyidx;
 
+        // check model architecture kv
         keyidx = gguf_find_key(ggufctx, "general.architecture");
         if (keyidx != -1) {
             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "gptneox") != 0) {
@@ -492,6 +493,9 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2
             vocab.token_to_id[word] = i;
             vocab.id_to_token[i] = word;
 
+            if( vocab.id_to_token[i] == "\n" ) {
+                vocab.linefeed_id = i;
+            }
         }
 
         std::vector<std::pair<std::string, std::string>> bpe_merges;
@@ -514,17 +518,18 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2
         vocab.populate_bpe_ranks(bpe_merges);
 
 
-        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.bos_token_id"); if( keyidx != -1 ) {       vocab.special_bos_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); vocab.special_have_bos=true; }
-        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.eos_token_id"); if( keyidx != -1 ) {       vocab.special_eos_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); vocab.special_have_eos=true; }
-        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.unknown_token_id"); if( keyidx != -1 ) {   vocab.special_unk_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); vocab.special_have_unk=true; }
-        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.separator_token_id"); if( keyidx != -1 ) { vocab.special_sep_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); vocab.special_have_sep=true; }
-        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.padding_token_id"); if( keyidx != -1 ) {   vocab.special_pad_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); vocab.special_have_pad=true; }
+        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.bos_token_id"); if( keyidx != -1 ) {       vocab.special_bos_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); }
+        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.eos_token_id"); if( keyidx != -1 ) {       vocab.special_eos_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); }
+        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.unknown_token_id"); if( keyidx != -1 ) {   vocab.special_unk_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); }
+        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.separator_token_id"); if( keyidx != -1 ) { vocab.special_sep_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); }
+        keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.padding_token_id"); if( keyidx != -1 ) {   vocab.special_pad_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); }
 
-        if( vocab.special_have_bos ) { fprintf(stdout, "%s: bos token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].c_str() ); }
-        if( vocab.special_have_eos ) { fprintf(stdout, "%s: eos token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].c_str() ); }
-        if( vocab.special_have_unk ) { fprintf(stdout, "%s: unk token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].c_str() ); }
-        if( vocab.special_have_sep ) { fprintf(stdout, "%s: sep token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].c_str() ); }
-        if( vocab.special_have_pad ) { fprintf(stdout, "%s: pad token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].c_str() ); }
+        if( vocab.special_bos_id != -1 ) { fprintf(stdout, "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].c_str() ); }
+        if( vocab.special_eos_id != -1 ) { fprintf(stdout, "%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].c_str() ); }
+        if( vocab.special_unk_id != -1 ) { fprintf(stdout, "%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].c_str() ); }
+        if( vocab.special_sep_id != -1 ) { fprintf(stdout, "%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].c_str() ); }
+        if( vocab.special_pad_id != -1 ) { fprintf(stdout, "%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].c_str() ); }
+        if( vocab.linefeed_id    != -1 ) { fprintf(stdout, "%s: LF token  = %d\n",      __func__, vocab.linefeed_id ); }
     }
 
 
@@ -656,6 +661,7 @@ ggml_tensor * gpt_neox_ff(
         const gpt_neox_block &block,
         ggml_context * ctx0,
         ggml_tensor * inp) {
+
     ggml_tensor * cur = ggml_norm(ctx0, inp);
 
     cur = ggml_add(ctx0, ggml_mul(ctx0, ggml_repeat(ctx0, block.ln_2_g, cur), cur), ggml_repeat(ctx0, block.ln_2_b, cur));
@@ -1053,7 +1059,7 @@ int main(int argc, char ** argv) {
         fflush(stdout);
 
         // end of text token
-        if (vocab.special_have_eos && embd.back() == vocab.special_eos_id) {
+        if (vocab.special_eos_id != -1 && embd.back() == vocab.special_eos_id) {
             break;
         }
     }
diff --git a/examples/llama-bench/CMakeLists.txt b/examples/llama-bench/CMakeLists.txt
new file mode 100644
index 000000000..7e395afd0
--- /dev/null
+++ b/examples/llama-bench/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(TARGET llama-bench)
+add_executable(${TARGET} llama-bench.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
+if(TARGET BUILD_INFO)
+  add_dependencies(${TARGET} BUILD_INFO)
+endif()
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
new file mode 100755
index 000000000..d11fff288
--- /dev/null
+++ b/examples/llama-bench/llama-bench.cpp
@@ -0,0 +1,969 @@
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <chrono>
+#include <cinttypes>
+#include <cstring>
+#include <ctime>
+#include <iterator>
+#include <map>
+#include <numeric>
+#include <regex>
+#include <sstream>
+#include <stdio.h>
+#include <string>
+#include <vector>
+
+#include "ggml.h"
+#include "llama.h"
+#include "common.h"
+#include "build-info.h"
+#ifdef GGML_USE_CUBLAS
+#include "ggml-cuda.h"
+#endif
+
+// utils
+static uint64_t get_time_ns() {
+    using clock = std::chrono::high_resolution_clock;
+    return std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
+}
+
+template<class T>
+static std::string join(const std::vector<T> & values, const std::string & delim) {
+    std::ostringstream str;
+    for (size_t i = 0; i < values.size(); i++) {
+        str << values[i];
+        if (i < values.size() - 1) {
+            str << delim;
+        }
+    }
+    return str.str();
+}
+
+template<class T>
+static std::vector<T> split(const std::string & str, char delim) {
+    std::vector<T> values;
+    std::istringstream str_stream(str);
+    std::string token;
+    while (std::getline(str_stream, token, delim)) {
+        T value;
+        std::istringstream token_stream(token);
+        token_stream >> value;
+        values.push_back(value);
+    }
+    return values;
+}
+
+template<typename T>
+static T avg(const std::vector<T> & v) {
+    if (v.empty()) {
+        return 0;
+    }
+    T sum = std::accumulate(v.begin(), v.end(), T(0));
+    return sum / (T)v.size();
+}
+
+template<typename T>
+static T stdev(const std::vector<T> & v) {
+    if (v.size() <= 1) {
+        return 0;
+    }
+    T mean = avg(v);
+    T sq_sum = std::inner_product(v.begin(), v.end(), v.begin(), T(0));
+    T stdev = std::sqrt(sq_sum / (T)(v.size() - 1) - mean * mean * (T)v.size() / (T)(v.size() - 1));
+    return stdev;
+}
+
+static bool ggml_cpu_has_metal() {
+#if defined(GGML_USE_METAL)
+    return true;
+#else
+    return false;
+#endif
+}
+
+static std::string get_cpu_info() {
+    std::string id;
+#ifdef __linux__
+    FILE * f = fopen("/proc/cpuinfo", "r");
+    if (f) {
+        char buf[1024];
+        while (fgets(buf, sizeof(buf), f)) {
+            if (strncmp(buf, "model name", 10) == 0) {
+                char * p = strchr(buf, ':');
+                if (p) {
+                    p++;
+                    while (std::isspace(*p)) {
+                        p++;
+                    }
+                    while (std::isspace(p[strlen(p) - 1])) {
+                        p[strlen(p) - 1] = '\0';
+                    }
+                    id = p;
+                    break;
+                }
+            }
+        }
+    }
+#endif
+    // TODO: other platforms
+    return id;
+}
+
+static std::string get_gpu_info() {
+    std::string id;
+#ifdef GGML_USE_CUBLAS
+    int count = ggml_cuda_get_device_count();
+    for (int i = 0; i < count; i++) {
+        char buf[128];
+        ggml_cuda_get_device_description(i, buf, sizeof(buf));
+        id += buf;
+        if (i < count - 1) {
+            id += "/";
+        }
+    }
+#endif
+    // TODO: other backends
+    return id;
+}
+
+// command line params
+enum output_formats {CSV, JSON, MARKDOWN, SQL};
+
+struct cmd_params {
+    std::vector<std::string> model;
+    std::vector<int> n_prompt;
+    std::vector<int> n_gen;
+    std::vector<int> n_batch;
+    std::vector<bool> f32_kv;
+    std::vector<int> n_threads;
+    std::vector<int> n_gpu_layers;
+    std::vector<int> main_gpu;
+    std::vector<bool> mul_mat_q;
+    std::vector<bool> low_vram;
+    std::vector<std::array<float, LLAMA_MAX_DEVICES>> tensor_split;
+    int reps;
+    bool verbose;
+    output_formats output_format;
+};
+
+static const cmd_params cmd_params_defaults = {
+    /* model         */ {"models/7B/ggml-model-q4_0.bin"},
+    /* n_prompt      */ {512},
+    /* n_gen         */ {128},
+    /* n_batch       */ {512},
+    /* f32_kv        */ {false},
+    /* n_threads     */ {get_num_physical_cores()},
+    /* n_gpu_layers  */ {99},
+    /* main_gpu      */ {0},
+    /* mul_mat_q     */ {true},
+    /* low_vram      */ {false},
+    /* tensor_split  */ {{}},
+    /* reps          */ 5,
+    /* verbose       */ false,
+    /* output_format */ MARKDOWN
+};
+
+static void print_usage(int /* argc */, char ** argv) {
+    fprintf(stdout, "usage: %s [options]\n", argv[0]);
+    fprintf(stdout, "\n");
+    fprintf(stdout, "options:\n");
+    fprintf(stdout, "  -h, --help\n");
+    fprintf(stdout, "  -m, --model <filename>            (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
+    fprintf(stdout, "  -p, --n-prompt <n>                (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
+    fprintf(stdout, "  -n, --n-gen <n>                   (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
+    fprintf(stdout, "  -b, --batch-size <n>              (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str());
+    fprintf(stdout, "  --memory-f32 <0|1>                (default: %s)\n", join(cmd_params_defaults.f32_kv, ",").c_str());
+    fprintf(stdout, "  -t, --threads <n>                 (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
+    fprintf(stdout, "  -ngl N, --n-gpu-layers <n>        (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
+    fprintf(stdout, "  -mg i, --main-gpu <n>             (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
+    fprintf(stdout, "  -lv, --low-vram <0|1>             (default: %s)\n", join(cmd_params_defaults.low_vram, ",").c_str());
+    fprintf(stdout, "  -mmq, --mul-mat-q <0|1>           (default: %s)\n", join(cmd_params_defaults.mul_mat_q, ",").c_str());
+    fprintf(stdout, "  -ts, --tensor_split <ts>                       \n");
+    fprintf(stdout, "  -r, --repetitions <n>             (default: %d)\n", cmd_params_defaults.reps);
+    fprintf(stdout, "  -o, --output <csv|json|md|sql>    (default: %s)\n", cmd_params_defaults.output_format == CSV ? "csv" : cmd_params_defaults.output_format == JSON ? "json" : "md");
+    fprintf(stdout, "  -v, --verbose                     (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
+    fprintf(stdout, "\n");
+    fprintf(stdout, "Multiple values can be given for each parameter by separating them with ',' or by repeating the parameter.\n");
+
+}
+
+static cmd_params parse_cmd_params(int argc, char ** argv) {
+    cmd_params params;
+    std::string arg;
+    bool invalid_param = false;
+    const std::string arg_prefix = "--";
+    const char split_delim = ',';
+
+    params.verbose = cmd_params_defaults.verbose;
+    params.output_format = cmd_params_defaults.output_format;
+    params.reps = cmd_params_defaults.reps;
+
+    for (int i = 1; i < argc; i++) {
+        arg = argv[i];
+        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
+            std::replace(arg.begin(), arg.end(), '_', '-');
+        }
+
+        if (arg == "-h" || arg == "--help") {
+            print_usage(argc, argv);
+            exit(0);
+        } else if (arg == "-m" || arg == "--model") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            auto p = split<std::string>(argv[i], split_delim);
+            params.model.insert(params.model.end(), p.begin(), p.end());
+        } else if (arg == "-p" || arg == "--n-prompt") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            auto p = split<int>(argv[i], split_delim);
+            params.n_prompt.insert(params.n_prompt.end(), p.begin(), p.end());
+        } else if (arg == "-n" || arg == "--n-gen") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            auto p = split<int>(argv[i], split_delim);
+            params.n_gen.insert(params.n_gen.end(), p.begin(), p.end());
+        } else if (arg == "-b" || arg == "--batch-size") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            auto p = split<int>(argv[i], split_delim);
+            params.n_batch.insert(params.n_batch.end(), p.begin(), p.end());
+        } else if (arg == "--memory-f32") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            auto p = split<int>(argv[i], split_delim);
+            params.f32_kv.insert(params.f32_kv.end(), p.begin(), p.end());
+        } else if (arg == "-t" || arg == "--threads") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            auto p = split<int>(argv[i], split_delim);
+            params.n_threads.insert(params.n_threads.end(), p.begin(), p.end());
+        } else if (arg == "-ngl" || arg == "--n-gpu-layers") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            auto p = split<int>(argv[i], split_delim);
+            params.n_gpu_layers.insert(params.n_gpu_layers.end(), p.begin(), p.end());
+        } else if (arg == "-mg" || arg == "--main-gpu") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.main_gpu = split<int>(argv[i], split_delim);
+        } else if (arg == "-lv" || arg == "--low-vram") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            auto p = split<bool>(argv[i], split_delim);
+            params.low_vram.insert(params.low_vram.end(), p.begin(), p.end());
+        } else if (arg == "-mmq" || arg == "--mul-mat-q") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            auto p = split<bool>(argv[i], split_delim);
+            params.mul_mat_q.insert(params.mul_mat_q.end(), p.begin(), p.end());
+        } else if (arg == "-ts" || arg == "--tensor-split") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            for (auto ts : split<std::string>(argv[i], split_delim)) {
+                // split string by ; and /
+                const std::regex regex{R"([;/]+)"};
+                std::sregex_token_iterator it{ts.begin(), ts.end(), regex, -1};
+                std::vector<std::string> split_arg{it, {}};
+                GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES);
+
+                std::array<float, LLAMA_MAX_DEVICES> tensor_split;
+                for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) {
+                    if (i < split_arg.size()) {
+                        tensor_split[i] = std::stof(split_arg[i]);
+                    } else {
+                        tensor_split[i] = 0.0f;
+                    }
+                }
+                params.tensor_split.push_back(tensor_split);
+            }
+        } else if (arg == "-r" || arg == "--repetitions") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.reps = std::stoi(argv[i]);
+        } else if (arg == "-o" || arg == "--output") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            if (argv[i] == std::string("csv")) {
+                params.output_format = CSV;
+            } else if (argv[i] == std::string("json")) {
+                params.output_format = JSON;
+            } else if (argv[i] == std::string("md")) {
+                params.output_format = MARKDOWN;
+            } else if (argv[i] == std::string("sql")) {
+                params.output_format = SQL;
+            } else {
+                invalid_param = true;
+                break;
+            }
+        } else if (arg == "-v" || arg == "--verbose") {
+            params.verbose = true;
+        } else {
+            invalid_param = true;
+            break;
+        }
+    }
+    if (invalid_param) {
+        fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
+        print_usage(argc, argv);
+        exit(1);
+    }
+
+    // set defaults
+    if (params.model.empty())        { params.model = cmd_params_defaults.model; }
+    if (params.n_prompt.empty())     { params.n_prompt = cmd_params_defaults.n_prompt; }
+    if (params.n_gen.empty())        { params.n_gen = cmd_params_defaults.n_gen; }
+    if (params.n_batch.empty())      { params.n_batch = cmd_params_defaults.n_batch; }
+    if (params.f32_kv.empty())       { params.f32_kv = cmd_params_defaults.f32_kv; }
+    if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; }
+    if (params.main_gpu.empty())     { params.main_gpu = cmd_params_defaults.main_gpu; }
+    if (params.mul_mat_q.empty())    { params.mul_mat_q = cmd_params_defaults.mul_mat_q; }
+    if (params.low_vram.empty())     { params.low_vram = cmd_params_defaults.low_vram; }
+    if (params.tensor_split.empty()) { params.tensor_split = cmd_params_defaults.tensor_split; }
+    if (params.n_threads.empty())    { params.n_threads = cmd_params_defaults.n_threads; }
+
+    return params;
+}
+
+struct cmd_params_instance {
+    std::string model;
+    int n_prompt;
+    int n_gen;
+    int n_batch;
+    bool f32_kv;
+    int n_threads;
+    int n_gpu_layers;
+    int main_gpu;
+    bool mul_mat_q;
+    bool low_vram;
+    std::array<float, LLAMA_MAX_DEVICES> tensor_split;
+
+    llama_context_params to_llama_params() const {
+        llama_context_params lparams = llama_context_default_params();
+        lparams.n_ctx = n_prompt + n_gen;
+        lparams.n_batch = n_batch;
+        lparams.f16_kv = !f32_kv;
+        lparams.n_gpu_layers = n_gpu_layers;
+        lparams.main_gpu = main_gpu;
+        lparams.mul_mat_q = mul_mat_q;
+        lparams.low_vram = low_vram;
+        lparams.tensor_split = tensor_split.data();
+
+        return lparams;
+    }
+};
+
+static std::vector<cmd_params_instance> get_cmd_params_instances_int(const cmd_params & params, int n_gen, int n_prompt) {
+    std::vector<cmd_params_instance> instances;
+
+    for (const auto & m : params.model)
+    for (const auto & nb : params.n_batch)
+    for (const auto & fk : params.f32_kv)
+    for (const auto & nl : params.n_gpu_layers)
+    for (const auto & mg : params.main_gpu)
+    for (const auto & mmq : params.mul_mat_q)
+    for (const auto & lv : params.low_vram)
+    for (const auto & ts : params.tensor_split)
+    for (const auto & nt : params.n_threads) {
+        cmd_params_instance instance = {
+            /* .model        = */ m,
+            /* .n_prompt     = */ n_prompt,
+            /* .n_gen        = */ n_gen,
+            /* .n_batch      = */ nb,
+            /* .f32_kv       = */ fk,
+            /* .n_threads    = */ nt,
+            /* .n_gpu_layers = */ nl,
+            /* .main_gpu     = */ mg,
+            /* .mul_mat_q    = */ mmq,
+            /* .low_vram     = */ lv,
+            /* .tensor_split = */ ts,
+        };
+        instances.push_back(instance);
+    }
+    return instances;
+}
+
+static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_params & params) {
+    std::vector<cmd_params_instance> instances;
+
+    for (const auto & n_prompt : params.n_prompt) {
+        if (n_prompt == 0) {
+            continue;
+        }
+        auto instances_prompt = get_cmd_params_instances_int(params, 0, n_prompt);
+        instances.insert(instances.end(), instances_prompt.begin(), instances_prompt.end());
+    }
+
+    for (const auto & n_gen : params.n_gen) {
+        if (n_gen == 0) {
+            continue;
+        }
+        auto instances_gen = get_cmd_params_instances_int(params, n_gen, 0);
+        instances.insert(instances.end(), instances_gen.begin(), instances_gen.end());
+    }
+
+    return instances;
+}
+
+struct test {
+    static const std::string build_commit;
+    static const int build_number;
+    static const bool cuda;
+    static const bool opencl;
+    static const bool metal;
+    static const bool gpu_blas;
+    static const bool blas;
+    static const std::string cpu_info;
+    static const std::string gpu_info;
+    std::string model_filename;
+    std::string model_type;
+    int n_batch;
+    int n_threads;
+    bool f32_kv;
+    int n_gpu_layers;
+    int main_gpu;
+    bool mul_mat_q;
+    bool low_vram;
+    std::array<float, LLAMA_MAX_DEVICES> tensor_split;
+    int n_prompt;
+    int n_gen;
+    std::string test_time;
+    std::vector<uint64_t> samples_ns;
+
+    test(const cmd_params_instance & inst, const llama_model * lmodel, const llama_context * ctx) {
+        model_filename = inst.model;
+        char buf[128];
+        llama_model_type(lmodel, buf, sizeof(buf));
+        model_type = buf;
+        n_batch = inst.n_batch;
+        n_threads = inst.n_threads;
+        f32_kv = inst.f32_kv;
+        n_gpu_layers = inst.n_gpu_layers;
+        main_gpu = inst.main_gpu;
+        mul_mat_q = inst.mul_mat_q;
+        low_vram = inst.low_vram;
+        tensor_split = inst.tensor_split;
+        n_prompt = inst.n_prompt;
+        n_gen = inst.n_gen;
+        // RFC 3339 date-time format
+        time_t t = time(NULL);
+        std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t));
+        test_time = buf;
+
+        (void) ctx;
+    }
+
+    uint64_t avg_ns() const {
+        return ::avg(samples_ns);
+    }
+
+    uint64_t stdev_ns() const {
+        return ::stdev(samples_ns);
+    }
+
+    std::vector<double> get_ts() const {
+        int n_tokens = n_prompt + n_gen;
+        std::vector<double> ts;
+        std::transform(samples_ns.begin(), samples_ns.end(), std::back_inserter(ts), [n_tokens](uint64_t t) { return 1e9 * n_tokens / t; });
+        return ts;
+    }
+
+    double avg_ts() const {
+        return ::avg(get_ts());
+    }
+
+    double stdev_ts() const {
+        return ::stdev(get_ts());
+    }
+
+    static std::string get_backend() {
+        if (cuda) {
+            return "CUDA";
+        }
+        if (opencl) {
+            return "OpenCL";
+        }
+        if (metal) {
+            return "Metal";
+        }
+        if (gpu_blas) {
+            return "GPU BLAS";
+        }
+        if (blas) {
+            return "BLAS";
+        }
+        return "CPU";
+    }
+
+    static const std::vector<std::string> & get_fields() {
+        static const std::vector<std::string> fields = {
+            "build_commit", "build_number",
+            "cuda", "opencl", "metal", "gpu_blas", "blas",
+            "cpu_info", "gpu_info",
+            "model_filename", "model_type",
+            "n_batch", "n_threads", "f16_kv",
+            "n_gpu_layers", "main_gpu", "mul_mat_q", "low_vram", "tensor_split",
+            "n_prompt", "n_gen", "test_time",
+            "avg_ns", "stddev_ns",
+            "avg_ts", "stddev_ts"
+        };
+        return fields;
+    }
+
+    enum field_type {STRING, BOOL, INT, FLOAT};
+
+    static field_type get_field_type(const std::string & field) {
+        if (field == "build_number" || field == "n_batch" || field == "n_threads" ||
+            field == "n_gpu_layers" || field == "main_gpu" ||
+            field == "n_prompt" || field == "n_gen" ||
+            field == "avg_ns" || field == "stddev_ns") {
+            return INT;
+        }
+        if (field == "cuda" || field == "opencl" || field == "metal" || field == "gpu_blas" || field == "blas" ||
+            field == "f16_kv" || field == "mul_mat_q" || field == "low_vram") {
+            return BOOL;
+        }
+        if (field == "avg_ts" || field == "stddev_ts") {
+            return FLOAT;
+        }
+        return STRING;
+    }
+
+    std::vector<std::string> get_values() const {
+        std::string tensor_split_str;
+        int max_nonzero = 0;
+        for (int i = 0; i < LLAMA_MAX_DEVICES; i++) {
+            if (tensor_split[i] > 0) {
+                max_nonzero = i;
+            }
+        }
+        for (int i = 0; i <= max_nonzero; i++) {
+            char buf[32];
+            snprintf(buf, sizeof(buf), "%.2f", tensor_split[i]);
+            tensor_split_str += buf;
+            if (i < max_nonzero) {
+                tensor_split_str += "/";
+            }
+        }
+        std::vector<std::string> values = {
+            build_commit, std::to_string(build_number),
+            std::to_string(cuda), std::to_string(opencl), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
+            cpu_info, gpu_info,
+            model_filename, model_type,
+            std::to_string(n_batch), std::to_string(n_threads), std::to_string(!f32_kv),
+            std::to_string(n_gpu_layers), std::to_string(main_gpu), std::to_string(mul_mat_q), std::to_string(low_vram), tensor_split_str,
+            std::to_string(n_prompt), std::to_string(n_gen), test_time,
+            std::to_string(avg_ns()), std::to_string(stdev_ns()),
+            std::to_string(avg_ts()), std::to_string(stdev_ts())
+        };
+        return values;
+    }
+
+    std::map<std::string, std::string> get_map() const {
+        std::map<std::string, std::string> map;
+        auto fields = get_fields();
+        auto values = get_values();
+        std::transform(fields.begin(), fields.end(), values.begin(),
+                std::inserter(map, map.end()), std::make_pair<const std::string &, const std::string &>);
+        return map;
+    }
+};
+
+const std::string test::build_commit = BUILD_COMMIT;
+const int         test::build_number = BUILD_NUMBER;
+const bool        test::cuda         = !!ggml_cpu_has_cublas();
+const bool        test::opencl       = !!ggml_cpu_has_clblast();
+const bool        test::metal        = !!ggml_cpu_has_metal();
+const bool        test::gpu_blas     = !!ggml_cpu_has_gpublas();
+const bool        test::blas         = !!ggml_cpu_has_blas();
+const std::string test::cpu_info     = get_cpu_info();
+const std::string test::gpu_info     = get_gpu_info();
+
+struct printer {
+    virtual ~printer() {}
+
+    FILE * fout;
+    virtual void print_header(const cmd_params & params) { (void) params; };
+    virtual void print_test(const test & t) = 0;
+    virtual void print_footer() { };
+};
+
+struct csv_printer : public printer {
+    static std::string escape_csv(const std::string & field) {
+        std::string escaped = "\"";
+        for (auto c : field) {
+            if (c == '"') {
+                escaped += "\"";
+            }
+            escaped += c;
+        }
+        escaped += "\"";
+        return escaped;
+    }
+
+    void print_header(const cmd_params & params) override  {
+        std::vector<std::string> fields = test::get_fields();
+        fprintf(fout, "%s\n", join(fields, ",").c_str());
+        (void) params;
+    }
+
+    void print_test(const test & t) override {
+        std::vector<std::string> values = t.get_values();
+        std::transform(values.begin(), values.end(), values.begin(), escape_csv);
+        fprintf(fout, "%s\n", join(values, ",").c_str());
+    }
+};
+
+struct json_printer : public printer {
+    bool first = true;
+
+    static std::string escape_json(const std::string & value) {
+        std::string escaped;
+        for (auto c : value) {
+            if (c == '"') {
+                escaped += "\\\"";
+            } else if (c == '\\') {
+                escaped += "\\\\";
+            } else  if (c <= 0x1f) {
+                char buf[8];
+                snprintf(buf, sizeof(buf), "\\u%04x", c);
+                escaped += buf;
+            } else {
+                escaped += c;
+            }
+        }
+        return escaped;
+    }
+
+    static std::string format_value(const std::string & field, const std::string & value) {
+        switch (test::get_field_type(field)) {
+            case test::STRING:
+                return "\"" + escape_json(value) + "\"";
+            case test::BOOL:
+                return value == "0" ? "false" : "true";
+            default:
+                return value;
+        }
+    }
+
+    void print_header(const cmd_params & params) override {
+        fprintf(fout, "[\n");
+        (void) params;
+    }
+
+    void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
+        assert(fields.size() == values.size());
+        for (size_t i = 0; i < fields.size(); i++) {
+            fprintf(fout, "    \"%s\": %s,\n", fields.at(i).c_str(), format_value(fields.at(i), values.at(i)).c_str());
+        }
+    }
+
+    void print_test(const test & t) override {
+        if (first) {
+            first = false;
+        } else {
+            fprintf(fout, ",\n");
+        }
+        fprintf(fout, "  {\n");
+        print_fields(test::get_fields(), t.get_values());
+        fprintf(fout, "    \"samples_ns\": [ %s ],\n", join(t.samples_ns, ", ").c_str());
+        fprintf(fout, "    \"samples_ts\": [ %s ]\n", join(t.get_ts(), ", ").c_str());
+        fprintf(fout, "  }");
+        fflush(fout);
+    }
+
+    void print_footer() override {
+        fprintf(fout, "\n]\n");
+    }
+};
+
+struct markdown_printer : public printer {
+    std::vector<std::string> fields;
+
+    static int get_field_width(const std::string & field) {
+        if (field == "model") {
+            return -30;
+        }
+        if (field == "t/s") {
+            return 15;
+        }
+        int width = std::max((int)field.length(), 10);
+
+        if (test::get_field_type(field) == test::STRING) {
+            return -width;
+        }
+        return width;
+    }
+
+    void print_header(const cmd_params & params) override {
+        // select fields to print
+        fields = { "model", "backend" };
+        bool is_cpu_backend = test::get_backend() == "CPU" || test::get_backend() == "BLAS";
+        if (!is_cpu_backend) {
+            fields.push_back("n_gpu_layers");
+        }
+        if (params.n_batch.size() > 1 || params.n_threads != cmd_params_defaults.n_threads || is_cpu_backend) {
+            fields.push_back("n_threads");
+        }
+        if (params.n_batch.size() > 1 || params.n_batch != cmd_params_defaults.n_batch) {
+            fields.push_back("n_batch");
+        }
+        if (params.f32_kv.size() > 1 || params.f32_kv != cmd_params_defaults.f32_kv) {
+            fields.push_back("f16_kv");
+        }
+        if (params.main_gpu.size() > 1 || params.main_gpu != cmd_params_defaults.main_gpu) {
+            fields.push_back("main_gpu");
+        }
+        if (params.mul_mat_q.size() > 1 || params.mul_mat_q != cmd_params_defaults.mul_mat_q) {
+            fields.push_back("mul_mat_q");
+        }
+        if (params.low_vram.size() > 1 || params.low_vram != cmd_params_defaults.low_vram) {
+            fields.push_back("low_vram");
+        }
+        if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) {
+            fields.push_back("tensor_split");
+        }
+        fields.push_back("test");
+        fields.push_back("t/s");
+
+        fprintf(fout, "|");
+        for (const auto & field : fields) {
+            fprintf(fout, " %*s |", get_field_width(field), field.c_str());
+        }
+        fprintf(fout, "\n");
+        fprintf(fout, "|");
+        for (const auto & field : fields) {
+            int width = get_field_width(field);
+            fprintf(fout, " %s%s |", std::string(std::abs(width) - 1, '-').c_str(), width > 0 ? ":" : "-");
+        }
+        fprintf(fout, "\n");
+    }
+
+    void print_test(const test & t) override {
+        std::map<std::string, std::string> vmap = t.get_map();
+
+        fprintf(fout, "|");
+        for (const auto & field : fields) {
+            std::string value;
+            if (field == "model") {
+                value = t.model_type;
+            } else if (field == "backend") {
+                value = test::get_backend();
+            } else if (field == "test") {
+                char buf[128];
+                if (t.n_prompt > 0 && t.n_gen == 0) {
+                    snprintf(buf, sizeof(buf), "pp %d", t.n_prompt);
+                } else if (t.n_gen > 0 && t.n_prompt == 0) {
+                    snprintf(buf, sizeof(buf), "tg %d", t.n_gen);
+                } else {
+                    assert(false);
+                    exit(1);
+                }
+                value = buf;
+            } else if (field == "t/s") {
+                char buf[128];
+                snprintf(buf, sizeof(buf), "%.2f ± %.2f", t.avg_ts(), t.stdev_ts());
+                value = buf;
+            } else if (vmap.find(field) != vmap.end()) {
+                value = vmap.at(field);
+            } else {
+                assert(false);
+                exit(1);
+            }
+
+            int width = get_field_width(field);
+            if (field == "t/s") {
+                // HACK: the utf-8 character is 2 bytes
+                width += 1;
+            }
+            fprintf(fout, " %*s |", width, value.c_str());
+        }
+        fprintf(fout, "\n");
+    }
+
+    void print_footer() override {
+        fprintf(fout, "\nbuild: %s (%d)\n", test::build_commit.c_str(), test::build_number);
+    }
+};
+
+struct sql_printer : public printer {
+    static std::string get_sql_field_type(const std::string & field) {
+        switch (test::get_field_type(field)) {
+            case test::STRING:
+                return "TEXT";
+            case test::BOOL:
+            case test::INT:
+                return "INTEGER";
+            case test::FLOAT:
+                return "REAL";
+            default:
+                assert(false);
+                exit(1);
+        }
+    }
+
+    void print_header(const cmd_params & params) override {
+        std::vector<std::string> fields = test::get_fields();
+        fprintf(fout, "CREATE TABLE IF NOT EXISTS test (\n");
+        for (size_t i = 0; i < fields.size(); i++) {
+            fprintf(fout, "  %s %s%s\n", fields.at(i).c_str(), get_sql_field_type(fields.at(i)).c_str(),  i < fields.size() - 1 ? "," : "");
+        }
+        fprintf(fout, ");\n");
+        fprintf(fout, "\n");
+        (void) params;
+    }
+
+    void print_test(const test & t) override {
+        fprintf(fout, "INSERT INTO test (%s) ", join(test::get_fields(), ", ").c_str());
+        fprintf(fout, "VALUES (");
+        std::vector<std::string> values = t.get_values();
+        for (size_t i = 0; i < values.size(); i++) {
+            fprintf(fout, "'%s'%s", values.at(i).c_str(), i < values.size() - 1 ? ", " : "");
+        }
+        fprintf(fout, ");\n");
+    }
+};
+
+static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
+    std::vector<llama_token> tokens(n_batch, llama_token_bos(ctx));
+    int n_processed = 0;
+    while (n_processed < n_prompt) {
+        int n_tokens = std::min(n_prompt - n_processed, n_batch);
+        llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads);
+        n_processed += n_tokens;
+    }
+}
+
+static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
+    llama_token token = llama_token_bos(ctx);
+    for (int i = 0; i < n_gen; i++) {
+        llama_eval(ctx, &token, 1, n_past + i, n_threads);
+    }
+}
+
+static void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
+    (void) level;
+    (void) text;
+    (void) user_data;
+}
+
+int main(int argc, char ** argv) {
+#if !defined(NDEBUG)
+    fprintf(stderr, "warning: asserts enabled, performance may be affected\n");
+#endif
+
+#if (defined(_MSC_VER) && defined(_DEBUG)) || (!defined(_MSC_VER) && !defined(__OPTIMIZE__))
+    fprintf(stderr, "warning: debug build, performance may be affected\n");
+#endif
+
+#if defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__)
+    fprintf(stderr, "warning: sanitizer enabled, performance may be affected\n");
+#endif
+
+    cmd_params params = parse_cmd_params(argc, argv);
+
+    // initialize llama.cpp
+    if (!params.verbose) {
+        llama_log_set(llama_null_log_callback, NULL);
+    }
+    bool numa = false;
+    llama_backend_init(numa);
+
+    // initialize printer
+    std::unique_ptr<printer> p;
+    switch (params.output_format) {
+        case CSV:
+            p.reset(new csv_printer());
+            break;
+        case JSON:
+            p.reset(new json_printer());
+            break;
+        case MARKDOWN:
+            p.reset(new markdown_printer());
+            break;
+        case SQL:
+            p.reset(new sql_printer());
+            break;
+        default:
+            assert(false);
+            exit(1);
+    }
+    p->fout = stdout;
+    p->print_header(params);
+
+    std::vector<cmd_params_instance> params_instances = get_cmd_params_instances(params);
+
+    for (const auto & inst : params_instances) {
+        // TODO: keep the model between tests when possible
+        llama_context_params lparams = inst.to_llama_params();
+
+        llama_model * lmodel  = llama_load_model_from_file(inst.model.c_str(), lparams);
+        if (lmodel == NULL) {
+            fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, inst.model.c_str());
+            return 1;
+        }
+
+        llama_context * ctx = llama_new_context_with_model(lmodel, lparams);
+        if (ctx == NULL) {
+            fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, inst.model.c_str());
+            llama_free_model(lmodel);
+            return 1;
+        }
+
+        test t(inst, lmodel, ctx);
+
+        // warmup run
+        test_gen(ctx, 1, 0, t.n_threads);
+
+        for (int i = 0; i < params.reps; i++) {
+            uint64_t t_start = get_time_ns();
+            if (t.n_prompt > 0) {
+                test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
+            }
+            if (t.n_gen > 0) {
+                test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads);
+            }
+            uint64_t t_ns = get_time_ns() - t_start;
+            t.samples_ns.push_back(t_ns);
+        }
+
+        p->print_test(t);
+
+        llama_print_timings(ctx);
+
+        llama_free(ctx);
+        llama_free_model(lmodel);
+    }
+
+    p->print_footer();
+
+    llama_backend_free();
+
+    return 0;
+}
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 5c2f64883..388e1f7d7 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -143,7 +143,7 @@ int main(int argc, char ** argv) {
         {
             fprintf(stderr, "%s: testing memory usage for n_batch = %d, n_ctx = %d\n", __func__, params.n_batch, params.n_ctx);
 
-            const std::vector<llama_token> tmp(params.n_batch, llama_token_bos());
+            const std::vector<llama_token> tmp(params.n_batch, llama_token_bos(ctx));
             llama_eval(ctx, tmp.data(), tmp.size(), params.n_ctx, params.n_threads);
         }
 
@@ -345,10 +345,9 @@ int main(int argc, char ** argv) {
         fprintf(stderr, "\n");
 
         {
-            auto it = params.logit_bias.find(llama_token_eos());
+            auto it = params.logit_bias.find(llama_token_eos(ctx));
             if (it != params.logit_bias.end() && it->second == -INFINITY) {
-                fprintf(stderr,
-                    "%s: warning: EOS token is disabled, which will cause most grammars to fail\n", __func__);
+                fprintf(stderr, "%s: warning: EOS token is disabled, which will cause most grammars to fail\n", __func__);
             }
         }
 
@@ -398,7 +397,7 @@ int main(int argc, char ** argv) {
 
     // do one empty run to warm up the model
     {
-        const std::vector<llama_token> tmp = { llama_token_bos(), };
+        const std::vector<llama_token> tmp = { llama_token_bos(ctx), };
         llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
         llama_reset_timings(ctx);
     }
@@ -582,7 +581,7 @@ int main(int argc, char ** argv) {
                 }
 
                 // Apply penalties
-                float nl_logit = logits[llama_token_nl()];
+                float nl_logit = logits[llama_token_nl(ctx)];
                 auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx);
                 llama_sample_repetition_penalty(ctx, &candidates_p,
                     last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
@@ -591,7 +590,7 @@ int main(int argc, char ** argv) {
                     last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
                     last_n_repeat, alpha_frequency, alpha_presence);
                 if (!penalize_nl) {
-                    logits[llama_token_nl()] = nl_logit;
+                    logits[llama_token_nl(ctx)] = nl_logit;
                 }
 
                 if (grammar != NULL) {
@@ -697,7 +696,7 @@ int main(int argc, char ** argv) {
             }
 
             // deal with end of text token in interactive mode
-            if (last_n_tokens.back() == llama_token_eos()) {
+            if (last_n_tokens.back() == llama_token_eos(ctx)) {
                 if (params.interactive) {
                     if (params.antiprompt.size() != 0) {
                         // tokenize and inject first reverse prompt
@@ -721,7 +720,7 @@ int main(int argc, char ** argv) {
                 }
 
                 if (params.input_prefix_bos) {
-                    embd_inp.push_back(llama_token_bos());
+                    embd_inp.push_back(llama_token_bos(ctx));
                 }
 
                 std::string buffer;
@@ -786,7 +785,7 @@ int main(int argc, char ** argv) {
         }
 
         // end of text token
-        if (!embd.empty() && embd.back() == llama_token_eos() && !(params.instruct || params.interactive)) {
+        if (!embd.empty() && embd.back() == llama_token_eos(ctx) && !(params.instruct || params.interactive)) {
             fprintf(stderr, " [end of text]\n");
             break;
         }
diff --git a/examples/metal/metal.cpp b/examples/metal/metal.cpp
index 7438defde..c05a4fa93 100644
--- a/examples/metal/metal.cpp
+++ b/examples/metal/metal.cpp
@@ -2,7 +2,7 @@
 //
 // - First, export a LLaMA graph:
 //
-//  $ ./bin/main -m ../models/7B/ggml-model-q4_0.bin --export
+//  $ ./bin/main -m ../models/7B/ggml-model-q4_0.gguf --export
 //
 // - Run this tool to evaluate the exported graph:
 //
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 62433e983..9eadbeaa9 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -63,7 +63,7 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
 
             // add BOS token for the first batch of each chunk
             if (j == 0) {
-                tokens[batch_start] = llama_token_bos();
+                tokens[batch_start] = llama_token_bos(ctx);
             }
 
             if (llama_eval(ctx, tokens.data() + batch_start, batch_size, j * n_batch, params.n_threads)) {
@@ -88,7 +88,7 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
                 fprintf(stderr, "%d hours ", total_seconds / (60*60));
                 total_seconds = total_seconds % (60*60);
             }
-            fprintf(stderr, "%d minutes\n", total_seconds / 60);
+            fprintf(stderr, "%.2f minutes\n", total_seconds / 60.0);
         }
 
         // We get the logits for all the tokens in the context window (params.n_ctx)
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index a330b20df..06ce18f09 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -1,7 +1,6 @@
 #include "ggml.h"
 #include "build-info.h"
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 
@@ -25,7 +24,7 @@
 #endif
 
 struct quantize_stats_params {
-    std::string model = "models/7B/ggml-model-f16.bin";
+    std::string model = "models/7B/ggml-model-f16.gguf";
     bool verbose = false;
     bool per_layer_stats = false;
     bool print_histogram = false;
diff --git a/examples/server/README.md b/examples/server/README.md
index 1559dd3f2..ce2d2c9c6 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -5,7 +5,7 @@ This example demonstrates a simple HTTP API server and a simple web front end to
 Command line options:
 
 -   `--threads N`, `-t N`: Set the number of threads to use during computation.
--   `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`).
+-   `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`).
 -   `-m ALIAS`, `--alias ALIAS`: Set an alias for the model. The alias will be returned in API responses.
 -   `-c N`, `--ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. The size may differ in other models, for example, baichuan models were build with a context of 4096.
 -   `-ngl N`, `--n-gpu-layers N`: When compiled with appropriate support (currently CLBlast or cuBLAS), this option allows offloading some layers to the GPU for computation. Generally results in increased performance.
@@ -48,14 +48,12 @@ To get started right away, run the following command, making sure to use the cor
 ### Unix-based systems (Linux, macOS, etc.):
 
 ```bash
-./server -m models/7B/ggml-model.bin -c 2048
+./server -m models/7B/ggml-model.gguf -c 2048
 ```
 
 ### Windows:
 
 ```powershell
-server.exe -m models\7B\ggml-model.bin -c 2048
-```
 
 The above command will start a server that by default listens on `127.0.0.1:8080`.
 You can consume the endpoints with Postman or NodeJS with axios library. You can visit the web front end at the same url.
diff --git a/examples/server/index.html.hpp b/examples/server/index.html.hpp
index 095b20610..76851d9c6 100644
--- a/examples/server/index.html.hpp
+++ b/examples/server/index.html.hpp
@@ -289,972 +289,1410 @@ unsigned char index_html[] = {
   0x20, 0x72, 0x61, 0x74, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x67, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x3a, 0x20, 0x27, 0x27, 0x2c,
   0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
-  0x53, 0x74, 0x61, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x73, 0x69, 0x67, 0x6e,
-  0x61, 0x6c, 0x28, 0x6e, 0x75, 0x6c, 0x6c, 0x29, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
-  0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x73, 0x69, 0x67, 0x6e,
-  0x61, 0x6c, 0x28, 0x6e, 0x75, 0x6c, 0x6c, 0x29, 0x0a, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x65, 0x6e, 0x65,
-  0x72, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6d,
-  0x70, 0x75, 0x74, 0x65, 0x64, 0x28, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20,
-  0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76,
-  0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d, 0x3d, 0x20, 0x6e, 0x75, 0x6c, 0x6c,
-  0x20, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
-  0x20, 0x63, 0x68, 0x61, 0x74, 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64,
-  0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x64, 0x28,
-  0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f,
-  0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74, 0x72, 0x61, 0x6e,
-  0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x2e, 0x6c, 0x65, 0x6e, 0x67, 0x74,
-  0x68, 0x20, 0x3e, 0x20, 0x30, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63,
-  0x72, 0x69, 0x70, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x20, 0x3d,
-  0x20, 0x28, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
-  0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c,
-  0x75, 0x65, 0x20, 0x3d, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x2e, 0x2e, 0x2e, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f,
-  0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72,
-  0x69, 0x70, 0x74, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f,
-  0x2f, 0x20, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x20, 0x74, 0x65, 0x6d,
-  0x70, 0x6c, 0x61, 0x74, 0x65, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
-  0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
-  0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x20, 0x3d, 0x20, 0x28,
-  0x73, 0x74, 0x72, 0x2c, 0x20, 0x65, 0x78, 0x74, 0x72, 0x61, 0x53, 0x65,
-  0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x73,
-  0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x73, 0x65,
-  0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65,
-  0x78, 0x74, 0x72, 0x61, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73,
-  0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x7b,
-  0x20, 0x2e, 0x2e, 0x2e, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73,
-  0x2c, 0x20, 0x2e, 0x2e, 0x2e, 0x65, 0x78, 0x74, 0x72, 0x61, 0x53, 0x65,
-  0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x7d, 0x3b, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x53, 0x74, 0x72, 0x69, 0x6e,
-  0x67, 0x28, 0x73, 0x74, 0x72, 0x29, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61,
-  0x63, 0x65, 0x41, 0x6c, 0x6c, 0x28, 0x2f, 0x5c, 0x7b, 0x5c, 0x7b, 0x28,
-  0x2e, 0x2a, 0x3f, 0x29, 0x5c, 0x7d, 0x5c, 0x7d, 0x2f, 0x67, 0x2c, 0x20,
-  0x28, 0x5f, 0x2c, 0x20, 0x6b, 0x65, 0x79, 0x29, 0x20, 0x3d, 0x3e, 0x20,
-  0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x28, 0x73, 0x65, 0x74,
-  0x74, 0x69, 0x6e, 0x67, 0x73, 0x5b, 0x6b, 0x65, 0x79, 0x5d, 0x29, 0x29,
+  0x20, 0x2f, 0x2a, 0x20, 0x53, 0x54, 0x41, 0x52, 0x54, 0x3a, 0x20, 0x53,
+  0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x73,
+  0x74, 0x6f, 0x72, 0x69, 0x6e, 0x67, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x70,
+  0x74, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x20,
+  0x61, 0x6e, 0x64, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65,
+  0x72, 0x73, 0x20, 0x69, 0x6e, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x73, 0x65,
+  0x72, 0x20, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x53, 0x74, 0x6f, 0x72, 0x61,
+  0x67, 0x65, 0x20, 0x2a, 0x2f, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63,
+  0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f, 0x73,
+  0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61,
+  0x67, 0x65, 0x4b, 0x65, 0x79, 0x20, 0x3d, 0x20, 0x22, 0x6c, 0x6c, 0x61,
+  0x6d, 0x61, 0x63, 0x70, 0x70, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72,
+  0x5f, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61,
+  0x67, 0x65, 0x22, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x75,
+  0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
+  0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x65, 0x74,
+  0x44, 0x61, 0x74, 0x61, 0x46, 0x72, 0x6f, 0x6d, 0x4f, 0x62, 0x6a, 0x65,
+  0x63, 0x74, 0x28, 0x74, 0x61, 0x67, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74,
+  0x65, 0x6e, 0x74, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67,
+  0x65, 0x2e, 0x73, 0x65, 0x74, 0x49, 0x74, 0x65, 0x6d, 0x28, 0x6c, 0x6f,
+  0x63, 0x61, 0x6c, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f,
+  0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4b, 0x65, 0x79, 0x20, 0x2b,
+  0x20, 0x27, 0x2f, 0x27, 0x20, 0x2b, 0x20, 0x74, 0x61, 0x67, 0x2c, 0x20,
+  0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x69,
+  0x66, 0x79, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x29,
   0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x2f, 0x2f, 0x20, 0x73, 0x65, 0x6e, 0x64, 0x20, 0x6d, 0x65, 0x73,
-  0x73, 0x61, 0x67, 0x65, 0x20, 0x74, 0x6f, 0x20, 0x73, 0x65, 0x72, 0x76,
-  0x65, 0x72, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
-  0x20, 0x63, 0x68, 0x61, 0x74, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e,
-  0x63, 0x20, 0x28, 0x6d, 0x73, 0x67, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63,
-  0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76, 0x61,
-  0x6c, 0x75, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6c,
-  0x6f, 0x67, 0x28, 0x27, 0x61, 0x6c, 0x72, 0x65, 0x61, 0x64, 0x79, 0x20,
-  0x72, 0x75, 0x6e, 0x6e, 0x69, 0x6e, 0x67, 0x2e, 0x2e, 0x2e, 0x27, 0x29,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65,
-  0x74, 0x75, 0x72, 0x6e, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74,
-  0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65,
-  0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x41, 0x62, 0x6f, 0x72, 0x74,
-  0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x28, 0x29,
-  0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x72, 0x61,
-  0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74,
-  0x65, 0x28, 0x5b, 0x2e, 0x2e, 0x2e, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f,
-  0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74, 0x72, 0x61, 0x6e,
-  0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x2c, 0x20, 0x5b, 0x22, 0x7b, 0x7b,
-  0x75, 0x73, 0x65, 0x72, 0x7d, 0x7d, 0x22, 0x2c, 0x20, 0x6d, 0x73, 0x67,
-  0x5d, 0x5d, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63,
-  0x6f, 0x6e, 0x73, 0x74, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x20,
-  0x3d, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x28, 0x73,
-  0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65,
-  0x2e, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2c, 0x20, 0x7b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6d, 0x65, 0x73,
-  0x73, 0x61, 0x67, 0x65, 0x3a, 0x20, 0x6d, 0x73, 0x67, 0x2c, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x68, 0x69, 0x73, 0x74, 0x6f,
-  0x72, 0x79, 0x3a, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e,
-  0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63,
-  0x72, 0x69, 0x70, 0x74, 0x2e, 0x66, 0x6c, 0x61, 0x74, 0x4d, 0x61, 0x70,
-  0x28, 0x28, 0x5b, 0x6e, 0x61, 0x6d, 0x65, 0x2c, 0x20, 0x6d, 0x65, 0x73,
-  0x73, 0x61, 0x67, 0x65, 0x5d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x74, 0x65,
-  0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x28, 0x73, 0x65, 0x73, 0x73, 0x69,
-  0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x68, 0x69, 0x73,
-  0x74, 0x6f, 0x72, 0x79, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
-  0x2c, 0x20, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x2c, 0x20, 0x6d, 0x65, 0x73,
-  0x73, 0x61, 0x67, 0x65, 0x7d, 0x29, 0x29, 0x2e, 0x6a, 0x6f, 0x69, 0x6e,
-  0x28, 0x22, 0x5c, 0x6e, 0x22, 0x29, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74,
-  0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x20, 0x3d, 0x20, 0x27, 0x27,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
-  0x74, 0x20, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x72, 0x79, 0x20, 0x3d, 0x20,
-  0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x2e, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
+  0x20, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6c, 0x6f,
+  0x63, 0x61, 0x6c, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f,
+  0x73, 0x65, 0x74, 0x44, 0x61, 0x74, 0x61, 0x46, 0x72, 0x6f, 0x6d, 0x52,
+  0x61, 0x77, 0x54, 0x65, 0x78, 0x74, 0x28, 0x74, 0x61, 0x67, 0x2c, 0x20,
+  0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x53, 0x74,
+  0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x73, 0x65, 0x74, 0x49, 0x74, 0x65,
+  0x6d, 0x28, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f, 0x73, 0x74, 0x6f, 0x72,
+  0x61, 0x67, 0x65, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4b,
+  0x65, 0x79, 0x20, 0x2b, 0x20, 0x27, 0x2f, 0x27, 0x20, 0x2b, 0x20, 0x74,
+  0x61, 0x67, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29,
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6c, 0x6f,
+  0x63, 0x61, 0x6c, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f,
+  0x67, 0x65, 0x74, 0x44, 0x61, 0x74, 0x61, 0x41, 0x73, 0x4f, 0x62, 0x6a,
+  0x65, 0x63, 0x74, 0x28, 0x74, 0x61, 0x67, 0x29, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69,
+  0x74, 0x65, 0x6d, 0x20, 0x3d, 0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x53,
+  0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e, 0x67, 0x65, 0x74, 0x49, 0x74,
+  0x65, 0x6d, 0x28, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f, 0x73, 0x74, 0x6f,
+  0x72, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
+  0x4b, 0x65, 0x79, 0x20, 0x2b, 0x20, 0x27, 0x2f, 0x27, 0x20, 0x2b, 0x20,
+  0x74, 0x61, 0x67, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x69, 0x66, 0x20, 0x28, 0x21, 0x69, 0x74, 0x65, 0x6d, 0x29, 0x20, 0x7b,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74,
+  0x75, 0x72, 0x6e, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x3b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74,
+  0x75, 0x72, 0x6e, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72,
+  0x73, 0x65, 0x28, 0x69, 0x74, 0x65, 0x6d, 0x29, 0x3b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f,
+  0x6e, 0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f, 0x73, 0x74, 0x6f, 0x72,
+  0x61, 0x67, 0x65, 0x5f, 0x67, 0x65, 0x74, 0x44, 0x61, 0x74, 0x61, 0x41,
+  0x73, 0x52, 0x61, 0x77, 0x54, 0x65, 0x78, 0x74, 0x28, 0x74, 0x61, 0x67,
+  0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
+  0x6e, 0x73, 0x74, 0x20, 0x69, 0x74, 0x65, 0x6d, 0x20, 0x3d, 0x20, 0x6c,
+  0x6f, 0x63, 0x61, 0x6c, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2e,
+  0x67, 0x65, 0x74, 0x49, 0x74, 0x65, 0x6d, 0x28, 0x6c, 0x6f, 0x63, 0x61,
+  0x6c, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x74,
+  0x6f, 0x72, 0x61, 0x67, 0x65, 0x4b, 0x65, 0x79, 0x20, 0x2b, 0x20, 0x27,
+  0x2f, 0x27, 0x20, 0x2b, 0x20, 0x74, 0x61, 0x67, 0x29, 0x3b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x69, 0x74,
+  0x65, 0x6d, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x75, 0x6c,
+  0x6c, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x65,
+  0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x69, 0x74, 0x65,
+  0x6d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f,
+  0x20, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x20, 0x61, 0x20, 0x63, 0x6f,
+  0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x20, 0x66, 0x6f, 0x72, 0x20,
+  0x75, 0x73, 0x65, 0x72, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
+  0x65, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x73, 0x65, 0x74, 0x74, 0x69,
+  0x6e, 0x67, 0x73, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
+  0x73, 0x74, 0x20, 0x73, 0x61, 0x76, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72,
+  0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x20, 0x3d, 0x20,
+  0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x28, 0x7b, 0x7d, 0x29, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x73, 0x65, 0x6c,
+  0x65, 0x63, 0x74, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d,
+  0x70, 0x6c, 0x61, 0x74, 0x65, 0x20, 0x3d, 0x20, 0x73, 0x69, 0x67, 0x6e,
+  0x61, 0x6c, 0x28, 0x7b, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x27,
+  0x27, 0x2c, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x3a,
+  0x20, 0x7b, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x3a, 0x20,
+  0x7b, 0x7d, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x3a, 0x20,
+  0x7b, 0x7d, 0x20, 0x7d, 0x20, 0x7d, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x2f, 0x2f, 0x20, 0x6c, 0x65, 0x74, 0x27, 0x73, 0x20, 0x69, 0x6d,
+  0x70, 0x6f, 0x72, 0x74, 0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x6c, 0x79,
+  0x20, 0x73, 0x61, 0x76, 0x65, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c,
+  0x61, 0x74, 0x65, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x73, 0x65, 0x74,
+  0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x69, 0x66, 0x20, 0x74, 0x68, 0x65,
+  0x72, 0x65, 0x20, 0x61, 0x72, 0x65, 0x20, 0x61, 0x6e, 0x79, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x75, 0x73, 0x65, 0x72, 0x20, 0x74,
+  0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x20, 0x61, 0x6e, 0x64,
+  0x20, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x61, 0x72,
+  0x65, 0x20, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x64, 0x20, 0x69, 0x6e, 0x20,
+  0x6f, 0x6e, 0x65, 0x20, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x69, 0x6e, 0x20, 0x66, 0x6f, 0x72,
+  0x6d, 0x20, 0x6f, 0x66, 0x20, 0x7b, 0x20, 0x22, 0x74, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x3a, 0x20, 0x22,
+  0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x61, 0x74, 0x61,
+  0x22, 0x20, 0x7d, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x7b, 0x20, 0x22, 0x73,
+  0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x74, 0x65, 0x6d, 0x70, 0x6c,
+  0x61, 0x74, 0x65, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x3a, 0x22, 0x73, 0x65,
+  0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x64, 0x61, 0x74, 0x61, 0x22, 0x20,
+  0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f,
+  0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28, 0x27, 0x49, 0x6d, 0x70, 0x6f,
+  0x72, 0x74, 0x69, 0x6e, 0x67, 0x20, 0x73, 0x61, 0x76, 0x65, 0x64, 0x20,
+  0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x27, 0x29, 0x0a,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x69, 0x6d, 0x70,
+  0x6f, 0x72, 0x74, 0x65, 0x64, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
+  0x65, 0x73, 0x20, 0x3d, 0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f, 0x73,
+  0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f, 0x67, 0x65, 0x74, 0x44, 0x61,
+  0x74, 0x61, 0x41, 0x73, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x27,
+  0x75, 0x73, 0x65, 0x72, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
+  0x65, 0x73, 0x27, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66,
+  0x20, 0x28, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x54, 0x65,
+  0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x73, 0x61, 0x76, 0x65,
+  0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x20,
+  0x77, 0x65, 0x72, 0x65, 0x20, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73,
+  0x66, 0x75, 0x6c, 0x79, 0x20, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x65,
+  0x64, 0x2e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
+  0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28, 0x27, 0x50,
+  0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x69, 0x6e, 0x67, 0x20, 0x73, 0x61,
+  0x76, 0x65, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
+  0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x69,
+  0x6e, 0x67, 0x20, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x20, 0x74,
+  0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x27, 0x29, 0x0a, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x63, 0x6f, 0x6e, 0x73, 0x6f,
+  0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28, 0x69, 0x6d, 0x70, 0x6f, 0x72,
+  0x74, 0x65, 0x64, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73,
+  0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x61, 0x76,
+  0x65, 0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d, 0x20,
+  0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x54, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x73, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x2f, 0x2f, 0x6f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, 0x65,
+  0x20, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x20, 0x74, 0x65, 0x6d,
+  0x70, 0x6c, 0x61, 0x74, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x73, 0x61, 0x76, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d,
+  0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65,
+  0x2e, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x20, 0x3d, 0x20, 0x7b,
+  0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x3a, 0x20, 0x73, 0x65,
+  0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c,
+  0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x3a, 0x20, 0x70, 0x61, 0x72,
+  0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x7d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f,
+  0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x65, 0x74, 0x44,
+  0x61, 0x74, 0x61, 0x46, 0x72, 0x6f, 0x6d, 0x4f, 0x62, 0x6a, 0x65, 0x63,
+  0x74, 0x28, 0x27, 0x75, 0x73, 0x65, 0x72, 0x5f, 0x74, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x73, 0x27, 0x2c, 0x20, 0x73, 0x61, 0x76, 0x65,
+  0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
+  0x65, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x7d, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x6e, 0x6f, 0x20, 0x73,
+  0x61, 0x76, 0x65, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
+  0x65, 0x73, 0x20, 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, 0x65, 0x64, 0x2e,
   0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
-  0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x61, 0x72, 0x61, 0x6d,
-  0x73, 0x20, 0x3d, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e,
+  0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28, 0x27, 0x49, 0x6e, 0x69,
+  0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x69, 0x6e, 0x67, 0x20, 0x4c, 0x6f,
+  0x63, 0x61, 0x6c, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x20, 0x61,
+  0x6e, 0x64, 0x20, 0x73, 0x61, 0x76, 0x69, 0x6e, 0x67, 0x20, 0x64, 0x65,
+  0x66, 0x61, 0x75, 0x6c, 0x74, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x27, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x73, 0x61, 0x76, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d,
+  0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65,
+  0x20, 0x3d, 0x20, 0x7b, 0x20, 0x22, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c,
+  0x74, 0x22, 0x3a, 0x20, 0x7b, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f,
+  0x6e, 0x3a, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76,
+  0x61, 0x6c, 0x75, 0x65, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
+  0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c,
+  0x75, 0x65, 0x20, 0x7d, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61,
+  0x67, 0x65, 0x5f, 0x73, 0x65, 0x74, 0x44, 0x61, 0x74, 0x61, 0x46, 0x72,
+  0x6f, 0x6d, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x27, 0x75, 0x73,
+  0x65, 0x72, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73,
+  0x27, 0x2c, 0x20, 0x73, 0x61, 0x76, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72,
+  0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x2e, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e,
+  0x20, 0x75, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
+  0x65, 0x52, 0x65, 0x73, 0x65, 0x74, 0x54, 0x6f, 0x44, 0x65, 0x66, 0x61,
+  0x75, 0x6c, 0x74, 0x28, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f,
+  0x67, 0x28, 0x27, 0x52, 0x65, 0x73, 0x65, 0x74, 0x69, 0x6e, 0x67, 0x20,
+  0x74, 0x68, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f,
+  0x20, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x27, 0x29, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x65,
+  0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
+  0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x6e, 0x61, 0x6d, 0x65,
+  0x20, 0x3d, 0x20, 0x27, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x27,
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x65, 0x6c, 0x65,
+  0x63, 0x74, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x64,
+  0x61, 0x74, 0x61, 0x20, 0x3d, 0x20, 0x73, 0x61, 0x76, 0x65, 0x64, 0x55,
+  0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73,
+  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x5b, 0x27, 0x64, 0x65, 0x66, 0x61,
+  0x75, 0x6c, 0x74, 0x27, 0x5d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
+  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69,
+  0x6f, 0x6e, 0x20, 0x75, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c,
+  0x61, 0x74, 0x65, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x28, 0x74, 0x29, 0x20,
+  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x65, 0x73, 0x73,
+  0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d, 0x20,
+  0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x73, 0x65, 0x73, 0x73, 0x69,
+  0x6f, 0x6e, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, 0x61,
+  0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d,
+  0x20, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x70, 0x61, 0x72, 0x61,
+  0x6d, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x20,
+  0x75, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
+  0x52, 0x65, 0x73, 0x65, 0x74, 0x54, 0x6f, 0x44, 0x65, 0x66, 0x61, 0x75,
+  0x6c, 0x74, 0x41, 0x6e, 0x64, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x28, 0x29,
+  0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x75, 0x73, 0x65,
+  0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73,
+  0x65, 0x74, 0x54, 0x6f, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x28,
+  0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x75, 0x73, 0x65, 0x72,
+  0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x41, 0x70, 0x70, 0x6c,
+  0x79, 0x28, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x65, 0x64, 0x55, 0x73,
+  0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2e, 0x76,
+  0x61, 0x6c, 0x75, 0x65, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f,
+  0x6e, 0x20, 0x75, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x4c, 0x6f, 0x61, 0x64, 0x41, 0x6e, 0x64, 0x41, 0x70, 0x70,
+  0x6c, 0x79, 0x41, 0x75, 0x74, 0x6f, 0x73, 0x61, 0x76, 0x65, 0x64, 0x28,
+  0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f,
+  0x20, 0x67, 0x65, 0x74, 0x20, 0x61, 0x75, 0x74, 0x6f, 0x73, 0x61, 0x76,
+  0x65, 0x64, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x20, 0x75, 0x73, 0x65, 0x64,
+  0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x6c, 0x61, 0x73, 0x74,
+  0x55, 0x73, 0x65, 0x64, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
+  0x20, 0x3d, 0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f, 0x73, 0x74, 0x6f,
+  0x72, 0x61, 0x67, 0x65, 0x5f, 0x67, 0x65, 0x74, 0x44, 0x61, 0x74, 0x61,
+  0x41, 0x73, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x27, 0x75, 0x73,
+  0x65, 0x72, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73,
+  0x5f, 0x6c, 0x61, 0x73, 0x74, 0x27, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x6c, 0x61, 0x73, 0x74, 0x55,
+  0x73, 0x65, 0x64, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x29,
+  0x20, 0x7b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28,
+  0x27, 0x41, 0x75, 0x74, 0x6f, 0x73, 0x61, 0x76, 0x65, 0x64, 0x20, 0x74,
+  0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x20, 0x66, 0x6f, 0x75, 0x6e,
+  0x64, 0x2c, 0x20, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x69, 0x6e, 0x67,
+  0x27, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72,
+  0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x61, 0x6c,
+  0x75, 0x65, 0x20, 0x3d, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x55, 0x73, 0x65,
+  0x64, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e,
+  0x6c, 0x6f, 0x67, 0x28, 0x27, 0x4e, 0x6f, 0x20, 0x61, 0x75, 0x74, 0x6f,
+  0x73, 0x61, 0x76, 0x65, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x20, 0x66, 0x6f, 0x75, 0x6e, 0x64, 0x2c, 0x20, 0x75, 0x73,
+  0x69, 0x6e, 0x67, 0x20, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x20,
+  0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x27, 0x29, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x6e, 0x6f,
+  0x20, 0x61, 0x75, 0x74, 0x6f, 0x73, 0x61, 0x76, 0x65, 0x64, 0x20, 0x6c,
+  0x61, 0x73, 0x74, 0x20, 0x75, 0x73, 0x65, 0x64, 0x20, 0x74, 0x65, 0x6d,
+  0x70, 0x6c, 0x61, 0x74, 0x65, 0x20, 0x77, 0x61, 0x73, 0x20, 0x66, 0x6f,
+  0x75, 0x6e, 0x64, 0x2c, 0x20, 0x73, 0x6f, 0x20, 0x6c, 0x6f, 0x61, 0x64,
+  0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c,
+  0x74, 0x2e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x75, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
+  0x52, 0x65, 0x73, 0x65, 0x74, 0x54, 0x6f, 0x44, 0x65, 0x66, 0x61, 0x75,
+  0x6c, 0x74, 0x28, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
+  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+  0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28, 0x27, 0x41, 0x70, 0x70,
+  0x6c, 0x79, 0x69, 0x6e, 0x67, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f,
+  0x2f, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65,
+  0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x20, 0x64, 0x61,
+  0x74, 0x61, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x73, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x75, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
+  0x65, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x28, 0x73, 0x65, 0x6c, 0x65, 0x63,
+  0x74, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c,
+  0x61, 0x74, 0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f,
+  0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28,
+  0x73, 0x61, 0x76, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d,
+  0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65,
+  0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x63, 0x6f, 0x6e, 0x73,
+  0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28, 0x73, 0x65, 0x6c, 0x65,
+  0x63, 0x74, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x0a,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f,
+  0x6e, 0x20, 0x75, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x41, 0x75, 0x74, 0x6f, 0x73, 0x61, 0x76, 0x65, 0x28, 0x29,
+  0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
+  0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28, 0x27, 0x54, 0x65,
+  0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x73,
+  0x61, 0x76, 0x65, 0x2e, 0x2e, 0x2e, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x73, 0x65, 0x6c, 0x65, 0x63,
+  0x74, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c,
+  0x61, 0x74, 0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x6e, 0x61,
+  0x6d, 0x65, 0x20, 0x3d, 0x3d, 0x20, 0x27, 0x64, 0x65, 0x66, 0x61, 0x75,
+  0x6c, 0x74, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x77, 0x65, 0x20, 0x64, 0x6f, 0x6e,
+  0x27, 0x74, 0x20, 0x77, 0x61, 0x6e, 0x74, 0x20, 0x74, 0x6f, 0x20, 0x73,
+  0x61, 0x76, 0x65, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x64, 0x65, 0x66,
+  0x61, 0x75, 0x6c, 0x74, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
+  0x65, 0x2c, 0x20, 0x73, 0x6f, 0x20, 0x6c, 0x65, 0x74, 0x27, 0x73, 0x20,
+  0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x20, 0x61, 0x20, 0x6e, 0x65, 0x77,
+  0x20, 0x6f, 0x6e, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x6c, 0x65, 0x74, 0x20, 0x6e, 0x65, 0x77, 0x54, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x20, 0x3d, 0x20, 0x27,
+  0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
+  0x2d, 0x27, 0x20, 0x2b, 0x20, 0x44, 0x61, 0x74, 0x65, 0x2e, 0x6e, 0x6f,
+  0x77, 0x28, 0x29, 0x2e, 0x74, 0x6f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67,
+  0x28, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c,
+  0x65, 0x74, 0x20, 0x6e, 0x65, 0x77, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x27, 0x6e, 0x61, 0x6d, 0x65,
+  0x27, 0x3a, 0x20, 0x6e, 0x65, 0x77, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x2c, 0x20, 0x27, 0x64, 0x61, 0x74,
+  0x61, 0x27, 0x3a, 0x20, 0x7b, 0x20, 0x27, 0x73, 0x65, 0x73, 0x73, 0x69,
+  0x6f, 0x6e, 0x27, 0x3a, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e,
+  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x20, 0x27, 0x70, 0x61, 0x72,
+  0x61, 0x6d, 0x73, 0x27, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
+  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x7d, 0x20, 0x7d, 0x0a, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+  0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28, 0x27, 0x53, 0x61, 0x76,
+  0x69, 0x6e, 0x67, 0x20, 0x61, 0x73, 0x20, 0x27, 0x20, 0x2b, 0x20, 0x6e,
+  0x65, 0x77, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x4e, 0x61,
+  0x6d, 0x65, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x2f, 0x2f, 0x20, 0x73, 0x61, 0x76, 0x65, 0x20, 0x69, 0x6e, 0x20,
+  0x74, 0x68, 0x65, 0x20, 0x61, 0x75, 0x74, 0x6f, 0x73, 0x61, 0x76, 0x65,
+  0x20, 0x73, 0x6c, 0x6f, 0x74, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5f, 0x73, 0x74, 0x6f, 0x72,
+  0x61, 0x67, 0x65, 0x5f, 0x73, 0x65, 0x74, 0x44, 0x61, 0x74, 0x61, 0x46,
+  0x72, 0x6f, 0x6d, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x27, 0x75,
+  0x73, 0x65, 0x72, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
+  0x73, 0x5f, 0x6c, 0x61, 0x73, 0x74, 0x27, 0x2c, 0x20, 0x6e, 0x65, 0x77,
+  0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x29, 0x0a, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x61, 0x6e,
+  0x64, 0x20, 0x6c, 0x6f, 0x61, 0x64, 0x20, 0x69, 0x74, 0x20, 0x62, 0x61,
+  0x63, 0x6b, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x61, 0x70, 0x70, 0x6c, 0x79,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x75, 0x73, 0x65,
+  0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x4c, 0x6f, 0x61,
+  0x64, 0x41, 0x6e, 0x64, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x41, 0x75, 0x74,
+  0x6f, 0x73, 0x61, 0x76, 0x65, 0x64, 0x28, 0x29, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x7d, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6f, 0x63, 0x61,
+  0x6c, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x65,
+  0x74, 0x44, 0x61, 0x74, 0x61, 0x46, 0x72, 0x6f, 0x6d, 0x4f, 0x62, 0x6a,
+  0x65, 0x63, 0x74, 0x28, 0x27, 0x75, 0x73, 0x65, 0x72, 0x5f, 0x74, 0x65,
+  0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x5f, 0x6c, 0x61, 0x73, 0x74,
+  0x27, 0x2c, 0x20, 0x7b, 0x20, 0x27, 0x6e, 0x61, 0x6d, 0x65, 0x27, 0x3a,
+  0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x65, 0x64, 0x55, 0x73, 0x65,
+  0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x2c, 0x20, 0x27, 0x64,
+  0x61, 0x74, 0x61, 0x27, 0x3a, 0x20, 0x7b, 0x20, 0x27, 0x73, 0x65, 0x73,
+  0x73, 0x69, 0x6f, 0x6e, 0x27, 0x3a, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69,
+  0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x20, 0x27, 0x70,
+  0x61, 0x72, 0x61, 0x6d, 0x73, 0x27, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61,
+  0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x7d, 0x20, 0x7d,
+  0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
+  0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x28, 0x27, 0x43, 0x68,
+  0x65, 0x63, 0x6b, 0x69, 0x6e, 0x67, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61,
+  0x75, 0x74, 0x6f, 0x73, 0x61, 0x76, 0x65, 0x64, 0x20, 0x6c, 0x61, 0x73,
+  0x74, 0x20, 0x75, 0x73, 0x65, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c,
+  0x61, 0x74, 0x65, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x75, 0x73,
+  0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x4c, 0x6f,
+  0x61, 0x64, 0x41, 0x6e, 0x64, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x41, 0x75,
+  0x74, 0x6f, 0x73, 0x61, 0x76, 0x65, 0x64, 0x28, 0x29, 0x0a, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x2f, 0x2a, 0x20, 0x45, 0x4e, 0x44, 0x3a, 0x20, 0x53,
+  0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x73,
+  0x74, 0x6f, 0x72, 0x69, 0x6e, 0x67, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x70,
+  0x74, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x20,
+  0x61, 0x6e, 0x64, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65,
+  0x72, 0x73, 0x20, 0x69, 0x6e, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x73, 0x65,
+  0x72, 0x20, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x53, 0x74, 0x6f, 0x72, 0x61,
+  0x67, 0x65, 0x20, 0x2a, 0x2f, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63,
+  0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x53, 0x74,
+  0x61, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c,
+  0x28, 0x6e, 0x75, 0x6c, 0x6c, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63,
+  0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c,
+  0x6c, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c,
+  0x28, 0x6e, 0x75, 0x6c, 0x6c, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61,
+  0x74, 0x69, 0x6e, 0x67, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6d, 0x70, 0x75,
+  0x74, 0x65, 0x64, 0x28, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x63, 0x6f,
+  0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c,
+  0x75, 0x65, 0x20, 0x3d, 0x3d, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x20, 0x29,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63,
+  0x68, 0x61, 0x74, 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x20, 0x3d,
+  0x20, 0x63, 0x6f, 0x6d, 0x70, 0x75, 0x74, 0x65, 0x64, 0x28, 0x28, 0x29,
+  0x20, 0x3d, 0x3e, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63,
+  0x72, 0x69, 0x70, 0x74, 0x2e, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x20,
+  0x3e, 0x20, 0x30, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
+  0x6e, 0x73, 0x74, 0x20, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69,
+  0x70, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x20, 0x3d, 0x20, 0x28,
+  0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x29, 0x20,
+  0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73,
+  0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65,
+  0x20, 0x3d, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x2e, 0x2e, 0x2e, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e,
   0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x73, 0x74, 0x6f, 0x70, 0x3a, 0x20, 0x5b, 0x22, 0x3c,
-  0x2f, 0x73, 0x3e, 0x22, 0x2c, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61,
-  0x74, 0x65, 0x28, 0x22, 0x7b, 0x7b, 0x63, 0x68, 0x61, 0x72, 0x7d, 0x7d,
-  0x3a, 0x22, 0x29, 0x2c, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
-  0x65, 0x28, 0x22, 0x7b, 0x7b, 0x75, 0x73, 0x65, 0x72, 0x7d, 0x7d, 0x3a,
-  0x22, 0x29, 0x5d, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
-  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20,
-  0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74,
-  0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c,
-  0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20,
-  0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c,
-  0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65,
-  0x72, 0x3a, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65,
-  0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x7d, 0x29, 0x29, 0x20,
-  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
-  0x6e, 0x73, 0x74, 0x20, 0x64, 0x61, 0x74, 0x61, 0x20, 0x3d, 0x20, 0x63,
-  0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x3b, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65,
-  0x6e, 0x74, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x20, 0x2b, 0x3d,
-  0x20, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
-  0x74, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x2f, 0x2f, 0x20, 0x72, 0x65, 0x6d, 0x6f, 0x76, 0x65, 0x20, 0x6c, 0x65,
-  0x61, 0x64, 0x69, 0x6e, 0x67, 0x20, 0x77, 0x68, 0x69, 0x74, 0x65, 0x73,
-  0x70, 0x61, 0x63, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x73, 0x73,
-  0x61, 0x67, 0x65, 0x20, 0x3d, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e,
-  0x74, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x72, 0x65, 0x70,
-  0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x5e, 0x5c, 0x73, 0x2b, 0x2f, 0x2c,
-  0x20, 0x22, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
-  0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x28, 0x5b, 0x2e, 0x2e, 0x2e, 0x68,
-  0x69, 0x73, 0x74, 0x6f, 0x72, 0x79, 0x2c, 0x20, 0x5b, 0x22, 0x7b, 0x7b,
-  0x63, 0x68, 0x61, 0x72, 0x7d, 0x7d, 0x22, 0x2c, 0x20, 0x63, 0x75, 0x72,
-  0x72, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5d,
-  0x5d, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x69, 0x66, 0x20, 0x28, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x73, 0x74, 0x6f,
-  0x70, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6c,
-  0x6f, 0x67, 0x28, 0x22, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69,
-  0x6f, 0x6e, 0x20, 0x66, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x3a,
-  0x20, 0x27, 0x22, 0x2c, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74,
-  0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x2c, 0x20, 0x22, 0x27, 0x2c,
-  0x20, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79, 0x3a, 0x20, 0x22, 0x2c,
-  0x20, 0x64, 0x61, 0x74, 0x61, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70,
+  0x74, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20,
+  0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c,
+  0x61, 0x74, 0x65, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x74, 0x65,
+  0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x73, 0x74,
+  0x72, 0x2c, 0x20, 0x65, 0x78, 0x74, 0x72, 0x61, 0x53, 0x65, 0x74, 0x74,
+  0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x73, 0x65, 0x74,
+  0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x73, 0x65, 0x73, 0x73,
+  0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x78, 0x74,
+  0x72, 0x61, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20,
+  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x65,
+  0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x2e,
+  0x2e, 0x2e, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x2c, 0x20,
+  0x2e, 0x2e, 0x2e, 0x65, 0x78, 0x74, 0x72, 0x61, 0x53, 0x65, 0x74, 0x74,
+  0x69, 0x6e, 0x67, 0x73, 0x20, 0x7d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65,
+  0x74, 0x75, 0x72, 0x6e, 0x20, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x28,
+  0x73, 0x74, 0x72, 0x29, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65,
+  0x41, 0x6c, 0x6c, 0x28, 0x2f, 0x5c, 0x7b, 0x5c, 0x7b, 0x28, 0x2e, 0x2a,
+  0x3f, 0x29, 0x5c, 0x7d, 0x5c, 0x7d, 0x2f, 0x67, 0x2c, 0x20, 0x28, 0x5f,
+  0x2c, 0x20, 0x6b, 0x65, 0x79, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x74, 0x65,
+  0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x28, 0x73, 0x65, 0x74, 0x74, 0x69,
+  0x6e, 0x67, 0x73, 0x5b, 0x6b, 0x65, 0x79, 0x5d, 0x29, 0x29, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f,
+  0x2f, 0x20, 0x73, 0x65, 0x6e, 0x64, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61,
+  0x67, 0x65, 0x20, 0x74, 0x6f, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63,
+  0x68, 0x61, 0x74, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20,
+  0x28, 0x6d, 0x73, 0x67, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x6f, 0x6e,
+  0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75,
+  0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67,
+  0x28, 0x27, 0x61, 0x6c, 0x72, 0x65, 0x61, 0x64, 0x79, 0x20, 0x72, 0x75,
+  0x6e, 0x6e, 0x69, 0x6e, 0x67, 0x2e, 0x2e, 0x2e, 0x27, 0x29, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75,
+  0x72, 0x6e, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f,
+  0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d,
+  0x20, 0x6e, 0x65, 0x77, 0x20, 0x41, 0x62, 0x6f, 0x72, 0x74, 0x43, 0x6f,
+  0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x28, 0x29, 0x3b, 0x0a,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x72, 0x61, 0x6e, 0x73,
+  0x63, 0x72, 0x69, 0x70, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x28,
+  0x5b, 0x2e, 0x2e, 0x2e, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63,
+  0x72, 0x69, 0x70, 0x74, 0x2c, 0x20, 0x5b, 0x22, 0x7b, 0x7b, 0x75, 0x73,
+  0x65, 0x72, 0x7d, 0x7d, 0x22, 0x2c, 0x20, 0x6d, 0x73, 0x67, 0x5d, 0x5d,
+  0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
+  0x73, 0x74, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x20, 0x3d, 0x20,
+  0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x28, 0x73, 0x65, 0x73,
+  0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74,
+  0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2c, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61,
+  0x67, 0x65, 0x3a, 0x20, 0x6d, 0x73, 0x67, 0x2c, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x72, 0x79,
+  0x3a, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x2e, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69,
+  0x70, 0x74, 0x2e, 0x66, 0x6c, 0x61, 0x74, 0x4d, 0x61, 0x70, 0x28, 0x28,
+  0x5b, 0x6e, 0x61, 0x6d, 0x65, 0x2c, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61,
+  0x67, 0x65, 0x5d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x74, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x28, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e,
+  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x68, 0x69, 0x73, 0x74, 0x6f,
+  0x72, 0x79, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2c, 0x20,
+  0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x2c, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61,
+  0x67, 0x65, 0x7d, 0x29, 0x29, 0x2e, 0x6a, 0x6f, 0x69, 0x6e, 0x28, 0x22,
+  0x5c, 0x6e, 0x22, 0x29, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x7d, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c,
+  0x65, 0x74, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4d, 0x65,
+  0x73, 0x73, 0x61, 0x67, 0x65, 0x20, 0x3d, 0x20, 0x27, 0x27, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+  0x68, 0x69, 0x73, 0x74, 0x6f, 0x72, 0x79, 0x20, 0x3d, 0x20, 0x73, 0x65,
+  0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e,
+  0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x0a, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+  0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20,
+  0x3d, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x73, 0x74, 0x6f, 0x70, 0x3a, 0x20, 0x5b, 0x22, 0x3c, 0x2f, 0x73,
+  0x3e, 0x22, 0x2c, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
+  0x28, 0x22, 0x7b, 0x7b, 0x63, 0x68, 0x61, 0x72, 0x7d, 0x7d, 0x3a, 0x22,
+  0x29, 0x2c, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x28,
+  0x22, 0x7b, 0x7b, 0x75, 0x73, 0x65, 0x72, 0x7d, 0x7d, 0x3a, 0x22, 0x29,
+  0x5d, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77,
+  0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63,
+  0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
+  0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x6c, 0x6c,
+  0x61, 0x6d, 0x61, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b,
+  0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x3a,
+  0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x7d, 0x29, 0x29, 0x20, 0x7b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+  0x74, 0x20, 0x64, 0x61, 0x74, 0x61, 0x20, 0x3d, 0x20, 0x63, 0x68, 0x75,
+  0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x3b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74,
+  0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x20, 0x2b, 0x3d, 0x20, 0x64,
+  0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b,
+  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f,
+  0x20, 0x72, 0x65, 0x6d, 0x6f, 0x76, 0x65, 0x20, 0x6c, 0x65, 0x61, 0x64,
+  0x69, 0x6e, 0x67, 0x20, 0x77, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61,
+  0x63, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63,
+  0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67,
+  0x65, 0x20, 0x3d, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4d,
+  0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61,
+  0x63, 0x65, 0x28, 0x2f, 0x5e, 0x5c, 0x73, 0x2b, 0x2f, 0x2c, 0x20, 0x22,
+  0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x55, 0x70,
+  0x64, 0x61, 0x74, 0x65, 0x28, 0x5b, 0x2e, 0x2e, 0x2e, 0x68, 0x69, 0x73,
+  0x74, 0x6f, 0x72, 0x79, 0x2c, 0x20, 0x5b, 0x22, 0x7b, 0x7b, 0x63, 0x68,
+  0x61, 0x72, 0x7d, 0x7d, 0x22, 0x2c, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65,
+  0x6e, 0x74, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5d, 0x5d, 0x29,
+  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66,
+  0x20, 0x28, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x73, 0x74, 0x6f, 0x70, 0x29,
+  0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67,
+  0x28, 0x22, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e,
+  0x20, 0x66, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x65, 0x64, 0x3a, 0x20, 0x27,
+  0x22, 0x2c, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x4d, 0x65,
+  0x73, 0x73, 0x61, 0x67, 0x65, 0x2c, 0x20, 0x22, 0x27, 0x2c, 0x20, 0x73,
+  0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79, 0x3a, 0x20, 0x22, 0x2c, 0x20, 0x64,
+  0x61, 0x74, 0x61, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x69, 0x66, 0x20, 0x28, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69,
+  0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
+  0x53, 0x74, 0x61, 0x74, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20,
+  0x3d, 0x20, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69, 0x6e,
+  0x67, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c,
+  0x6c, 0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d, 0x20,
+  0x6e, 0x75, 0x6c, 0x6c, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f,
+  0x6e, 0x20, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x49, 0x6e, 0x70,
+  0x75, 0x74, 0x28, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61,
+  0x67, 0x65, 0x20, 0x3d, 0x20, 0x75, 0x73, 0x65, 0x53, 0x69, 0x67, 0x6e,
+  0x61, 0x6c, 0x28, 0x22, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x73, 0x74, 0x6f, 0x70,
+  0x20, 0x3d, 0x20, 0x28, 0x65, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x2e, 0x70, 0x72,
+  0x65, 0x76, 0x65, 0x6e, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74,
+  0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x69, 0x66, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c,
+  0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x20, 0x7b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
+  0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c,
+  0x75, 0x65, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28, 0x29, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
+  0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c,
+  0x75, 0x65, 0x20, 0x3d, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x3b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x73, 0x65, 0x74, 0x20,
+  0x3d, 0x20, 0x28, 0x65, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x74, 0x6f, 0x70, 0x28,
+  0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x55, 0x70,
+  0x64, 0x61, 0x74, 0x65, 0x28, 0x5b, 0x5d, 0x29, 0x3b, 0x0a, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x64, 0x61, 0x74, 0x61, 0x2e,
-  0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6c, 0x61,
-  0x6d, 0x61, 0x53, 0x74, 0x61, 0x74, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x20, 0x3d, 0x20, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d,
-  0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
-  0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20,
-  0x3d, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x75, 0x6e, 0x63, 0x74,
-  0x69, 0x6f, 0x6e, 0x20, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x49,
-  0x6e, 0x70, 0x75, 0x74, 0x28, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6d, 0x65, 0x73,
-  0x73, 0x61, 0x67, 0x65, 0x20, 0x3d, 0x20, 0x75, 0x73, 0x65, 0x53, 0x69,
-  0x67, 0x6e, 0x61, 0x6c, 0x28, 0x22, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x73, 0x74,
-  0x6f, 0x70, 0x20, 0x3d, 0x20, 0x28, 0x65, 0x29, 0x20, 0x3d, 0x3e, 0x20,
-  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x2e,
-  0x70, 0x72, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75,
-  0x6c, 0x74, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f,
-  0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x20,
-  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76,
-  0x61, 0x6c, 0x75, 0x65, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28, 0x29,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x76,
-  0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x3b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x73, 0x65,
+  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x73, 0x75, 0x62, 0x6d, 0x69,
   0x74, 0x20, 0x3d, 0x20, 0x28, 0x65, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x74, 0x6f,
   0x70, 0x28, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
-  0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x28, 0x5b, 0x5d, 0x29, 0x3b, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x73, 0x75, 0x62,
-  0x6d, 0x69, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x65, 0x29, 0x20, 0x3d, 0x3e,
-  0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73,
-  0x74, 0x6f, 0x70, 0x28, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x63, 0x68, 0x61, 0x74, 0x28, 0x6d, 0x65, 0x73,
-  0x73, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x3b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6d, 0x65, 0x73,
-  0x73, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d,
-  0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
-  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
-  0x74, 0x20, 0x65, 0x6e, 0x74, 0x65, 0x72, 0x53, 0x75, 0x62, 0x6d, 0x69,
-  0x74, 0x73, 0x20, 0x3d, 0x20, 0x28, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x29,
-  0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x2e,
-  0x77, 0x68, 0x69, 0x63, 0x68, 0x20, 0x3d, 0x3d, 0x3d, 0x20, 0x31, 0x33,
-  0x20, 0x26, 0x26, 0x20, 0x21, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x2e, 0x73,
-  0x68, 0x69, 0x66, 0x74, 0x4b, 0x65, 0x79, 0x29, 0x20, 0x7b, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x75, 0x62,
-  0x6d, 0x69, 0x74, 0x28, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x29, 0x3b, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c,
-  0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x66,
-  0x6f, 0x72, 0x6d, 0x20, 0x6f, 0x6e, 0x73, 0x75, 0x62, 0x6d, 0x69, 0x74,
-  0x3d, 0x24, 0x7b, 0x73, 0x75, 0x62, 0x6d, 0x69, 0x74, 0x7d, 0x3e, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64,
-  0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x3c, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65,
-  0x61, 0x20, 0x74, 0x79, 0x70, 0x65, 0x3d, 0x22, 0x74, 0x65, 0x78, 0x74,
-  0x22, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x3d, 0x32, 0x20, 0x6f, 0x6e, 0x6b,
-  0x65, 0x79, 0x70, 0x72, 0x65, 0x73, 0x73, 0x3d, 0x24, 0x7b, 0x65, 0x6e,
-  0x74, 0x65, 0x72, 0x53, 0x75, 0x62, 0x6d, 0x69, 0x74, 0x73, 0x7d, 0x20,
-  0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x6d, 0x65, 0x73,
-  0x73, 0x61, 0x67, 0x65, 0x7d, 0x22, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70,
-  0x75, 0x74, 0x3d, 0x24, 0x7b, 0x28, 0x65, 0x29, 0x20, 0x3d, 0x3e, 0x20,
-  0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x20, 0x3d, 0x20, 0x65, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74,
-  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x7d, 0x20, 0x70, 0x6c, 0x61, 0x63,
-  0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x3d, 0x22, 0x53, 0x61, 0x79,
-  0x20, 0x73, 0x6f, 0x6d, 0x65, 0x74, 0x68, 0x69, 0x6e, 0x67, 0x2e, 0x2e,
-  0x2e, 0x22, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76,
-  0x20, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x22, 0x72, 0x69, 0x67, 0x68,
-  0x74, 0x22, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x3c, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x20,
-  0x74, 0x79, 0x70, 0x65, 0x3d, 0x22, 0x73, 0x75, 0x62, 0x6d, 0x69, 0x74,
-  0x22, 0x20, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x3d, 0x24,
-  0x7b, 0x21, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6e, 0x67,
-  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x7d, 0x20, 0x3e, 0x53, 0x65, 0x6e,
-  0x64, 0x3c, 0x2f, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x3e, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
-  0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x20, 0x6f, 0x6e, 0x63, 0x6c, 0x69,
-  0x63, 0x6b, 0x3d, 0x24, 0x7b, 0x73, 0x74, 0x6f, 0x70, 0x7d, 0x20, 0x64,
-  0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x3d, 0x24, 0x7b, 0x67, 0x65,
-  0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x7d, 0x3e, 0x53, 0x74,
-  0x6f, 0x70, 0x3c, 0x2f, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x3e, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x3c, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x20, 0x6f, 0x6e, 0x63, 0x6c,
-  0x69, 0x63, 0x6b, 0x3d, 0x24, 0x7b, 0x72, 0x65, 0x73, 0x65, 0x74, 0x7d,
-  0x3e, 0x52, 0x65, 0x73, 0x65, 0x74, 0x3c, 0x2f, 0x62, 0x75, 0x74, 0x74,
-  0x6f, 0x6e, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66, 0x6f, 0x72, 0x6d, 0x3e,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x60, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
-  0x74, 0x20, 0x43, 0x68, 0x61, 0x74, 0x4c, 0x6f, 0x67, 0x20, 0x3d, 0x20,
-  0x28, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
-  0x20, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x20, 0x3d, 0x20,
-  0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x2e, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
-  0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x20,
-  0x3d, 0x20, 0x75, 0x73, 0x65, 0x52, 0x65, 0x66, 0x28, 0x6e, 0x75, 0x6c,
-  0x6c, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x75, 0x73,
-  0x65, 0x45, 0x66, 0x66, 0x65, 0x63, 0x74, 0x28, 0x28, 0x29, 0x20, 0x3d,
+  0x20, 0x20, 0x63, 0x68, 0x61, 0x74, 0x28, 0x6d, 0x65, 0x73, 0x73, 0x61,
+  0x67, 0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x3b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61,
+  0x67, 0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d, 0x20, 0x22,
+  0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+  0x65, 0x6e, 0x74, 0x65, 0x72, 0x53, 0x75, 0x62, 0x6d, 0x69, 0x74, 0x73,
+  0x20, 0x3d, 0x20, 0x28, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d,
   0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x2f, 0x2f, 0x20, 0x73, 0x63, 0x72, 0x6f, 0x6c, 0x6c, 0x20, 0x74, 0x6f,
-  0x20, 0x62, 0x6f, 0x74, 0x74, 0x6f, 0x6d, 0x20, 0x28, 0x69, 0x66, 0x20,
-  0x6e, 0x65, 0x65, 0x64, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x70, 0x61,
-  0x72, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x61,
-  0x69, 0x6e, 0x65, 0x72, 0x2e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74,
-  0x2e, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65,
-  0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x69, 0x66, 0x20, 0x28, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x20, 0x26,
-  0x26, 0x20, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x2e, 0x73, 0x63, 0x72,
-  0x6f, 0x6c, 0x6c, 0x48, 0x65, 0x69, 0x67, 0x68, 0x74, 0x20, 0x3c, 0x3d,
-  0x20, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x2e, 0x73, 0x63, 0x72, 0x6f,
-  0x6c, 0x6c, 0x54, 0x6f, 0x70, 0x20, 0x2b, 0x20, 0x70, 0x61, 0x72, 0x65,
-  0x6e, 0x74, 0x2e, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x48, 0x65, 0x69,
-  0x67, 0x68, 0x74, 0x20, 0x2b, 0x20, 0x33, 0x30, 0x30, 0x29, 0x20, 0x7b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70,
+  0x69, 0x66, 0x20, 0x28, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x68,
+  0x69, 0x63, 0x68, 0x20, 0x3d, 0x3d, 0x3d, 0x20, 0x31, 0x33, 0x20, 0x26,
+  0x26, 0x20, 0x21, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x2e, 0x73, 0x68, 0x69,
+  0x66, 0x74, 0x4b, 0x65, 0x79, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x73, 0x75, 0x62, 0x6d, 0x69,
+  0x74, 0x28, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x29, 0x3b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72,
+  0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x66, 0x6f, 0x72,
+  0x6d, 0x20, 0x6f, 0x6e, 0x73, 0x75, 0x62, 0x6d, 0x69, 0x74, 0x3d, 0x24,
+  0x7b, 0x73, 0x75, 0x62, 0x6d, 0x69, 0x74, 0x7d, 0x3e, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x3c, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65, 0x61, 0x20,
+  0x74, 0x79, 0x70, 0x65, 0x3d, 0x22, 0x74, 0x65, 0x78, 0x74, 0x22, 0x20,
+  0x72, 0x6f, 0x77, 0x73, 0x3d, 0x32, 0x20, 0x6f, 0x6e, 0x6b, 0x65, 0x79,
+  0x70, 0x72, 0x65, 0x73, 0x73, 0x3d, 0x24, 0x7b, 0x65, 0x6e, 0x74, 0x65,
+  0x72, 0x53, 0x75, 0x62, 0x6d, 0x69, 0x74, 0x73, 0x7d, 0x20, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x6d, 0x65, 0x73, 0x73, 0x61,
+  0x67, 0x65, 0x7d, 0x22, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74,
+  0x3d, 0x24, 0x7b, 0x28, 0x65, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x6d, 0x65,
+  0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20,
+  0x3d, 0x20, 0x65, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x76,
+  0x61, 0x6c, 0x75, 0x65, 0x7d, 0x20, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x68,
+  0x6f, 0x6c, 0x64, 0x65, 0x72, 0x3d, 0x22, 0x53, 0x61, 0x79, 0x20, 0x73,
+  0x6f, 0x6d, 0x65, 0x74, 0x68, 0x69, 0x6e, 0x67, 0x2e, 0x2e, 0x2e, 0x22,
+  0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x20, 0x63,
+  0x6c, 0x61, 0x73, 0x73, 0x3d, 0x22, 0x72, 0x69, 0x67, 0x68, 0x74, 0x22,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x3c, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x20, 0x74, 0x79,
+  0x70, 0x65, 0x3d, 0x22, 0x73, 0x75, 0x62, 0x6d, 0x69, 0x74, 0x22, 0x20,
+  0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x3d, 0x24, 0x7b, 0x21,
+  0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x76,
+  0x61, 0x6c, 0x75, 0x65, 0x7d, 0x20, 0x3e, 0x53, 0x65, 0x6e, 0x64, 0x3c,
+  0x2f, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x3e, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x62, 0x75,
+  0x74, 0x74, 0x6f, 0x6e, 0x20, 0x6f, 0x6e, 0x63, 0x6c, 0x69, 0x63, 0x6b,
+  0x3d, 0x24, 0x7b, 0x73, 0x74, 0x6f, 0x70, 0x7d, 0x20, 0x64, 0x69, 0x73,
+  0x61, 0x62, 0x6c, 0x65, 0x64, 0x3d, 0x24, 0x7b, 0x67, 0x65, 0x6e, 0x65,
+  0x72, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x7d, 0x3e, 0x53, 0x74, 0x6f, 0x70,
+  0x3c, 0x2f, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x3e, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x62,
+  0x75, 0x74, 0x74, 0x6f, 0x6e, 0x20, 0x6f, 0x6e, 0x63, 0x6c, 0x69, 0x63,
+  0x6b, 0x3d, 0x24, 0x7b, 0x72, 0x65, 0x73, 0x65, 0x74, 0x7d, 0x3e, 0x52,
+  0x65, 0x73, 0x65, 0x74, 0x3c, 0x2f, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66, 0x6f, 0x72, 0x6d, 0x3e, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
+  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+  0x43, 0x68, 0x61, 0x74, 0x4c, 0x6f, 0x67, 0x20, 0x3d, 0x20, 0x28, 0x70,
+  0x72, 0x6f, 0x70, 0x73, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6d,
+  0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x20, 0x3d, 0x20, 0x73, 0x65,
+  0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e,
+  0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x3b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+  0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x20, 0x3d, 0x20,
+  0x75, 0x73, 0x65, 0x52, 0x65, 0x66, 0x28, 0x6e, 0x75, 0x6c, 0x6c, 0x29,
+  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x75, 0x73, 0x65, 0x45,
+  0x66, 0x66, 0x65, 0x63, 0x74, 0x28, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20,
+  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f,
+  0x20, 0x73, 0x63, 0x72, 0x6f, 0x6c, 0x6c, 0x20, 0x74, 0x6f, 0x20, 0x62,
+  0x6f, 0x74, 0x74, 0x6f, 0x6d, 0x20, 0x28, 0x69, 0x66, 0x20, 0x6e, 0x65,
+  0x65, 0x64, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x70, 0x61, 0x72, 0x65,
+  0x6e, 0x74, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e,
+  0x65, 0x72, 0x2e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x2e, 0x70,
+  0x61, 0x72, 0x65, 0x6e, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74,
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66,
+  0x20, 0x28, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x20, 0x26, 0x26, 0x20,
+  0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x2e, 0x73, 0x63, 0x72, 0x6f, 0x6c,
+  0x6c, 0x48, 0x65, 0x69, 0x67, 0x68, 0x74, 0x20, 0x3c, 0x3d, 0x20, 0x70,
   0x61, 0x72, 0x65, 0x6e, 0x74, 0x2e, 0x73, 0x63, 0x72, 0x6f, 0x6c, 0x6c,
-  0x54, 0x6f, 0x28, 0x30, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74,
-  0x2e, 0x73, 0x63, 0x72, 0x6f, 0x6c, 0x6c, 0x48, 0x65, 0x69, 0x67, 0x68,
-  0x74, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x2c, 0x20, 0x5b, 0x6d,
-  0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x5d, 0x29, 0x0a, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63,
-  0x68, 0x61, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x5b,
-  0x75, 0x73, 0x65, 0x72, 0x2c, 0x20, 0x6d, 0x73, 0x67, 0x5d, 0x29, 0x20,
-  0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c,
-  0x60, 0x3c, 0x70, 0x20, 0x6b, 0x65, 0x79, 0x3d, 0x24, 0x7b, 0x6d, 0x73,
-  0x67, 0x7d, 0x3e, 0x3c, 0x73, 0x74, 0x72, 0x6f, 0x6e, 0x67, 0x3e, 0x24,
-  0x7b, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x28, 0x75, 0x73,
-  0x65, 0x72, 0x29, 0x7d, 0x3a, 0x3c, 0x2f, 0x73, 0x74, 0x72, 0x6f, 0x6e,
-  0x67, 0x3e, 0x20, 0x3c, 0x24, 0x7b, 0x4d, 0x61, 0x72, 0x6b, 0x64, 0x6f,
-  0x77, 0x6e, 0x69, 0x73, 0x68, 0x7d, 0x20, 0x74, 0x65, 0x78, 0x74, 0x3d,
-  0x24, 0x7b, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x28, 0x6d,
-  0x73, 0x67, 0x29, 0x7d, 0x20, 0x2f, 0x3e, 0x3c, 0x2f, 0x70, 0x3e, 0x60,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x3b, 0x0a, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20,
-  0x68, 0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x3c, 0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x69,
-  0x64, 0x3d, 0x22, 0x63, 0x68, 0x61, 0x74, 0x22, 0x20, 0x72, 0x65, 0x66,
-  0x3d, 0x24, 0x7b, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72,
-  0x7d, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x24, 0x7b, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x2e,
-  0x66, 0x6c, 0x61, 0x74, 0x4d, 0x61, 0x70, 0x28, 0x63, 0x68, 0x61, 0x74,
-  0x4c, 0x69, 0x6e, 0x65, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x3c, 0x2f, 0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e,
-  0x3e, 0x60, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x3b, 0x0a, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x43, 0x6f,
-  0x6e, 0x66, 0x69, 0x67, 0x46, 0x6f, 0x72, 0x6d, 0x20, 0x3d, 0x20, 0x28,
-  0x70, 0x72, 0x6f, 0x70, 0x73, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
-  0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f,
-  0x6e, 0x20, 0x3d, 0x20, 0x28, 0x65, 0x6c, 0x29, 0x20, 0x3d, 0x3e, 0x20,
-  0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x2e, 0x2e, 0x2e, 0x73, 0x65, 0x73,
-  0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x20,
-  0x5b, 0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x6e,
-  0x61, 0x6d, 0x65, 0x5d, 0x3a, 0x20, 0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72,
-  0x67, 0x65, 0x74, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x7d, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
-  0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73,
+  0x54, 0x6f, 0x70, 0x20, 0x2b, 0x20, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74,
+  0x2e, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x48, 0x65, 0x69, 0x67, 0x68,
+  0x74, 0x20, 0x2b, 0x20, 0x33, 0x30, 0x30, 0x29, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, 0x61, 0x72,
+  0x65, 0x6e, 0x74, 0x2e, 0x73, 0x63, 0x72, 0x6f, 0x6c, 0x6c, 0x54, 0x6f,
+  0x28, 0x30, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x2e, 0x73,
+  0x63, 0x72, 0x6f, 0x6c, 0x6c, 0x48, 0x65, 0x69, 0x67, 0x68, 0x74, 0x29,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x2c, 0x20, 0x5b, 0x6d, 0x65, 0x73,
+  0x73, 0x61, 0x67, 0x65, 0x73, 0x5d, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x61,
+  0x74, 0x4c, 0x69, 0x6e, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x5b, 0x75, 0x73,
+  0x65, 0x72, 0x2c, 0x20, 0x6d, 0x73, 0x67, 0x5d, 0x29, 0x20, 0x3d, 0x3e,
+  0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72,
+  0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x3c,
+  0x70, 0x20, 0x6b, 0x65, 0x79, 0x3d, 0x24, 0x7b, 0x6d, 0x73, 0x67, 0x7d,
+  0x3e, 0x3c, 0x73, 0x74, 0x72, 0x6f, 0x6e, 0x67, 0x3e, 0x24, 0x7b, 0x74,
+  0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x28, 0x75, 0x73, 0x65, 0x72,
+  0x29, 0x7d, 0x3a, 0x3c, 0x2f, 0x73, 0x74, 0x72, 0x6f, 0x6e, 0x67, 0x3e,
+  0x20, 0x3c, 0x24, 0x7b, 0x4d, 0x61, 0x72, 0x6b, 0x64, 0x6f, 0x77, 0x6e,
+  0x69, 0x73, 0x68, 0x7d, 0x20, 0x74, 0x65, 0x78, 0x74, 0x3d, 0x24, 0x7b,
+  0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x28, 0x6d, 0x73, 0x67,
+  0x29, 0x7d, 0x20, 0x2f, 0x3e, 0x3c, 0x2f, 0x70, 0x3e, 0x60, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x68, 0x74,
+  0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x69, 0x64, 0x3d,
+  0x22, 0x63, 0x68, 0x61, 0x74, 0x22, 0x20, 0x72, 0x65, 0x66, 0x3d, 0x24,
+  0x7b, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x7d, 0x3e,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24,
+  0x7b, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x2e, 0x66, 0x6c,
+  0x61, 0x74, 0x4d, 0x61, 0x70, 0x28, 0x63, 0x68, 0x61, 0x74, 0x4c, 0x69,
+  0x6e, 0x65, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x3c, 0x2f, 0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x3e, 0x60,
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x3b, 0x0a, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x43, 0x6f, 0x6e, 0x66,
+  0x69, 0x67, 0x46, 0x6f, 0x72, 0x6d, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72,
+  0x6f, 0x70, 0x73, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x70,
+  0x64, 0x61, 0x74, 0x65, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x20,
+  0x3d, 0x20, 0x28, 0x65, 0x6c, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x73, 0x65,
+  0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20,
+  0x3d, 0x20, 0x7b, 0x20, 0x2e, 0x2e, 0x2e, 0x73, 0x65, 0x73, 0x73, 0x69,
+  0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x20, 0x5b, 0x65,
+  0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x6e, 0x61, 0x6d,
+  0x65, 0x5d, 0x3a, 0x20, 0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65,
+  0x74, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x7d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x70,
+  0x64, 0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d,
+  0x20, 0x28, 0x65, 0x6c, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x70, 0x61, 0x72,
+  0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d, 0x20,
+  0x7b, 0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x20, 0x5b, 0x65, 0x6c, 0x2e, 0x74,
+  0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x5d, 0x3a,
+  0x20, 0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x76,
+  0x61, 0x6c, 0x75, 0x65, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x70, 0x64, 0x61, 0x74,
+  0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x46, 0x6c, 0x6f, 0x61, 0x74,
   0x20, 0x3d, 0x20, 0x28, 0x65, 0x6c, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x70,
   0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20,
   0x3d, 0x20, 0x7b, 0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72, 0x61, 0x6d,
   0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x20, 0x5b, 0x65, 0x6c,
   0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x6e, 0x61, 0x6d, 0x65,
-  0x5d, 0x3a, 0x20, 0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74,
-  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20,
+  0x5d, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x73, 0x65, 0x46, 0x6c, 0x6f, 0x61,
+  0x74, 0x28, 0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x70, 0x64,
-  0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x46, 0x6c, 0x6f,
-  0x61, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x65, 0x6c, 0x29, 0x20, 0x3d, 0x3e,
-  0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72,
-  0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x20, 0x5b,
-  0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x6e, 0x61,
-  0x6d, 0x65, 0x5d, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x73, 0x65, 0x46, 0x6c,
-  0x6f, 0x61, 0x74, 0x28, 0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65,
-  0x74, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x20, 0x7d, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75,
-  0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x49,
-  0x6e, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x65, 0x6c, 0x29, 0x20, 0x3d, 0x3e,
-  0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72,
-  0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x20, 0x5b,
-  0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x6e, 0x61,
-  0x6d, 0x65, 0x5d, 0x3a, 0x20, 0x4d, 0x61, 0x74, 0x68, 0x2e, 0x66, 0x6c,
-  0x6f, 0x6f, 0x72, 0x28, 0x70, 0x61, 0x72, 0x73, 0x65, 0x46, 0x6c, 0x6f,
-  0x61, 0x74, 0x28, 0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74,
-  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x29, 0x20, 0x7d, 0x0a, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
-  0x67, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x4a, 0x73, 0x6f, 0x6e, 0x53,
-  0x63, 0x68, 0x65, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x70, 0x4f, 0x72, 0x64,
-  0x65, 0x72, 0x20, 0x3d, 0x20, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x28,
-  0x27, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
-  0x6e, 0x73, 0x74, 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x47, 0x72,
+  0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x49, 0x6e, 0x74,
+  0x20, 0x3d, 0x20, 0x28, 0x65, 0x6c, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x70,
+  0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20,
+  0x3d, 0x20, 0x7b, 0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72, 0x61, 0x6d,
+  0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x20, 0x5b, 0x65, 0x6c,
+  0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x6e, 0x61, 0x6d, 0x65,
+  0x5d, 0x3a, 0x20, 0x4d, 0x61, 0x74, 0x68, 0x2e, 0x66, 0x6c, 0x6f, 0x6f,
+  0x72, 0x28, 0x70, 0x61, 0x72, 0x73, 0x65, 0x46, 0x6c, 0x6f, 0x61, 0x74,
+  0x28, 0x65, 0x6c, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x76,
+  0x61, 0x6c, 0x75, 0x65, 0x29, 0x29, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x72,
   0x61, 0x6d, 0x6d, 0x61, 0x72, 0x4a, 0x73, 0x6f, 0x6e, 0x53, 0x63, 0x68,
   0x65, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x70, 0x4f, 0x72, 0x64, 0x65, 0x72,
-  0x20, 0x3d, 0x20, 0x28, 0x65, 0x6c, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x67,
-  0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x4a, 0x73, 0x6f, 0x6e, 0x53, 0x63,
-  0x68, 0x65, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x70, 0x4f, 0x72, 0x64, 0x65,
-  0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d, 0x20, 0x65, 0x6c,
-  0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
-  0x74, 0x20, 0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x4a, 0x53, 0x4f,
-  0x4e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x47, 0x72, 0x61, 0x6d, 0x6d,
-  0x61, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x72, 0x79,
-  0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x73, 0x63, 0x68, 0x65, 0x6d,
-  0x61, 0x20, 0x3d, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72,
-  0x73, 0x65, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61,
-  0x6c, 0x75, 0x65, 0x2e, 0x67, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x29,
+  0x20, 0x3d, 0x20, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x28, 0x27, 0x27,
+  0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+  0x74, 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x47, 0x72, 0x61, 0x6d,
+  0x6d, 0x61, 0x72, 0x4a, 0x73, 0x6f, 0x6e, 0x53, 0x63, 0x68, 0x65, 0x6d,
+  0x61, 0x50, 0x72, 0x6f, 0x70, 0x4f, 0x72, 0x64, 0x65, 0x72, 0x20, 0x3d,
+  0x20, 0x28, 0x65, 0x6c, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x67, 0x72, 0x61,
+  0x6d, 0x6d, 0x61, 0x72, 0x4a, 0x73, 0x6f, 0x6e, 0x53, 0x63, 0x68, 0x65,
+  0x6d, 0x61, 0x50, 0x72, 0x6f, 0x70, 0x4f, 0x72, 0x64, 0x65, 0x72, 0x2e,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3d, 0x20, 0x65, 0x6c, 0x2e, 0x74,
+  0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
+  0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x4a, 0x53, 0x4f, 0x4e, 0x53,
+  0x63, 0x68, 0x65, 0x6d, 0x61, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72,
+  0x20, 0x3d, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b,
   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63,
-  0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74,
-  0x65, 0x72, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x53, 0x63, 0x68,
-  0x65, 0x6d, 0x61, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x65, 0x72,
-  0x28, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x67, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x4a, 0x73, 0x6f,
-  0x6e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x70, 0x4f,
-  0x72, 0x64, 0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x0a, 0x20,
+  0x6f, 0x6e, 0x73, 0x74, 0x20, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x20,
+  0x3d, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65,
+  0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75,
+  0x65, 0x2e, 0x67, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x29, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
+  0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x65, 0x72,
+  0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x53, 0x63, 0x68, 0x65, 0x6d,
+  0x61, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x65, 0x72, 0x28, 0x0a,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x2e, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x28, 0x27, 0x2c, 0x27, 0x29,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x64, 0x75, 0x63, 0x65, 0x28, 0x28,
-  0x61, 0x63, 0x63, 0x2c, 0x20, 0x63, 0x75, 0x72, 0x2c, 0x20, 0x69, 0x29,
-  0x20, 0x3d, 0x3e, 0x20, 0x28, 0x7b, 0x2e, 0x2e, 0x2e, 0x61, 0x63, 0x63,
-  0x2c, 0x20, 0x5b, 0x63, 0x75, 0x72, 0x2e, 0x74, 0x72, 0x69, 0x6d, 0x28,
-  0x29, 0x5d, 0x3a, 0x20, 0x69, 0x7d, 0x29, 0x2c, 0x20, 0x7b, 0x7d, 0x29,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x29,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63,
-  0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x65, 0x72, 0x2e, 0x76, 0x69, 0x73,
-  0x69, 0x74, 0x28, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2c, 0x20, 0x27,
-  0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x20, 0x3d, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72,
-  0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x67,
-  0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x3a, 0x20, 0x63, 0x6f, 0x6e, 0x76,
-  0x65, 0x72, 0x74, 0x65, 0x72, 0x2e, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74,
-  0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x28, 0x29, 0x2c, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74,
-  0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x6c, 0x65, 0x72, 0x74,
-  0x28, 0x60, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x20, 0x66, 0x61,
-  0x69, 0x6c, 0x65, 0x64, 0x3a, 0x20, 0x24, 0x7b, 0x65, 0x2e, 0x6d, 0x65,
-  0x73, 0x73, 0x61, 0x67, 0x65, 0x7d, 0x60, 0x29, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
-  0x6e, 0x73, 0x74, 0x20, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69, 0x65,
-  0x6c, 0x64, 0x20, 0x3d, 0x20, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c,
-  0x2c, 0x20, 0x6d, 0x61, 0x78, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x2c, 0x20,
-  0x6e, 0x61, 0x6d, 0x65, 0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x2c, 0x20,
-  0x76, 0x61, 0x6c, 0x75, 0x65, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74,
-  0x75, 0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76,
+  0x67, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x4a, 0x73, 0x6f, 0x6e, 0x53,
+  0x63, 0x68, 0x65, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x70, 0x4f, 0x72, 0x64,
+  0x65, 0x72, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e,
+  0x73, 0x70, 0x6c, 0x69, 0x74, 0x28, 0x27, 0x2c, 0x27, 0x29, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x2e, 0x72, 0x65, 0x64, 0x75, 0x63, 0x65, 0x28, 0x28, 0x61, 0x63,
+  0x63, 0x2c, 0x20, 0x63, 0x75, 0x72, 0x2c, 0x20, 0x69, 0x29, 0x20, 0x3d,
+  0x3e, 0x20, 0x28, 0x7b, 0x2e, 0x2e, 0x2e, 0x61, 0x63, 0x63, 0x2c, 0x20,
+  0x5b, 0x63, 0x75, 0x72, 0x2e, 0x74, 0x72, 0x69, 0x6d, 0x28, 0x29, 0x5d,
+  0x3a, 0x20, 0x69, 0x7d, 0x29, 0x2c, 0x20, 0x7b, 0x7d, 0x29, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x29, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
+  0x76, 0x65, 0x72, 0x74, 0x65, 0x72, 0x2e, 0x76, 0x69, 0x73, 0x69, 0x74,
+  0x28, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2c, 0x20, 0x27, 0x27, 0x29,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70,
+  0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20,
+  0x3d, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72, 0x61, 0x6d,
+  0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2c, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x67, 0x72, 0x61,
+  0x6d, 0x6d, 0x61, 0x72, 0x3a, 0x20, 0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72,
+  0x74, 0x65, 0x72, 0x2e, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x47, 0x72,
+  0x61, 0x6d, 0x6d, 0x61, 0x72, 0x28, 0x29, 0x2c, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68,
+  0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x6c, 0x65, 0x72, 0x74, 0x28, 0x60,
+  0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x20, 0x66, 0x61, 0x69, 0x6c,
+  0x65, 0x64, 0x3a, 0x20, 0x24, 0x7b, 0x65, 0x2e, 0x6d, 0x65, 0x73, 0x73,
+  0x61, 0x67, 0x65, 0x7d, 0x60, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
+  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+  0x74, 0x20, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64,
+  0x20, 0x3d, 0x20, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x2c, 0x20,
+  0x6d, 0x61, 0x78, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x2c, 0x20, 0x6e, 0x61,
+  0x6d, 0x65, 0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x2c, 0x20, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
+  0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x3d, 0x22,
+  0x24, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x22, 0x3e, 0x24, 0x7b, 0x6c,
+  0x61, 0x62, 0x65, 0x6c, 0x7d, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c,
   0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72,
-  0x3d, 0x22, 0x24, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x22, 0x3e, 0x24,
-  0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x7d, 0x3c, 0x2f, 0x6c, 0x61, 0x62,
-  0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74,
-  0x79, 0x70, 0x65, 0x3d, 0x22, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x22, 0x20,
-  0x69, 0x64, 0x3d, 0x22, 0x24, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x22,
-  0x20, 0x6d, 0x69, 0x6e, 0x3d, 0x22, 0x24, 0x7b, 0x6d, 0x69, 0x6e, 0x7d,
-  0x22, 0x20, 0x6d, 0x61, 0x78, 0x3d, 0x22, 0x24, 0x7b, 0x6d, 0x61, 0x78,
-  0x7d, 0x22, 0x20, 0x73, 0x74, 0x65, 0x70, 0x3d, 0x22, 0x24, 0x7b, 0x73,
-  0x74, 0x65, 0x70, 0x7d, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22,
-  0x24, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x22, 0x20, 0x76, 0x61, 0x6c,
-  0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x7d,
-  0x22, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b,
-  0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73,
-  0x46, 0x6c, 0x6f, 0x61, 0x74, 0x7d, 0x20, 0x2f, 0x3e, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x73,
-  0x70, 0x61, 0x6e, 0x3e, 0x24, 0x7b, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x7d,
-  0x3c, 0x2f, 0x73, 0x70, 0x61, 0x6e, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e,
+  0x20, 0x20, 0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74, 0x79, 0x70,
+  0x65, 0x3d, 0x22, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x22, 0x20, 0x69, 0x64,
+  0x3d, 0x22, 0x24, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x22, 0x20, 0x6d,
+  0x69, 0x6e, 0x3d, 0x22, 0x24, 0x7b, 0x6d, 0x69, 0x6e, 0x7d, 0x22, 0x20,
+  0x6d, 0x61, 0x78, 0x3d, 0x22, 0x24, 0x7b, 0x6d, 0x61, 0x78, 0x7d, 0x22,
+  0x20, 0x73, 0x74, 0x65, 0x70, 0x3d, 0x22, 0x24, 0x7b, 0x73, 0x74, 0x65,
+  0x70, 0x7d, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x24, 0x7b,
+  0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65,
+  0x3d, 0x22, 0x24, 0x7b, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x7d, 0x22, 0x20,
+  0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70,
+  0x64, 0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x46, 0x6c,
+  0x6f, 0x61, 0x74, 0x7d, 0x20, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x73, 0x70, 0x61,
+  0x6e, 0x3e, 0x24, 0x7b, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x7d, 0x3c, 0x2f,
+  0x73, 0x70, 0x61, 0x6e, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x60, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x7d, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x49, 0x6e, 0x74, 0x46, 0x69,
+  0x65, 0x6c, 0x64, 0x20, 0x3d, 0x20, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65,
+  0x6c, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x2c,
+  0x20, 0x6e, 0x61, 0x6d, 0x65, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65,
+  0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x68,
+  0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61,
+  0x62, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x3d, 0x22, 0x24, 0x7b, 0x6e,
+  0x61, 0x6d, 0x65, 0x7d, 0x22, 0x3e, 0x24, 0x7b, 0x6c, 0x61, 0x62, 0x65,
+  0x6c, 0x7d, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
+  0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74, 0x79, 0x70, 0x65, 0x3d, 0x22,
+  0x72, 0x61, 0x6e, 0x67, 0x65, 0x22, 0x20, 0x69, 0x64, 0x3d, 0x22, 0x24,
+  0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x22, 0x20, 0x6d, 0x69, 0x6e, 0x3d,
+  0x22, 0x24, 0x7b, 0x6d, 0x69, 0x6e, 0x7d, 0x22, 0x20, 0x6d, 0x61, 0x78,
+  0x3d, 0x22, 0x24, 0x7b, 0x6d, 0x61, 0x78, 0x7d, 0x22, 0x20, 0x6e, 0x61,
+  0x6d, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x22,
+  0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x7d, 0x22, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75,
+  0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x61,
+  0x72, 0x61, 0x6d, 0x73, 0x49, 0x6e, 0x74, 0x7d, 0x20, 0x2f, 0x3e, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x73, 0x70, 0x61, 0x6e, 0x3e, 0x24, 0x7b, 0x76, 0x61, 0x6c, 0x75,
+  0x65, 0x7d, 0x3c, 0x2f, 0x73, 0x70, 0x61, 0x6e, 0x3e, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69,
+  0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x60,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x3b, 0x0a, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75,
+  0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x52,
+  0x65, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x65, 0x29, 0x20, 0x3d,
+  0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x65, 0x2e, 0x70, 0x72, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x44, 0x65, 0x66,
+  0x61, 0x75, 0x6c, 0x74, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x75, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x65, 0x74, 0x54, 0x6f, 0x44,
+  0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x41, 0x6e, 0x64, 0x41, 0x70, 0x70,
+  0x6c, 0x79, 0x28, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
+  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+  0x74, 0x20, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x52, 0x65, 0x73, 0x65, 0x74, 0x42, 0x75, 0x74, 0x74, 0x6f,
+  0x6e, 0x20, 0x3d, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28,
+  0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x65, 0x64, 0x55, 0x73, 0x65, 0x72,
+  0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x61, 0x6c,
+  0x75, 0x65, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x20, 0x3d, 0x3d, 0x20, 0x27,
+  0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x27, 0x29, 0x20, 0x7b, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65,
+  0x74, 0x75, 0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
+  0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x20, 0x64, 0x69, 0x73, 0x61, 0x62,
+  0x6c, 0x65, 0x64, 0x3e, 0x55, 0x73, 0x69, 0x6e, 0x67, 0x20, 0x64, 0x65,
+  0x66, 0x61, 0x75, 0x6c, 0x74, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x3c, 0x2f, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x3e, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x60, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
+  0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x62, 0x75, 0x74, 0x74, 0x6f,
+  0x6e, 0x20, 0x6f, 0x6e, 0x63, 0x6c, 0x69, 0x63, 0x6b, 0x3d, 0x24, 0x7b,
+  0x75, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
+  0x52, 0x65, 0x73, 0x65, 0x74, 0x7d, 0x3e, 0x52, 0x65, 0x73, 0x65, 0x74,
+  0x20, 0x61, 0x6c, 0x6c, 0x20, 0x74, 0x6f, 0x20, 0x64, 0x65, 0x66, 0x61,
+  0x75, 0x6c, 0x74, 0x3c, 0x2f, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x3e,
   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x60, 0x0a, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x49, 0x6e, 0x74,
-  0x46, 0x69, 0x65, 0x6c, 0x64, 0x20, 0x3d, 0x20, 0x28, 0x7b, 0x6c, 0x61,
-  0x62, 0x65, 0x6c, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x2c, 0x20, 0x6d, 0x69,
-  0x6e, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x2c, 0x20, 0x76, 0x61, 0x6c,
-  0x75, 0x65, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e,
-  0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
-  0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x3d, 0x22, 0x24,
-  0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x22, 0x3e, 0x24, 0x7b, 0x6c, 0x61,
-  0x62, 0x65, 0x6c, 0x7d, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e,
+  0x20, 0x20, 0x20, 0x75, 0x73, 0x65, 0x45, 0x66, 0x66, 0x65, 0x63, 0x74,
+  0x28, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x61, 0x75, 0x74, 0x6f,
+  0x73, 0x61, 0x76, 0x65, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
+  0x65, 0x20, 0x6f, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x72, 0x79, 0x20, 0x63,
+  0x68, 0x61, 0x6e, 0x67, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x75, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61,
+  0x74, 0x65, 0x41, 0x75, 0x74, 0x6f, 0x73, 0x61, 0x76, 0x65, 0x28, 0x29,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x2c, 0x20, 0x5b, 0x73,
+  0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65,
+  0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c,
+  0x75, 0x65, 0x5d, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x66, 0x6f,
+  0x72, 0x6d, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x3c, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e,
   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74, 0x79, 0x70, 0x65,
-  0x3d, 0x22, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x22, 0x20, 0x69, 0x64, 0x3d,
-  0x22, 0x24, 0x7b, 0x6e, 0x61, 0x6d, 0x65, 0x7d, 0x22, 0x20, 0x6d, 0x69,
-  0x6e, 0x3d, 0x22, 0x24, 0x7b, 0x6d, 0x69, 0x6e, 0x7d, 0x22, 0x20, 0x6d,
-  0x61, 0x78, 0x3d, 0x22, 0x24, 0x7b, 0x6d, 0x61, 0x78, 0x7d, 0x22, 0x20,
-  0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x6e, 0x61, 0x6d, 0x65,
-  0x7d, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b,
-  0x76, 0x61, 0x6c, 0x75, 0x65, 0x7d, 0x22, 0x20, 0x6f, 0x6e, 0x69, 0x6e,
-  0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65,
-  0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x49, 0x6e, 0x74, 0x7d, 0x20, 0x2f,
-  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x3c, 0x73, 0x70, 0x61, 0x6e, 0x3e, 0x24, 0x7b, 0x76, 0x61,
-  0x6c, 0x75, 0x65, 0x7d, 0x3c, 0x2f, 0x73, 0x70, 0x61, 0x6e, 0x3e, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f,
-  0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x3b, 0x0a,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
-  0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x3c, 0x66, 0x6f, 0x72, 0x6d, 0x3e, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x66, 0x69,
-  0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76,
-  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20, 0x66,
-  0x6f, 0x72, 0x3d, 0x22, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x22, 0x3e,
-  0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65,
-  0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72,
-  0x65, 0x61, 0x20, 0x74, 0x79, 0x70, 0x65, 0x3d, 0x22, 0x74, 0x65, 0x78,
-  0x74, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x70, 0x72, 0x6f,
-  0x6d, 0x70, 0x74, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22,
-  0x24, 0x7b, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61,
-  0x6c, 0x75, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x7d, 0x22,
-  0x20, 0x72, 0x6f, 0x77, 0x73, 0x3d, 0x34, 0x20, 0x6f, 0x6e, 0x69, 0x6e,
-  0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65,
-  0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x7d, 0x2f, 0x3e, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
-  0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73,
-  0x65, 0x74, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x3c, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74,
-  0x20, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x22, 0x74, 0x77, 0x6f, 0x22,
-  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c,
-  0x61, 0x62, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x3d, 0x22, 0x75, 0x73,
-  0x65, 0x72, 0x22, 0x3e, 0x55, 0x73, 0x65, 0x72, 0x20, 0x6e, 0x61, 0x6d,
-  0x65, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74, 0x79, 0x70, 0x65, 0x3d,
-  0x22, 0x74, 0x65, 0x78, 0x74, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d,
-  0x22, 0x75, 0x73, 0x65, 0x72, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65,
-  0x3d, 0x22, 0x24, 0x7b, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e,
-  0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x75, 0x73, 0x65, 0x72, 0x7d, 0x22,
-  0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75,
-  0x70, 0x64, 0x61, 0x74, 0x65, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e,
-  0x7d, 0x20, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61,
-  0x62, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x3d, 0x22, 0x62, 0x6f, 0x74,
-  0x22, 0x3e, 0x42, 0x6f, 0x74, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3c, 0x2f,
-  0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x69, 0x6e,
-  0x70, 0x75, 0x74, 0x20, 0x74, 0x79, 0x70, 0x65, 0x3d, 0x22, 0x74, 0x65,
-  0x78, 0x74, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x63, 0x68,
-  0x61, 0x72, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24,
-  0x7b, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c,
-  0x75, 0x65, 0x2e, 0x63, 0x68, 0x61, 0x72, 0x7d, 0x22, 0x20, 0x6f, 0x6e,
-  0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61,
-  0x74, 0x65, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x7d, 0x20, 0x2f,
-  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66, 0x69, 0x65,
-  0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x66, 0x69, 0x65, 0x6c, 0x64,
-  0x73, 0x65, 0x74, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x3d,
-  0x22, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, 0x3e, 0x50,
-  0x72, 0x6f, 0x6d, 0x70, 0x74, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61,
-  0x74, 0x65, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x3c, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65, 0x61, 0x20, 0x69,
-  0x64, 0x3d, 0x22, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22,
-  0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x74, 0x65, 0x6d, 0x70, 0x6c,
-  0x61, 0x74, 0x65, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22,
-  0x24, 0x7b, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61,
-  0x6c, 0x75, 0x65, 0x2e, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
-  0x7d, 0x22, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x3d, 0x34, 0x20, 0x6f, 0x6e,
-  0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61,
-  0x74, 0x65, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x7d, 0x2f, 0x3e,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69,
-  0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20,
-  0x66, 0x6f, 0x72, 0x3d, 0x22, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
-  0x65, 0x22, 0x3e, 0x43, 0x68, 0x61, 0x74, 0x20, 0x68, 0x69, 0x73, 0x74,
-  0x6f, 0x72, 0x79, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
-  0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
-  0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65, 0x61, 0x20, 0x69, 0x64, 0x3d,
-  0x22, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, 0x20, 0x6e,
-  0x61, 0x6d, 0x65, 0x3d, 0x22, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x72, 0x79,
-  0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, 0x20, 0x76, 0x61,
-  0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x73, 0x65, 0x73, 0x73, 0x69,
-  0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x68, 0x69, 0x73,
-  0x74, 0x6f, 0x72, 0x79, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65,
-  0x7d, 0x22, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x3d, 0x31, 0x20, 0x6f, 0x6e,
-  0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61,
-  0x74, 0x65, 0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x7d, 0x2f, 0x3e,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69,
-  0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20,
-  0x66, 0x6f, 0x72, 0x3d, 0x22, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74,
-  0x65, 0x22, 0x3e, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x3c, 0x2f,
-  0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x74, 0x65,
-  0x78, 0x74, 0x61, 0x72, 0x65, 0x61, 0x20, 0x69, 0x64, 0x3d, 0x22, 0x67,
-  0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65,
-  0x3d, 0x22, 0x67, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x22, 0x20, 0x70,
-  0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x3d, 0x22,
-  0x55, 0x73, 0x65, 0x20, 0x67, 0x62, 0x6e, 0x66, 0x20, 0x6f, 0x72, 0x20,
-  0x4a, 0x53, 0x4f, 0x4e, 0x20, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2b,
-  0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x22, 0x20, 0x76, 0x61, 0x6c,
-  0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
-  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x67, 0x72, 0x61, 0x6d, 0x6d,
-  0x61, 0x72, 0x7d, 0x22, 0x20, 0x72, 0x6f, 0x77, 0x73, 0x3d, 0x34, 0x20,
-  0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70,
-  0x64, 0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x7d, 0x2f,
-  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74,
-  0x79, 0x70, 0x65, 0x3d, 0x22, 0x74, 0x65, 0x78, 0x74, 0x22, 0x20, 0x6e,
-  0x61, 0x6d, 0x65, 0x3d, 0x22, 0x70, 0x72, 0x6f, 0x70, 0x2d, 0x6f, 0x72,
-  0x64, 0x65, 0x72, 0x22, 0x20, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f,
-  0x6c, 0x64, 0x65, 0x72, 0x3d, 0x22, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x3a,
-  0x20, 0x70, 0x72, 0x6f, 0x70, 0x31, 0x2c, 0x70, 0x72, 0x6f, 0x70, 0x32,
-  0x2c, 0x70, 0x72, 0x6f, 0x70, 0x33, 0x22, 0x20, 0x6f, 0x6e, 0x69, 0x6e,
-  0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65,
-  0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x4a, 0x73, 0x6f, 0x6e, 0x53,
-  0x63, 0x68, 0x65, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x70, 0x4f, 0x72, 0x64,
-  0x65, 0x72, 0x7d, 0x20, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x62, 0x75,
-  0x74, 0x74, 0x6f, 0x6e, 0x20, 0x74, 0x79, 0x70, 0x65, 0x3d, 0x22, 0x62,
-  0x75, 0x74, 0x74, 0x6f, 0x6e, 0x22, 0x20, 0x6f, 0x6e, 0x63, 0x6c, 0x69,
-  0x63, 0x6b, 0x3d, 0x24, 0x7b, 0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74,
-  0x4a, 0x53, 0x4f, 0x4e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x47, 0x72,
-  0x61, 0x6d, 0x6d, 0x61, 0x72, 0x7d, 0x3e, 0x43, 0x6f, 0x6e, 0x76, 0x65,
-  0x72, 0x74, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x20, 0x53, 0x63, 0x68, 0x65,
-  0x6d, 0x61, 0x3c, 0x2f, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x3e, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x3c, 0x24, 0x7b, 0x55, 0x73, 0x65, 0x72, 0x54, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x65, 0x74, 0x42, 0x75, 0x74,
+  0x74, 0x6f, 0x6e, 0x7d, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66, 0x69, 0x65, 0x6c, 0x64,
   0x73, 0x65, 0x74, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x3c, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65,
-  0x74, 0x20, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x22, 0x74, 0x77, 0x6f,
-  0x22, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x24, 0x7b, 0x49, 0x6e, 0x74, 0x46, 0x69, 0x65, 0x6c,
-  0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x50,
-  0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x2c,
-  0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x32, 0x30, 0x34, 0x38, 0x2c, 0x20,
-  0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x2d, 0x31, 0x2c, 0x20, 0x6e, 0x61, 0x6d,
-  0x65, 0x3a, 0x20, 0x22, 0x6e, 0x5f, 0x70, 0x72, 0x65, 0x64, 0x69, 0x63,
-  0x74, 0x22, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70,
-  0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e,
-  0x6e, 0x5f, 0x70, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x7d, 0x29, 0x7d,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69, 0x65, 0x6c,
-  0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x54,
-  0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x22, 0x2c,
-  0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31, 0x2e, 0x35, 0x2c, 0x20, 0x6d,
-  0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d,
-  0x65, 0x3a, 0x20, 0x22, 0x74, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74,
-  0x75, 0x72, 0x65, 0x22, 0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x3a, 0x20,
-  0x30, 0x2e, 0x30, 0x31, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a,
-  0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x2e, 0x74, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72,
-  0x65, 0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61, 0x74,
-  0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c,
-  0x3a, 0x20, 0x22, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x20,
-  0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x20, 0x73, 0x65, 0x71, 0x75, 0x65,
-  0x6e, 0x63, 0x65, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x32,
-  0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30,
-  0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x72, 0x65, 0x70,
-  0x65, 0x61, 0x74, 0x5f, 0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x22,
-  0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x31,
-  0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72,
-  0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x72, 0x65,
-  0x70, 0x65, 0x61, 0x74, 0x5f, 0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79,
-  0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x24, 0x7b, 0x49, 0x6e, 0x74, 0x46, 0x69, 0x65,
-  0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22,
-  0x43, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, 0x20, 0x4e, 0x20, 0x74,
-  0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x70, 0x65,
-  0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78,
-  0x3a, 0x20, 0x32, 0x30, 0x34, 0x38, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a,
-  0x20, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x72,
-  0x65, 0x70, 0x65, 0x61, 0x74, 0x5f, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x6e,
-  0x22, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61,
-  0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x72,
-  0x65, 0x70, 0x65, 0x61, 0x74, 0x5f, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x6e,
-  0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x24, 0x7b, 0x49, 0x6e, 0x74, 0x46, 0x69, 0x65,
-  0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22,
-  0x54, 0x6f, 0x70, 0x2d, 0x4b, 0x20, 0x73, 0x61, 0x6d, 0x70, 0x6c, 0x69,
-  0x6e, 0x67, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31, 0x30,
-  0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x2d, 0x31, 0x2c, 0x20,
-  0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x74, 0x6f, 0x70, 0x5f, 0x6b,
-  0x22, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61,
-  0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74,
-  0x6f, 0x70, 0x5f, 0x6b, 0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x74, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
+  0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x3d, 0x22, 0x70,
+  0x72, 0x6f, 0x6d, 0x70, 0x74, 0x22, 0x3e, 0x50, 0x72, 0x6f, 0x6d, 0x70,
+  0x74, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65, 0x61, 0x20, 0x74, 0x79,
+  0x70, 0x65, 0x3d, 0x22, 0x74, 0x65, 0x78, 0x74, 0x22, 0x20, 0x6e, 0x61,
+  0x6d, 0x65, 0x3d, 0x22, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x22, 0x20,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x73, 0x65, 0x73,
+  0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x70,
+  0x72, 0x6f, 0x6d, 0x70, 0x74, 0x7d, 0x22, 0x20, 0x72, 0x6f, 0x77, 0x73,
+  0x3d, 0x34, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24,
+  0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x53, 0x65, 0x73, 0x73, 0x69,
+  0x6f, 0x6e, 0x7d, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
+  0x2f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e, 0x0a, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x66,
+  0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x20, 0x63, 0x6c, 0x61, 0x73,
+  0x73, 0x3d, 0x22, 0x74, 0x77, 0x6f, 0x22, 0x3e, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69,
+  0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20,
+  0x66, 0x6f, 0x72, 0x3d, 0x22, 0x75, 0x73, 0x65, 0x72, 0x22, 0x3e, 0x55,
+  0x73, 0x65, 0x72, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3c, 0x2f, 0x6c, 0x61,
+  0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x69, 0x6e, 0x70, 0x75,
+  0x74, 0x20, 0x74, 0x79, 0x70, 0x65, 0x3d, 0x22, 0x74, 0x65, 0x78, 0x74,
+  0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x75, 0x73, 0x65, 0x72,
+  0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x73,
+  0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65,
+  0x2e, 0x75, 0x73, 0x65, 0x72, 0x7d, 0x22, 0x20, 0x6f, 0x6e, 0x69, 0x6e,
+  0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65,
+  0x53, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x7d, 0x20, 0x2f, 0x3e, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20, 0x66,
+  0x6f, 0x72, 0x3d, 0x22, 0x62, 0x6f, 0x74, 0x22, 0x3e, 0x42, 0x6f, 0x74,
+  0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74,
+  0x79, 0x70, 0x65, 0x3d, 0x22, 0x74, 0x65, 0x78, 0x74, 0x22, 0x20, 0x6e,
+  0x61, 0x6d, 0x65, 0x3d, 0x22, 0x63, 0x68, 0x61, 0x72, 0x22, 0x20, 0x76,
+  0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x73, 0x65, 0x73, 0x73,
+  0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x63, 0x68,
+  0x61, 0x72, 0x7d, 0x22, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74,
+  0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x53, 0x65, 0x73,
+  0x73, 0x69, 0x6f, 0x6e, 0x7d, 0x20, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64,
+  0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x3c, 0x2f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74,
+  0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x3c, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61, 0x62,
+  0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x3d, 0x22, 0x74, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x22, 0x3e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
+  0x20, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x3c, 0x2f, 0x6c,
+  0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x74, 0x65, 0x78,
+  0x74, 0x61, 0x72, 0x65, 0x61, 0x20, 0x69, 0x64, 0x3d, 0x22, 0x74, 0x65,
+  0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65,
+  0x3d, 0x22, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, 0x20,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24, 0x7b, 0x73, 0x65, 0x73,
+  0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74,
+  0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x7d, 0x22, 0x20, 0x72, 0x6f,
+  0x77, 0x73, 0x3d, 0x34, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74,
+  0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x53, 0x65, 0x73,
+  0x73, 0x69, 0x6f, 0x6e, 0x7d, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69,
+  0x76, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x3d, 0x22,
+  0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, 0x3e, 0x43, 0x68,
+  0x61, 0x74, 0x20, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x72, 0x79, 0x20, 0x74,
+  0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x3c, 0x2f, 0x6c, 0x61, 0x62,
+  0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x74, 0x65, 0x78, 0x74, 0x61,
+  0x72, 0x65, 0x61, 0x20, 0x69, 0x64, 0x3d, 0x22, 0x74, 0x65, 0x6d, 0x70,
+  0x6c, 0x61, 0x74, 0x65, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22,
+  0x68, 0x69, 0x73, 0x74, 0x6f, 0x72, 0x79, 0x54, 0x65, 0x6d, 0x70, 0x6c,
+  0x61, 0x74, 0x65, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22,
+  0x24, 0x7b, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x2e, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x72, 0x79, 0x54,
+  0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x7d, 0x22, 0x20, 0x72, 0x6f,
+  0x77, 0x73, 0x3d, 0x31, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74,
+  0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x53, 0x65, 0x73,
+  0x73, 0x69, 0x6f, 0x6e, 0x7d, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69,
+  0x76, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x3d, 0x22,
+  0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x22, 0x3e, 0x47, 0x72,
+  0x61, 0x6d, 0x6d, 0x61, 0x72, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65,
+  0x61, 0x20, 0x69, 0x64, 0x3d, 0x22, 0x67, 0x72, 0x61, 0x6d, 0x6d, 0x61,
+  0x72, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x67, 0x72, 0x61,
+  0x6d, 0x6d, 0x61, 0x72, 0x22, 0x20, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x68,
+  0x6f, 0x6c, 0x64, 0x65, 0x72, 0x3d, 0x22, 0x55, 0x73, 0x65, 0x20, 0x67,
+  0x62, 0x6e, 0x66, 0x20, 0x6f, 0x72, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x20,
+  0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2b, 0x63, 0x6f, 0x6e, 0x76, 0x65,
+  0x72, 0x74, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x24,
+  0x7b, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75,
+  0x65, 0x2e, 0x67, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x7d, 0x22, 0x20,
+  0x72, 0x6f, 0x77, 0x73, 0x3d, 0x34, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70,
+  0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50,
+  0x61, 0x72, 0x61, 0x6d, 0x73, 0x7d, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
+  0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74, 0x79, 0x70, 0x65, 0x3d, 0x22,
+  0x74, 0x65, 0x78, 0x74, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22,
+  0x70, 0x72, 0x6f, 0x70, 0x2d, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x22, 0x20,
+  0x70, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x3d,
+  0x22, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x3a, 0x20, 0x70, 0x72, 0x6f, 0x70,
+  0x31, 0x2c, 0x70, 0x72, 0x6f, 0x70, 0x32, 0x2c, 0x70, 0x72, 0x6f, 0x70,
+  0x33, 0x22, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24,
+  0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x47, 0x72, 0x61, 0x6d, 0x6d,
+  0x61, 0x72, 0x4a, 0x73, 0x6f, 0x6e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61,
+  0x50, 0x72, 0x6f, 0x70, 0x4f, 0x72, 0x64, 0x65, 0x72, 0x7d, 0x20, 0x2f,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e, 0x20,
+  0x74, 0x79, 0x70, 0x65, 0x3d, 0x22, 0x62, 0x75, 0x74, 0x74, 0x6f, 0x6e,
+  0x22, 0x20, 0x6f, 0x6e, 0x63, 0x6c, 0x69, 0x63, 0x6b, 0x3d, 0x24, 0x7b,
+  0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x4a, 0x53, 0x4f, 0x4e, 0x53,
+  0x63, 0x68, 0x65, 0x6d, 0x61, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72,
+  0x7d, 0x3e, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x20, 0x4a, 0x53,
+  0x4f, 0x4e, 0x20, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x3c, 0x2f, 0x62,
+  0x75, 0x74, 0x74, 0x6f, 0x6e, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x2f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e, 0x0a,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
+  0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x20, 0x63, 0x6c, 0x61,
+  0x73, 0x73, 0x3d, 0x22, 0x74, 0x77, 0x6f, 0x22, 0x3e, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24, 0x7b,
+  0x49, 0x6e, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61,
+  0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63,
+  0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a,
+  0x20, 0x32, 0x30, 0x34, 0x38, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20,
+  0x2d, 0x31, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x6e,
+  0x5f, 0x70, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x22, 0x2c, 0x20, 0x76,
+  0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
+  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x6e, 0x5f, 0x70, 0x72, 0x65,
+  0x64, 0x69, 0x63, 0x74, 0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24, 0x7b, 0x46, 0x6c,
   0x6f, 0x61, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61,
-  0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x54, 0x6f, 0x70, 0x2d, 0x50, 0x20,
-  0x73, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x22, 0x2c, 0x20, 0x6d,
-  0x61, 0x78, 0x3a, 0x20, 0x31, 0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e,
-  0x3a, 0x20, 0x30, 0x2e, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a,
-  0x20, 0x22, 0x74, 0x6f, 0x70, 0x5f, 0x70, 0x22, 0x2c, 0x20, 0x73, 0x74,
-  0x65, 0x70, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x31, 0x2c, 0x20, 0x76, 0x61,
-  0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e,
-  0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74, 0x6f, 0x70, 0x5f, 0x70, 0x7d,
-  0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x3c, 0x2f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
-  0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x3e, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x73, 0x75,
-  0x6d, 0x6d, 0x61, 0x72, 0x79, 0x3e, 0x4d, 0x6f, 0x72, 0x65, 0x20, 0x6f,
-  0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x3c, 0x2f, 0x73, 0x75, 0x6d, 0x6d,
-  0x61, 0x72, 0x79, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73,
-  0x65, 0x74, 0x20, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x22, 0x74, 0x77,
-  0x6f, 0x22, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61,
-  0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65,
-  0x6c, 0x3a, 0x20, 0x22, 0x54, 0x46, 0x53, 0x2d, 0x5a, 0x22, 0x2c, 0x20,
-  0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31, 0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69,
-  0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65,
-  0x3a, 0x20, 0x22, 0x74, 0x66, 0x73, 0x5f, 0x7a, 0x22, 0x2c, 0x20, 0x73,
-  0x74, 0x65, 0x70, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x31, 0x2c, 0x20, 0x76,
-  0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
-  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74, 0x66, 0x73, 0x5f, 0x7a,
-  0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61,
-  0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65,
-  0x6c, 0x3a, 0x20, 0x22, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x20,
-  0x50, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31, 0x2e, 0x30,
-  0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x2c, 0x20,
-  0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x74, 0x79, 0x70, 0x69, 0x63,
-  0x61, 0x6c, 0x5f, 0x70, 0x22, 0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x3a,
-  0x20, 0x30, 0x2e, 0x30, 0x31, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65,
-  0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c,
-  0x75, 0x65, 0x2e, 0x74, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x5f, 0x70,
-  0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61,
-  0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65,
-  0x6c, 0x3a, 0x20, 0x22, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65,
-  0x20, 0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x22, 0x2c, 0x20, 0x6d,
-  0x61, 0x78, 0x3a, 0x20, 0x31, 0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e,
-  0x3a, 0x20, 0x30, 0x2e, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a,
-  0x20, 0x22, 0x70, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x70,
+  0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72,
+  0x61, 0x74, 0x75, 0x72, 0x65, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a,
+  0x20, 0x31, 0x2e, 0x35, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x30,
+  0x2e, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x74,
+  0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x22, 0x2c,
+  0x20, 0x73, 0x74, 0x65, 0x70, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x31, 0x2c,
+  0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61,
+  0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74, 0x65, 0x6d,
+  0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x7d, 0x29, 0x7d, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64,
+  0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x50, 0x65,
+  0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x20, 0x72, 0x65, 0x70, 0x65, 0x61,
+  0x74, 0x20, 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x65, 0x22, 0x2c,
+  0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x32, 0x2e, 0x30, 0x2c, 0x20, 0x6d,
+  0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d,
+  0x65, 0x3a, 0x20, 0x22, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x5f, 0x70,
   0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x22, 0x2c, 0x20, 0x73, 0x74, 0x65,
   0x70, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x31, 0x2c, 0x20, 0x76, 0x61, 0x6c,
   0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76,
-  0x61, 0x6c, 0x75, 0x65, 0x2e, 0x70, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63,
-  0x65, 0x5f, 0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x7d, 0x29, 0x7d,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x61, 0x6c, 0x75, 0x65, 0x2e, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x5f,
+  0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x7d, 0x29, 0x7d, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24,
+  0x7b, 0x49, 0x6e, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c,
+  0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x43, 0x6f, 0x6e, 0x73, 0x69,
+  0x64, 0x65, 0x72, 0x20, 0x4e, 0x20, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73,
+  0x20, 0x66, 0x6f, 0x72, 0x20, 0x70, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a,
+  0x65, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x32, 0x30, 0x34,
+  0x38, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2c, 0x20, 0x6e,
+  0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74,
+  0x5f, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x6e, 0x22, 0x2c, 0x20, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74,
+  0x5f, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x6e, 0x7d, 0x29, 0x7d, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24,
+  0x7b, 0x49, 0x6e, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c,
+  0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x54, 0x6f, 0x70, 0x2d, 0x4b,
+  0x20, 0x73, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x22, 0x2c, 0x20,
+  0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31, 0x30, 0x30, 0x2c, 0x20, 0x6d, 0x69,
+  0x6e, 0x3a, 0x20, 0x2d, 0x31, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a,
+  0x20, 0x22, 0x74, 0x6f, 0x70, 0x5f, 0x6b, 0x22, 0x2c, 0x20, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74, 0x6f, 0x70, 0x5f, 0x6b, 0x7d,
+  0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69,
   0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20,
-  0x22, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x20, 0x70,
-  0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78,
-  0x3a, 0x20, 0x31, 0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20,
-  0x30, 0x2e, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22,
-  0x66, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x5f, 0x70, 0x65,
-  0x6e, 0x61, 0x6c, 0x74, 0x79, 0x22, 0x2c, 0x20, 0x73, 0x74, 0x65, 0x70,
-  0x3a, 0x20, 0x30, 0x2e, 0x30, 0x31, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61,
-  0x6c, 0x75, 0x65, 0x2e, 0x66, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63,
-  0x79, 0x5f, 0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x7d, 0x29, 0x7d,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x3c, 0x2f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x3c, 0x68, 0x72, 0x20, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x66, 0x69, 0x65,
-  0x6c, 0x64, 0x73, 0x65, 0x74, 0x20, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d,
-  0x22, 0x74, 0x68, 0x72, 0x65, 0x65, 0x22, 0x3e, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
-  0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61,
-  0x62, 0x65, 0x6c, 0x3e, 0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74,
-  0x79, 0x70, 0x65, 0x3d, 0x22, 0x72, 0x61, 0x64, 0x69, 0x6f, 0x22, 0x20,
-  0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74,
-  0x61, 0x74, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x30,
-  0x22, 0x20, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x65, 0x64, 0x3d, 0x24, 0x7b,
+  0x22, 0x54, 0x6f, 0x70, 0x2d, 0x50, 0x20, 0x73, 0x61, 0x6d, 0x70, 0x6c,
+  0x69, 0x6e, 0x67, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31,
+  0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30,
+  0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x74, 0x6f, 0x70,
+  0x5f, 0x70, 0x22, 0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x3a, 0x20, 0x30,
+  0x2e, 0x30, 0x31, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20,
   0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65,
-  0x2e, 0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x20, 0x3d, 0x3d,
-  0x20, 0x30, 0x7d, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d,
-  0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61,
-  0x6d, 0x73, 0x49, 0x6e, 0x74, 0x7d, 0x20, 0x2f, 0x3e, 0x20, 0x6e, 0x6f,
-  0x20, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x3c, 0x2f, 0x6c,
-  0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c,
-  0x61, 0x62, 0x65, 0x6c, 0x3e, 0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20,
-  0x74, 0x79, 0x70, 0x65, 0x3d, 0x22, 0x72, 0x61, 0x64, 0x69, 0x6f, 0x22,
-  0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x6d, 0x69, 0x72, 0x6f, 0x73,
-  0x74, 0x61, 0x74, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22,
-  0x31, 0x22, 0x20, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x65, 0x64, 0x3d, 0x24,
-  0x7b, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75,
-  0x65, 0x2e, 0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x20, 0x3d,
-  0x3d, 0x20, 0x31, 0x7d, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74,
-  0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x61, 0x72,
-  0x61, 0x6d, 0x73, 0x49, 0x6e, 0x74, 0x7d, 0x20, 0x2f, 0x3e, 0x20, 0x4d,
-  0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x20, 0x76, 0x31, 0x3c, 0x2f,
-  0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
-  0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74,
-  0x20, 0x74, 0x79, 0x70, 0x65, 0x3d, 0x22, 0x72, 0x61, 0x64, 0x69, 0x6f,
-  0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x6d, 0x69, 0x72, 0x6f,
-  0x73, 0x74, 0x61, 0x74, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d,
-  0x22, 0x32, 0x22, 0x20, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x65, 0x64, 0x3d,
-  0x24, 0x7b, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c,
-  0x75, 0x65, 0x2e, 0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x20,
-  0x3d, 0x3d, 0x20, 0x32, 0x7d, 0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75,
-  0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x61,
-  0x72, 0x61, 0x6d, 0x73, 0x49, 0x6e, 0x74, 0x7d, 0x20, 0x2f, 0x3e, 0x20,
-  0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x20, 0x76, 0x32, 0x3c,
-  0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f,
-  0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f,
-  0x61, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62,
-  0x65, 0x6c, 0x3a, 0x20, 0x22, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61,
-  0x74, 0x20, 0x74, 0x61, 0x75, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a,
-  0x20, 0x31, 0x30, 0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20,
-  0x30, 0x2e, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22,
-  0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x5f, 0x74, 0x61, 0x75,
+  0x2e, 0x74, 0x6f, 0x70, 0x5f, 0x70, 0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66, 0x69,
+  0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x65, 0x74, 0x61, 0x69,
+  0x6c, 0x73, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79,
+  0x3e, 0x4d, 0x6f, 0x72, 0x65, 0x20, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e,
+  0x73, 0x3c, 0x2f, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79, 0x3e, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x20, 0x63, 0x6c,
+  0x61, 0x73, 0x73, 0x3d, 0x22, 0x74, 0x77, 0x6f, 0x22, 0x3e, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69, 0x65, 0x6c,
+  0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x54,
+  0x46, 0x53, 0x2d, 0x5a, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20,
+  0x31, 0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e,
+  0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x74, 0x66,
+  0x73, 0x5f, 0x7a, 0x22, 0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x3a, 0x20,
+  0x30, 0x2e, 0x30, 0x31, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a,
+  0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75,
+  0x65, 0x2e, 0x74, 0x66, 0x73, 0x5f, 0x7a, 0x7d, 0x29, 0x7d, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69, 0x65, 0x6c,
+  0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x54,
+  0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x20, 0x50, 0x22, 0x2c, 0x20, 0x6d,
+  0x61, 0x78, 0x3a, 0x20, 0x31, 0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e,
+  0x3a, 0x20, 0x30, 0x2e, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a,
+  0x20, 0x22, 0x74, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x5f, 0x70, 0x22,
+  0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x31,
+  0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72,
+  0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x74, 0x79,
+  0x70, 0x69, 0x63, 0x61, 0x6c, 0x5f, 0x70, 0x7d, 0x29, 0x7d, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69, 0x65, 0x6c,
+  0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x50,
+  0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x20, 0x70, 0x65, 0x6e, 0x61,
+  0x6c, 0x74, 0x79, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31,
+  0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30,
+  0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x70, 0x72, 0x65,
+  0x73, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74,
+  0x79, 0x22, 0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x3a, 0x20, 0x30, 0x2e,
+  0x30, 0x31, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70,
+  0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e,
+  0x70, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x70, 0x65, 0x6e,
+  0x61, 0x6c, 0x74, 0x79, 0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24, 0x7b,
+  0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b,
+  0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x46, 0x72, 0x65, 0x71,
+  0x75, 0x65, 0x6e, 0x63, 0x79, 0x20, 0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74,
+  0x79, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31, 0x2e, 0x30,
+  0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x2c, 0x20,
+  0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x66, 0x72, 0x65, 0x71, 0x75,
+  0x65, 0x6e, 0x63, 0x79, 0x5f, 0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79,
   0x22, 0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x3a, 0x20, 0x30, 0x2e, 0x30,
   0x31, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61,
-  0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x6d,
-  0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x5f, 0x74, 0x61, 0x75, 0x7d,
-  0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61, 0x74,
-  0x46, 0x69, 0x65, 0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c,
-  0x3a, 0x20, 0x22, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x20,
-  0x65, 0x74, 0x61, 0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31,
-  0x2e, 0x30, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30,
-  0x2c, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x6d, 0x69, 0x72,
-  0x6f, 0x73, 0x74, 0x61, 0x74, 0x5f, 0x65, 0x74, 0x61, 0x22, 0x2c, 0x20,
-  0x73, 0x74, 0x65, 0x70, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x31, 0x2c, 0x20,
-  0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d,
-  0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x6d, 0x69, 0x72, 0x6f,
-  0x73, 0x74, 0x61, 0x74, 0x5f, 0x65, 0x74, 0x61, 0x7d, 0x29, 0x7d, 0x0a,
+  0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x66,
+  0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x5f, 0x70, 0x65, 0x6e,
+  0x61, 0x6c, 0x74, 0x79, 0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66, 0x69,
+  0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x68, 0x72, 0x20,
+  0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x3c, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74,
+  0x20, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x22, 0x74, 0x68, 0x72, 0x65,
+  0x65, 0x22, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x3e, 0x0a,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x3c, 0x2f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f,
-  0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x3e, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66, 0x6f, 0x72, 0x6d, 0x3e,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x60, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x70, 0x6f,
-  0x6f, 0x72, 0x20, 0x6d, 0x61, 0x6e, 0x73, 0x20, 0x6d, 0x61, 0x72, 0x6b,
-  0x64, 0x6f, 0x77, 0x6e, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65,
-  0x6d, 0x65, 0x6e, 0x74, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
-  0x73, 0x74, 0x20, 0x4d, 0x61, 0x72, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x69,
-  0x73, 0x68, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
-  0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6d, 0x64, 0x20, 0x3d, 0x20,
-  0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x74, 0x65, 0x78, 0x74, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70,
-  0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x26, 0x2f, 0x67, 0x2c, 0x20, 0x27,
-  0x26, 0x61, 0x6d, 0x70, 0x3b, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65,
-  0x28, 0x2f, 0x3c, 0x2f, 0x67, 0x2c, 0x20, 0x27, 0x26, 0x6c, 0x74, 0x3b,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e, 0x3c,
+  0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74, 0x79, 0x70, 0x65, 0x3d, 0x22,
+  0x72, 0x61, 0x64, 0x69, 0x6f, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d,
+  0x22, 0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x22, 0x20, 0x76,
+  0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x30, 0x22, 0x20, 0x63, 0x68, 0x65,
+  0x63, 0x6b, 0x65, 0x64, 0x3d, 0x24, 0x7b, 0x70, 0x61, 0x72, 0x61, 0x6d,
+  0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x6d, 0x69, 0x72, 0x6f,
+  0x73, 0x74, 0x61, 0x74, 0x20, 0x3d, 0x3d, 0x20, 0x30, 0x7d, 0x20, 0x6f,
+  0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70, 0x64,
+  0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x49, 0x6e, 0x74,
+  0x7d, 0x20, 0x2f, 0x3e, 0x20, 0x6e, 0x6f, 0x20, 0x4d, 0x69, 0x72, 0x6f,
+  0x73, 0x74, 0x61, 0x74, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3e,
+  0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74, 0x79, 0x70, 0x65, 0x3d,
+  0x22, 0x72, 0x61, 0x64, 0x69, 0x6f, 0x22, 0x20, 0x6e, 0x61, 0x6d, 0x65,
+  0x3d, 0x22, 0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x22, 0x20,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x31, 0x22, 0x20, 0x63, 0x68,
+  0x65, 0x63, 0x6b, 0x65, 0x64, 0x3d, 0x24, 0x7b, 0x70, 0x61, 0x72, 0x61,
+  0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x6d, 0x69, 0x72,
+  0x6f, 0x73, 0x74, 0x61, 0x74, 0x20, 0x3d, 0x3d, 0x20, 0x31, 0x7d, 0x20,
+  0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75, 0x70,
+  0x64, 0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x49, 0x6e,
+  0x74, 0x7d, 0x20, 0x2f, 0x3e, 0x20, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74,
+  0x61, 0x74, 0x20, 0x76, 0x31, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65, 0x6c,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6c, 0x61, 0x62, 0x65, 0x6c,
+  0x3e, 0x3c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x74, 0x79, 0x70, 0x65,
+  0x3d, 0x22, 0x72, 0x61, 0x64, 0x69, 0x6f, 0x22, 0x20, 0x6e, 0x61, 0x6d,
+  0x65, 0x3d, 0x22, 0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x22,
+  0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3d, 0x22, 0x32, 0x22, 0x20, 0x63,
+  0x68, 0x65, 0x63, 0x6b, 0x65, 0x64, 0x3d, 0x24, 0x7b, 0x70, 0x61, 0x72,
+  0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x6d, 0x69,
+  0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x20, 0x3d, 0x3d, 0x20, 0x32, 0x7d,
+  0x20, 0x6f, 0x6e, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x3d, 0x24, 0x7b, 0x75,
+  0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x49,
+  0x6e, 0x74, 0x7d, 0x20, 0x2f, 0x3e, 0x20, 0x4d, 0x69, 0x72, 0x6f, 0x73,
+  0x74, 0x61, 0x74, 0x20, 0x76, 0x32, 0x3c, 0x2f, 0x6c, 0x61, 0x62, 0x65,
+  0x6c, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69, 0x65,
+  0x6c, 0x64, 0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22,
+  0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x20, 0x74, 0x61, 0x75,
+  0x22, 0x2c, 0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31, 0x30, 0x2e, 0x30,
+  0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x2c, 0x20,
+  0x6e, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x6d, 0x69, 0x72, 0x6f, 0x73,
+  0x74, 0x61, 0x74, 0x5f, 0x74, 0x61, 0x75, 0x22, 0x2c, 0x20, 0x73, 0x74,
+  0x65, 0x70, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x31, 0x2c, 0x20, 0x76, 0x61,
+  0x6c, 0x75, 0x65, 0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74,
+  0x61, 0x74, 0x5f, 0x74, 0x61, 0x75, 0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x24, 0x7b, 0x46, 0x6c, 0x6f, 0x61, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64,
+  0x28, 0x7b, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x3a, 0x20, 0x22, 0x4d, 0x69,
+  0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x20, 0x65, 0x74, 0x61, 0x22, 0x2c,
+  0x20, 0x6d, 0x61, 0x78, 0x3a, 0x20, 0x31, 0x2e, 0x30, 0x2c, 0x20, 0x6d,
+  0x69, 0x6e, 0x3a, 0x20, 0x30, 0x2e, 0x30, 0x2c, 0x20, 0x6e, 0x61, 0x6d,
+  0x65, 0x3a, 0x20, 0x22, 0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74,
+  0x5f, 0x65, 0x74, 0x61, 0x22, 0x2c, 0x20, 0x73, 0x74, 0x65, 0x70, 0x3a,
+  0x20, 0x30, 0x2e, 0x30, 0x31, 0x2c, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65,
+  0x3a, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x76, 0x61, 0x6c,
+  0x75, 0x65, 0x2e, 0x6d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x5f,
+  0x65, 0x74, 0x61, 0x7d, 0x29, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66, 0x69, 0x65,
+  0x6c, 0x64, 0x73, 0x65, 0x74, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x65, 0x74, 0x61, 0x69,
+  0x6c, 0x73, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x3c, 0x2f, 0x66, 0x6f, 0x72, 0x6d, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x2f, 0x2f, 0x20, 0x70, 0x6f, 0x6f, 0x72, 0x20, 0x6d, 0x61,
+  0x6e, 0x73, 0x20, 0x6d, 0x61, 0x72, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x20,
+  0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x4d, 0x61,
+  0x72, 0x6b, 0x64, 0x6f, 0x77, 0x6e, 0x69, 0x73, 0x68, 0x20, 0x3d, 0x20,
+  0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x29, 0x20, 0x3d, 0x3e, 0x20,
+  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+  0x74, 0x20, 0x6d, 0x64, 0x20, 0x3d, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d,
+  0x73, 0x2e, 0x74, 0x65, 0x78, 0x74, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x28,
+  0x2f, 0x26, 0x2f, 0x67, 0x2c, 0x20, 0x27, 0x26, 0x61, 0x6d, 0x70, 0x3b,
   0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e,
-  0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x3e, 0x2f, 0x67,
-  0x2c, 0x20, 0x27, 0x26, 0x67, 0x74, 0x3b, 0x27, 0x29, 0x0a, 0x20, 0x20,
+  0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x3c, 0x2f, 0x67,
+  0x2c, 0x20, 0x27, 0x26, 0x6c, 0x74, 0x3b, 0x27, 0x29, 0x0a, 0x20, 0x20,
   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61,
-  0x63, 0x65, 0x28, 0x2f, 0x5e, 0x23, 0x7b, 0x31, 0x2c, 0x36, 0x7d, 0x20,
-  0x28, 0x2e, 0x2a, 0x29, 0x24, 0x2f, 0x67, 0x69, 0x6d, 0x2c, 0x20, 0x27,
-  0x3c, 0x68, 0x33, 0x3e, 0x24, 0x31, 0x3c, 0x2f, 0x68, 0x33, 0x3e, 0x27,
-  0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x72,
-  0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x5c, 0x2a, 0x5c, 0x2a,
-  0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x5c, 0x2a, 0x5c, 0x2a, 0x2f, 0x67, 0x2c,
-  0x20, 0x27, 0x3c, 0x73, 0x74, 0x72, 0x6f, 0x6e, 0x67, 0x3e, 0x24, 0x31,
-  0x3c, 0x2f, 0x73, 0x74, 0x72, 0x6f, 0x6e, 0x67, 0x3e, 0x27, 0x29, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70,
-  0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x5f, 0x5f, 0x28, 0x2e, 0x2a, 0x3f,
-  0x29, 0x5f, 0x5f, 0x2f, 0x67, 0x2c, 0x20, 0x27, 0x3c, 0x73, 0x74, 0x72,
-  0x6f, 0x6e, 0x67, 0x3e, 0x24, 0x31, 0x3c, 0x2f, 0x73, 0x74, 0x72, 0x6f,
-  0x6e, 0x67, 0x3e, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f,
-  0x5c, 0x2a, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x5c, 0x2a, 0x2f, 0x67, 0x2c,
-  0x20, 0x27, 0x3c, 0x65, 0x6d, 0x3e, 0x24, 0x31, 0x3c, 0x2f, 0x65, 0x6d,
-  0x3e, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x5f, 0x28,
-  0x2e, 0x2a, 0x3f, 0x29, 0x5f, 0x2f, 0x67, 0x2c, 0x20, 0x27, 0x3c, 0x65,
-  0x6d, 0x3e, 0x24, 0x31, 0x3c, 0x2f, 0x65, 0x6d, 0x3e, 0x27, 0x29, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70,
-  0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x60, 0x60, 0x60, 0x2e, 0x2a, 0x3f,
-  0x5c, 0x6e, 0x28, 0x5b, 0x5c, 0x73, 0x5c, 0x53, 0x5d, 0x2a, 0x3f, 0x29,
-  0x60, 0x60, 0x60, 0x2f, 0x67, 0x2c, 0x20, 0x27, 0x3c, 0x70, 0x72, 0x65,
-  0x3e, 0x3c, 0x63, 0x6f, 0x64, 0x65, 0x3e, 0x24, 0x31, 0x3c, 0x2f, 0x63,
-  0x6f, 0x64, 0x65, 0x3e, 0x3c, 0x2f, 0x70, 0x72, 0x65, 0x3e, 0x27, 0x29,
+  0x63, 0x65, 0x28, 0x2f, 0x3e, 0x2f, 0x67, 0x2c, 0x20, 0x27, 0x26, 0x67,
+  0x74, 0x3b, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x5e,
+  0x23, 0x7b, 0x31, 0x2c, 0x36, 0x7d, 0x20, 0x28, 0x2e, 0x2a, 0x29, 0x24,
+  0x2f, 0x67, 0x69, 0x6d, 0x2c, 0x20, 0x27, 0x3c, 0x68, 0x33, 0x3e, 0x24,
+  0x31, 0x3c, 0x2f, 0x68, 0x33, 0x3e, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
+  0x65, 0x28, 0x2f, 0x5c, 0x2a, 0x5c, 0x2a, 0x28, 0x2e, 0x2a, 0x3f, 0x29,
+  0x5c, 0x2a, 0x5c, 0x2a, 0x2f, 0x67, 0x2c, 0x20, 0x27, 0x3c, 0x73, 0x74,
+  0x72, 0x6f, 0x6e, 0x67, 0x3e, 0x24, 0x31, 0x3c, 0x2f, 0x73, 0x74, 0x72,
+  0x6f, 0x6e, 0x67, 0x3e, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x28,
+  0x2f, 0x5f, 0x5f, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x5f, 0x5f, 0x2f, 0x67,
+  0x2c, 0x20, 0x27, 0x3c, 0x73, 0x74, 0x72, 0x6f, 0x6e, 0x67, 0x3e, 0x24,
+  0x31, 0x3c, 0x2f, 0x73, 0x74, 0x72, 0x6f, 0x6e, 0x67, 0x3e, 0x27, 0x29,
   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65,
-  0x70, 0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x60, 0x28, 0x2e, 0x2a, 0x3f,
-  0x29, 0x60, 0x2f, 0x67, 0x2c, 0x20, 0x27, 0x3c, 0x63, 0x6f, 0x64, 0x65,
-  0x3e, 0x24, 0x31, 0x3c, 0x2f, 0x63, 0x6f, 0x64, 0x65, 0x3e, 0x27, 0x29,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65,
-  0x70, 0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x5c, 0x6e, 0x2f, 0x67, 0x69,
-  0x6d, 0x2c, 0x20, 0x27, 0x3c, 0x62, 0x72, 0x20, 0x2f, 0x3e, 0x27, 0x29,
-  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75,
-  0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x3c, 0x73, 0x70, 0x61,
-  0x6e, 0x20, 0x64, 0x61, 0x6e, 0x67, 0x65, 0x72, 0x6f, 0x75, 0x73, 0x6c,
-  0x79, 0x53, 0x65, 0x74, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x48, 0x54, 0x4d,
-  0x4c, 0x3d, 0x24, 0x7b, 0x7b, 0x20, 0x5f, 0x5f, 0x68, 0x74, 0x6d, 0x6c,
-  0x3a, 0x20, 0x6d, 0x64, 0x20, 0x7d, 0x7d, 0x20, 0x2f, 0x3e, 0x60, 0x3b,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x4d, 0x6f, 0x64, 0x65, 0x6c,
-  0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e,
-  0x66, 0x6f, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
-  0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x53,
-  0x74, 0x61, 0x74, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x20,
-  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65,
-  0x74, 0x75, 0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x3c, 0x73,
-  0x70, 0x61, 0x6e, 0x2f, 0x3e, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74,
-  0x75, 0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x73, 0x70, 0x61, 0x6e, 0x3e,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24,
-  0x7b, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x53, 0x74, 0x61, 0x74, 0x73, 0x2e,
-  0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x70, 0x72, 0x65, 0x64, 0x69, 0x63,
-  0x74, 0x65, 0x64, 0x5f, 0x70, 0x65, 0x72, 0x5f, 0x74, 0x6f, 0x6b, 0x65,
-  0x6e, 0x5f, 0x6d, 0x73, 0x2e, 0x74, 0x6f, 0x46, 0x69, 0x78, 0x65, 0x64,
-  0x28, 0x29, 0x7d, 0x6d, 0x73, 0x20, 0x70, 0x65, 0x72, 0x20, 0x74, 0x6f,
-  0x6b, 0x65, 0x6e, 0x2c, 0x20, 0x24, 0x7b, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
-  0x53, 0x74, 0x61, 0x74, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e,
-  0x70, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x65, 0x64, 0x5f, 0x70, 0x65,
-  0x72, 0x5f, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x2e, 0x74, 0x6f, 0x46,
-  0x69, 0x78, 0x65, 0x64, 0x28, 0x32, 0x29, 0x7d, 0x20, 0x74, 0x6f, 0x6b,
-  0x65, 0x6e, 0x73, 0x20, 0x70, 0x65, 0x72, 0x20, 0x73, 0x65, 0x63, 0x6f,
-  0x6e, 0x64, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
-  0x2f, 0x73, 0x70, 0x61, 0x6e, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x41,
-  0x70, 0x70, 0x28, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x29, 0x20, 0x7b, 0x0a,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
-  0x6e, 0x20, 0x68, 0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x3c, 0x64, 0x69, 0x76, 0x20, 0x69, 0x64, 0x3d,
-  0x22, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x22, 0x3e,
+  0x70, 0x6c, 0x61, 0x63, 0x65, 0x28, 0x2f, 0x5c, 0x2a, 0x28, 0x2e, 0x2a,
+  0x3f, 0x29, 0x5c, 0x2a, 0x2f, 0x67, 0x2c, 0x20, 0x27, 0x3c, 0x65, 0x6d,
+  0x3e, 0x24, 0x31, 0x3c, 0x2f, 0x65, 0x6d, 0x3e, 0x27, 0x29, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c,
+  0x61, 0x63, 0x65, 0x28, 0x2f, 0x5f, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x5f,
+  0x2f, 0x67, 0x2c, 0x20, 0x27, 0x3c, 0x65, 0x6d, 0x3e, 0x24, 0x31, 0x3c,
+  0x2f, 0x65, 0x6d, 0x3e, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x28,
+  0x2f, 0x60, 0x60, 0x60, 0x2e, 0x2a, 0x3f, 0x5c, 0x6e, 0x28, 0x5b, 0x5c,
+  0x73, 0x5c, 0x53, 0x5d, 0x2a, 0x3f, 0x29, 0x60, 0x60, 0x60, 0x2f, 0x67,
+  0x2c, 0x20, 0x27, 0x3c, 0x70, 0x72, 0x65, 0x3e, 0x3c, 0x63, 0x6f, 0x64,
+  0x65, 0x3e, 0x24, 0x31, 0x3c, 0x2f, 0x63, 0x6f, 0x64, 0x65, 0x3e, 0x3c,
+  0x2f, 0x70, 0x72, 0x65, 0x3e, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65,
+  0x28, 0x2f, 0x60, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x60, 0x2f, 0x67, 0x2c,
+  0x20, 0x27, 0x3c, 0x63, 0x6f, 0x64, 0x65, 0x3e, 0x24, 0x31, 0x3c, 0x2f,
+  0x63, 0x6f, 0x64, 0x65, 0x3e, 0x27, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x2e, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65,
+  0x28, 0x2f, 0x5c, 0x6e, 0x2f, 0x67, 0x69, 0x6d, 0x2c, 0x20, 0x27, 0x3c,
+  0x62, 0x72, 0x20, 0x2f, 0x3e, 0x27, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x68, 0x74,
+  0x6d, 0x6c, 0x60, 0x3c, 0x73, 0x70, 0x61, 0x6e, 0x20, 0x64, 0x61, 0x6e,
+  0x67, 0x65, 0x72, 0x6f, 0x75, 0x73, 0x6c, 0x79, 0x53, 0x65, 0x74, 0x49,
+  0x6e, 0x6e, 0x65, 0x72, 0x48, 0x54, 0x4d, 0x4c, 0x3d, 0x24, 0x7b, 0x7b,
+  0x20, 0x5f, 0x5f, 0x68, 0x74, 0x6d, 0x6c, 0x3a, 0x20, 0x6d, 0x64, 0x20,
+  0x7d, 0x7d, 0x20, 0x2f, 0x3e, 0x60, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x7d, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
+  0x74, 0x20, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x47, 0x65, 0x6e, 0x65, 0x72,
+  0x61, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d, 0x20,
+  0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x29, 0x20, 0x3d, 0x3e, 0x20,
+  0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28,
+  0x21, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x53, 0x74, 0x61, 0x74, 0x73, 0x2e,
+  0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20,
+  0x68, 0x74, 0x6d, 0x6c, 0x60, 0x3c, 0x73, 0x70, 0x61, 0x6e, 0x2f, 0x3e,
+  0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x68,
+  0x74, 0x6d, 0x6c, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x3c, 0x73, 0x70, 0x61, 0x6e, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x24, 0x7b, 0x6c, 0x6c, 0x61, 0x6d,
+  0x61, 0x53, 0x74, 0x61, 0x74, 0x73, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65,
+  0x2e, 0x70, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x65, 0x64, 0x5f, 0x70,
+  0x65, 0x72, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x5f, 0x6d, 0x73, 0x2e,
+  0x74, 0x6f, 0x46, 0x69, 0x78, 0x65, 0x64, 0x28, 0x29, 0x7d, 0x6d, 0x73,
+  0x20, 0x70, 0x65, 0x72, 0x20, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x2c, 0x20,
+  0x24, 0x7b, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x53, 0x74, 0x61, 0x74, 0x73,
+  0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x70, 0x72, 0x65, 0x64, 0x69,
+  0x63, 0x74, 0x65, 0x64, 0x5f, 0x70, 0x65, 0x72, 0x5f, 0x73, 0x65, 0x63,
+  0x6f, 0x6e, 0x64, 0x2e, 0x74, 0x6f, 0x46, 0x69, 0x78, 0x65, 0x64, 0x28,
+  0x32, 0x29, 0x7d, 0x20, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x20, 0x70,
+  0x65, 0x72, 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x73, 0x70, 0x61, 0x6e,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x60, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x75, 0x6e,
+  0x63, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x41, 0x70, 0x70, 0x28, 0x70, 0x72,
+  0x6f, 0x70, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x68, 0x74, 0x6d,
+  0x6c, 0x60, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
+  0x64, 0x69, 0x76, 0x20, 0x69, 0x64, 0x3d, 0x22, 0x63, 0x6f, 0x6e, 0x74,
+  0x61, 0x69, 0x6e, 0x65, 0x72, 0x22, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x68, 0x65, 0x61, 0x64, 0x65,
+  0x72, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x3c, 0x68, 0x31, 0x3e, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
+  0x2e, 0x63, 0x70, 0x70, 0x3c, 0x2f, 0x68, 0x31, 0x3e, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x68, 0x65,
+  0x61, 0x64, 0x65, 0x72, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6d, 0x61, 0x69, 0x6e, 0x20, 0x69,
+  0x64, 0x3d, 0x22, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x22, 0x3e,
+  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x3c, 0x24, 0x7b, 0x63, 0x68, 0x61, 0x74, 0x53, 0x74, 0x61, 0x72,
+  0x74, 0x65, 0x64, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x3f, 0x20,
+  0x43, 0x68, 0x61, 0x74, 0x4c, 0x6f, 0x67, 0x20, 0x3a, 0x20, 0x43, 0x6f,
+  0x6e, 0x66, 0x69, 0x67, 0x46, 0x6f, 0x72, 0x6d, 0x7d, 0x20, 0x2f, 0x3e,
   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
-  0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x68, 0x31, 0x3e,
-  0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x3c, 0x2f, 0x68,
-  0x31, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x3c, 0x2f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x3e, 0x0a, 0x0a,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x6d,
-  0x61, 0x69, 0x6e, 0x20, 0x69, 0x64, 0x3d, 0x22, 0x63, 0x6f, 0x6e, 0x74,
-  0x65, 0x6e, 0x74, 0x22, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x24, 0x7b, 0x63, 0x68, 0x61,
-  0x74, 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x2e, 0x76, 0x61, 0x6c,
-  0x75, 0x65, 0x20, 0x3f, 0x20, 0x43, 0x68, 0x61, 0x74, 0x4c, 0x6f, 0x67,
-  0x20, 0x3a, 0x20, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x46, 0x6f, 0x72,
-  0x6d, 0x7d, 0x20, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x6d, 0x61, 0x69, 0x6e, 0x3e, 0x0a,
-  0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c,
-  0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x69, 0x64, 0x3d, 0x22,
-  0x77, 0x72, 0x69, 0x74, 0x65, 0x22, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x24, 0x7b, 0x4d,
-  0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x7d,
-  0x20, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x3c, 0x2f, 0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x3e,
-  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x3c, 0x66, 0x6f, 0x6f, 0x74, 0x65, 0x72, 0x3e, 0x0a, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x70, 0x3e,
-  0x3c, 0x24, 0x7b, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x47, 0x65, 0x6e, 0x65,
-  0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x66, 0x6f, 0x7d, 0x20,
-  0x2f, 0x3e, 0x3c, 0x2f, 0x70, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x70, 0x3e, 0x50, 0x6f,
-  0x77, 0x65, 0x72, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20, 0x3c, 0x61, 0x20,
-  0x68, 0x72, 0x65, 0x66, 0x3d, 0x22, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a,
-  0x2f, 0x2f, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d,
-  0x2f, 0x67, 0x67, 0x65, 0x72, 0x67, 0x61, 0x6e, 0x6f, 0x76, 0x2f, 0x6c,
-  0x6c, 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x22, 0x3e, 0x6c, 0x6c,
-  0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x3c, 0x2f, 0x61, 0x3e, 0x20,
-  0x61, 0x6e, 0x64, 0x20, 0x3c, 0x61, 0x20, 0x68, 0x72, 0x65, 0x66, 0x3d,
-  0x22, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x67, 0x67, 0x6d,
-  0x6c, 0x2e, 0x61, 0x69, 0x22, 0x3e, 0x67, 0x67, 0x6d, 0x6c, 0x2e, 0x61,
-  0x69, 0x3c, 0x2f, 0x61, 0x3e, 0x2e, 0x3c, 0x2f, 0x70, 0x3e, 0x0a, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66,
-  0x6f, 0x6f, 0x74, 0x65, 0x72, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x3c, 0x2f, 0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20,
-  0x20, 0x20, 0x20, 0x20, 0x60, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
-  0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6e, 0x64, 0x65, 0x72,
-  0x28, 0x68, 0x28, 0x41, 0x70, 0x70, 0x29, 0x2c, 0x20, 0x64, 0x6f, 0x63,
-  0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x62, 0x6f, 0x64, 0x79, 0x29, 0x3b,
-  0x0a, 0x20, 0x20, 0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x3e,
-  0x0a, 0x3c, 0x2f, 0x68, 0x65, 0x61, 0x64, 0x3e, 0x0a, 0x0a, 0x3c, 0x62,
-  0x6f, 0x64, 0x79, 0x3e, 0x0a, 0x3c, 0x2f, 0x62, 0x6f, 0x64, 0x79, 0x3e,
-  0x0a, 0x0a, 0x3c, 0x2f, 0x68, 0x74, 0x6d, 0x6c, 0x3e, 0x0a
+  0x2f, 0x6d, 0x61, 0x69, 0x6e, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x73, 0x65, 0x63, 0x74, 0x69,
+  0x6f, 0x6e, 0x20, 0x69, 0x64, 0x3d, 0x22, 0x77, 0x72, 0x69, 0x74, 0x65,
+  0x22, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x3c, 0x24, 0x7b, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67,
+  0x65, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x7d, 0x20, 0x2f, 0x3e, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x73,
+  0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x3e, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x66, 0x6f, 0x6f, 0x74,
+  0x65, 0x72, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x70, 0x3e, 0x3c, 0x24, 0x7b, 0x4d, 0x6f,
+  0x64, 0x65, 0x6c, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f,
+  0x6e, 0x49, 0x6e, 0x66, 0x6f, 0x7d, 0x20, 0x2f, 0x3e, 0x3c, 0x2f, 0x70,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x3c, 0x70, 0x3e, 0x50, 0x6f, 0x77, 0x65, 0x72, 0x65, 0x64,
+  0x20, 0x62, 0x79, 0x20, 0x3c, 0x61, 0x20, 0x68, 0x72, 0x65, 0x66, 0x3d,
+  0x22, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x67, 0x69, 0x74,
+  0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x67, 0x65, 0x72,
+  0x67, 0x61, 0x6e, 0x6f, 0x76, 0x2f, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2e,
+  0x63, 0x70, 0x70, 0x22, 0x3e, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2e, 0x63,
+  0x70, 0x70, 0x3c, 0x2f, 0x61, 0x3e, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x3c,
+  0x61, 0x20, 0x68, 0x72, 0x65, 0x66, 0x3d, 0x22, 0x68, 0x74, 0x74, 0x70,
+  0x73, 0x3a, 0x2f, 0x2f, 0x67, 0x67, 0x6d, 0x6c, 0x2e, 0x61, 0x69, 0x22,
+  0x3e, 0x67, 0x67, 0x6d, 0x6c, 0x2e, 0x61, 0x69, 0x3c, 0x2f, 0x61, 0x3e,
+  0x2e, 0x3c, 0x2f, 0x70, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+  0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f, 0x66, 0x6f, 0x6f, 0x74, 0x65, 0x72,
+  0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x2f,
+  0x64, 0x69, 0x76, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x60,
+  0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20,
+  0x20, 0x72, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x28, 0x68, 0x28, 0x41, 0x70,
+  0x70, 0x29, 0x2c, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74,
+  0x2e, 0x62, 0x6f, 0x64, 0x79, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x3c, 0x2f,
+  0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x3e, 0x0a, 0x3c, 0x2f, 0x68, 0x65,
+  0x61, 0x64, 0x3e, 0x0a, 0x0a, 0x3c, 0x62, 0x6f, 0x64, 0x79, 0x3e, 0x0a,
+  0x3c, 0x2f, 0x62, 0x6f, 0x64, 0x79, 0x3e, 0x0a, 0x0a, 0x3c, 0x2f, 0x68,
+  0x74, 0x6d, 0x6c, 0x3e, 0x0a
 };
-unsigned int index_html_len = 15082;
+unsigned int index_html_len = 20333;
diff --git a/examples/server/public/index.html b/examples/server/public/index.html
index 1200afc39..f204fff18 100644
--- a/examples/server/public/index.html
+++ b/examples/server/public/index.html
@@ -170,6 +170,136 @@
       grammar: '',
     })
 
+    /* START: Support for storing prompt templates and parameters in borwser LocalStorage */
+
+    const local_storage_storageKey = "llamacpp_server_local_storage";
+
+    function local_storage_setDataFromObject(tag, content) {
+      localStorage.setItem(local_storage_storageKey + '/' + tag, JSON.stringify(content));
+    }
+
+    function local_storage_setDataFromRawText(tag, content) {
+      localStorage.setItem(local_storage_storageKey + '/' + tag, content);
+    }
+
+    function local_storage_getDataAsObject(tag) {
+      const item = localStorage.getItem(local_storage_storageKey + '/' + tag);
+      if (!item) {
+        return null;
+      } else {
+        return JSON.parse(item);
+      }
+    }
+
+    function local_storage_getDataAsRawText(tag) {
+      const item = localStorage.getItem(local_storage_storageKey + '/' + tag);
+      if (!item) {
+        return null;
+      } else {
+        return item;
+      }
+    }
+
+    // create a container for user templates and settings
+
+    const savedUserTemplates = signal({})
+    const selectedUserTemplate = signal({ name: '', template: { session: {}, params: {} } })
+
+    // let's import locally saved templates and settings if there are any
+    // user templates and settings are stored in one object
+    // in form of { "templatename": "templatedata" } and { "settingstemplatename":"settingsdata" }
+
+    console.log('Importing saved templates')
+
+    let importedTemplates = local_storage_getDataAsObject('user_templates')
+
+    if (importedTemplates) {
+      // saved templates were successfuly imported.
+
+      console.log('Processing saved templates and updating default template')
+
+      //console.log(importedTemplates);
+      savedUserTemplates.value = importedTemplates;
+
+      //override default template
+      savedUserTemplates.value.default = { session: session.value, params: params.value }
+      local_storage_setDataFromObject('user_templates', savedUserTemplates.value)
+    } else {
+      // no saved templates detected.
+
+      console.log('Initializing LocalStorage and saving default template')
+
+      savedUserTemplates.value = { "default": { session: session.value, params: params.value } }
+      local_storage_setDataFromObject('user_templates', savedUserTemplates.value)
+    }
+
+    function userTemplateResetToDefault() {
+      console.log('Reseting themplate to default')
+      selectedUserTemplate.value.name = 'default';
+      selectedUserTemplate.value.data = savedUserTemplates.value['default'];
+    }
+
+    function userTemplateApply(t) {
+      session.value = t.data.session;
+      params.value = t.data.params;
+    }
+
+    function userTemplateResetToDefaultAndApply() {
+      userTemplateResetToDefault()
+      userTemplateApply(selectedUserTemplate.value)
+    }
+
+    function userTemplateLoadAndApplyAutosaved() {
+      // get autosaved last used template
+      let lastUsedTemplate = local_storage_getDataAsObject('user_templates_last')
+
+      if (lastUsedTemplate) {
+
+        console.log('Autosaved template found, restoring')
+
+        selectedUserTemplate.value = lastUsedTemplate
+      }
+      else {
+
+        console.log('No autosaved template found, using default template')
+        // no autosaved last used template was found, so load from default.
+
+        userTemplateResetToDefault()
+      }
+
+      console.log('Applying template')
+      // and update internal data from templates
+
+      userTemplateApply(selectedUserTemplate.value)
+    }
+
+    //console.log(savedUserTemplates.value)
+    //console.log(selectedUserTemplate.value)
+
+    function userTemplateAutosave() {
+      console.log('Template Autosave...')
+      if (selectedUserTemplate.value.name == 'default') {
+        // we don't want to save over default template, so let's create a new one
+        let newTemplateName = 'UserTemplate-' + Date.now().toString()
+        let newTemplate = { 'name': newTemplateName, 'data': { 'session': session.value, 'params': params.value } }
+
+        console.log('Saving as ' + newTemplateName)
+
+        // save in the autosave slot
+        local_storage_setDataFromObject('user_templates_last', newTemplate)
+
+        // and load it back and apply
+        userTemplateLoadAndApplyAutosaved()
+      } else {
+        local_storage_setDataFromObject('user_templates_last', { 'name': selectedUserTemplate.value.name, 'data': { 'session': session.value, 'params': params.value } })
+      }
+    }
+
+    console.log('Checking for autosaved last used template')
+    userTemplateLoadAndApplyAutosaved()
+
+    /* END: Support for storing prompt templates and parameters in browsers LocalStorage */
+
     const llamaStats = signal(null)
     const controller = signal(null)
 
@@ -346,8 +476,34 @@
         `
       };
 
+      const userTemplateReset = (e) => {
+        e.preventDefault();
+        userTemplateResetToDefaultAndApply()
+      }
+
+      const UserTemplateResetButton = () => {
+        if (selectedUserTemplate.value.name == 'default') {
+          return html`
+            <button disabled>Using default template</button>
+          `
+        }
+
+        return html`
+          <button onclick=${userTemplateReset}>Reset all to default</button>
+        `
+      };
+
+      useEffect(() => {
+        // autosave template on every change
+        userTemplateAutosave()
+      }, [session.value, params.value])
+
       return html`
         <form>
+          <fieldset>
+            <${UserTemplateResetButton}/>
+          </fieldset>
+
           <fieldset>
             <div>
               <label for="prompt">Prompt</label>
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 144372061..a04f1910c 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -15,6 +15,7 @@
 #include "index.html.hpp"
 #include "index.js.hpp"
 #include "completion.js.hpp"
+#include "json-schema-to-grammar.mjs.hpp"
 
 #ifndef SERVER_VERBOSE
 #define SERVER_VERBOSE 1
@@ -278,7 +279,7 @@ struct llama_server_context
             grammar_parser::print_grammar(stderr, parsed_grammar);
 
             {
-                auto it = params.logit_bias.find(llama_token_eos());
+                auto it = params.logit_bias.find(llama_token_eos(ctx));
                 if (it != params.logit_bias.end() && it->second == -INFINITY) {
                     LOG_WARNING("EOS token is disabled, which will cause most grammars to fail", {});
                 }
@@ -401,7 +402,7 @@ struct llama_server_context
         if (params.n_predict == 0)
         {
             has_next_token = false;
-            result.tok = llama_token_eos();
+            result.tok = llama_token_eos(ctx);
             return result;
         }
 
@@ -441,7 +442,7 @@ struct llama_server_context
             llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};
 
             // Apply penalties
-            float nl_logit = logits[llama_token_nl()];
+            float nl_logit = logits[llama_token_nl(ctx)];
             auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), params.n_ctx);
             llama_sample_repetition_penalty(ctx, &candidates_p,
                                             last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
@@ -451,7 +452,7 @@ struct llama_server_context
                                                           last_n_repeat, alpha_frequency, alpha_presence);
             if (!penalize_nl)
             {
-                logits[llama_token_nl()] = nl_logit;
+                logits[llama_token_nl(ctx)] = nl_logit;
             }
 
             if (grammar != nullptr) {
@@ -514,7 +515,7 @@ struct llama_server_context
         // decrement remaining sampling budget
         --n_remain;
 
-        if (!embd.empty() && embd.back() == llama_token_eos())
+        if (!embd.empty() && embd.back() == llama_token_eos(ctx))
         {
             // stopping_word = llama_token_to_str(ctx, embd.back());
             has_next_token = false;
@@ -948,7 +949,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
 
 static json format_generation_settings(llama_server_context &llama)
 {
-    const auto eos_bias = llama.params.logit_bias.find(llama_token_eos());
+    const auto eos_bias = llama.params.logit_bias.find(llama_token_eos(llama.ctx));
     const bool ignore_eos = eos_bias != llama.params.logit_bias.end() &&
                             eos_bias->second < 0.0f && std::isinf(eos_bias->second);
 
@@ -1083,7 +1084,7 @@ static void parse_options_completion(const json &body, llama_server_context &lla
     llama.params.logit_bias.clear();
     if (body.value("ignore_eos", false))
     {
-        llama.params.logit_bias[llama_token_eos()] = -INFINITY;
+        llama.params.logit_bias[llama_token_eos(llama.ctx)] = -INFINITY;
     }
 
     const auto &logit_bias = body.find("logit_bias");
@@ -1199,6 +1200,12 @@ int main(int argc, char **argv)
         res.set_content(reinterpret_cast<const char*>(&completion_js), completion_js_len, "application/javascript");
         return false; });
 
+    // this is only called if no index.html is found in the public --path
+    svr.Get("/json-schema-to-grammar.mjs", [](const Request &, Response &res)
+            {
+        res.set_content(reinterpret_cast<const char*>(&json_schema_to_grammar_mjs), json_schema_to_grammar_mjs_len, "application/javascript");
+        return false; });
+
     svr.Post("/completion", [&llama](const Request &req, Response &res)
              {
         auto lock = llama.lock();
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 476b31b2e..132f7fbf9 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -106,7 +106,7 @@ int main(int argc, char ** argv) {
         new_token_id = llama_sample_token_greedy(ctx , &candidates_p);
 
         // is it an end of stream ?
-        if (new_token_id == llama_token_eos()) {
+        if (new_token_id == llama_token_eos(ctx)) {
             fprintf(stderr, " [end of text]\n");
             break;
         }
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 79599951c..922518da4 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1996,7 +1996,7 @@ void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens)
     }
 }
 
-void get_example_targets(const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
+void get_example_targets(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     int n_tokens = tokens_input->ne[0];
     int n_vocab  = target_logits->ne[0];
 
@@ -2005,7 +2005,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
 
     ggml_set_f32(target_logits, -1.0f/n_vocab);
     ggml_set_f32(target_probs, 0.0f);
-    ggml_set_i32_1d(tokens_input, 0, llama_token_bos());
+    ggml_set_i32_1d(tokens_input, 0, llama_token_bos(lctx));
     for (int i=1; i<n_tokens+1; ++i) {
         int token = clamp(train_data[sample+i-1], 0, n_vocab-1);
         set_f32_2d(target_logits, token, i-1, +1.0f);
@@ -2016,7 +2016,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
     }
 }
 
-void get_example_targets_batch(struct llama_context * /*lctx*/, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
+void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     GGML_ASSERT(tokens_input->n_dims  == 2);
     GGML_ASSERT(target_logits->n_dims == 3);
     GGML_ASSERT(target_probs->n_dims  == 3);
@@ -2036,7 +2036,7 @@ void get_example_targets_batch(struct llama_context * /*lctx*/, const int * trai
         size_t sample = train_samples[(example_id*n_batch + k) % n_train_samples];
         GGML_ASSERT(sample+n_tokens-1 < n_train_data);
 
-        set_i32_2d(tokens_input, 0, k, llama_token_bos());
+        set_i32_2d(tokens_input, 0, k, llama_token_bos(lctx));
         for (int i=1; i<n_tokens+1; ++i) {
             int token = clamp(train_data[sample+i-1], 0, n_vocab-1);
             // print_token(lctx, token);
@@ -2294,7 +2294,7 @@ llama_token sample(struct my_llama_sampler * sampler, float * logits, const llam
     const auto params = sampler->params;
 
     // Apply penalties
-    const float nl_logit = logits[llama_token_nl()];
+    const float nl_logit = logits[llama_token_nl(ctx)];
 
     const int n_last = std::min(std::min(n_last_tokens, params.repeat_last_n), sampler->n_ctx);
 
@@ -2313,7 +2313,7 @@ llama_token sample(struct my_llama_sampler * sampler, float * logits, const llam
         params.alpha_presence);
 
     if (!params.penalize_nl) {
-        logits[llama_token_nl()] = nl_logit;
+        logits[llama_token_nl(ctx)] = nl_logit;
     }
 
     llama_token token = 0;
@@ -3181,7 +3181,7 @@ int main(int argc, char ** argv) {
     std::vector<int> train_samples;
     train_samples.push_back(0);
     for (int i = 1; i < (int) train_tokens.size() - n_tokens; ++i) {
-        if (!params.samples_start_after_nl || (train_tokens[i-1] == llama_token_nl())) {
+        if (!params.samples_start_after_nl || (train_tokens[i-1] == llama_token_nl(lctx))) {
             train_samples.push_back(i);
         }
     }
@@ -3341,7 +3341,7 @@ int main(int argc, char ** argv) {
         struct ggml_tensor * target_logits = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, n_vocab,  n_tokens);
         struct ggml_tensor * target_probs  = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, n_vocab,  n_tokens);
 
-        get_example_targets(train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), rand()%train_samples.size(), tokens_input, target_logits, target_probs);
+        get_example_targets(lctx, train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), rand()%train_samples.size(), tokens_input, target_logits, target_probs);
         for (int i=sample_ctx; i<n_tokens; ++i) {
             ggml_set_i32_1d(tokens_input, i, n_vocab/2);
         }
diff --git a/flake.nix b/flake.nix
index 4178e97ff..616b90252 100644
--- a/flake.nix
+++ b/flake.nix
@@ -14,8 +14,6 @@
             with pkgs.darwin.apple_sdk_11_0.frameworks; [
               Accelerate
               MetalKit
-              MetalPerformanceShaders
-              MetalPerformanceShadersGraph
             ]
           else if isAarch32 && isDarwin then
             with pkgs.darwin.apple_sdk.frameworks; [
diff --git a/ggml-alloc.c b/ggml-alloc.c
index 4121f3dba..3ee98d03d 100644
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -67,6 +67,8 @@ struct ggml_allocr {
     struct hash_node hash_table[GGML_GRAPH_HASHTABLE_SIZE];
     size_t max_size;
     bool measure;
+    int parse_seq[GGML_MAX_NODES];
+    bool has_parse_seq;
 
 #ifdef GGML_ALLOCATOR_DEBUG
     struct ggml_tensor * allocated_tensors[1024];
@@ -111,10 +113,10 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
 
     size_t max_avail = 0;
 
-    // find the best fitting free block
+    // find the best fitting free block besides the last block
     int best_fit_block = -1;
     size_t best_fit_size = SIZE_MAX;
-    for (int i = 0; i < alloc->n_free_blocks; i++) {
+    for (int i = 0; i < alloc->n_free_blocks - 1; i++) {
         struct free_block * block = &alloc->free_blocks[i];
         max_avail = MAX(max_avail, block->size);
         if (block->size >= size && block->size <= best_fit_size) {
@@ -126,10 +128,17 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
     AT_PRINTF("block %d\n", best_fit_block);
 
     if (best_fit_block == -1) {
-        fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
-                __func__, size, max_avail);
-        GGML_ASSERT(!"not enough space in the buffer");
+        // the last block is our last resort
+        struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
+        if (block->size >= size) {
+            best_fit_block = alloc->n_free_blocks - 1;
+            max_avail = MAX(max_avail, block->size);
+        } else {
+            fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
+                    __func__, size, max_avail);
+            GGML_ASSERT(!"not enough space in the buffer");
         return;
+        }
     }
     struct free_block * block = &alloc->free_blocks[best_fit_block];
     void * addr = block->addr;
@@ -229,6 +238,17 @@ static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_t
     alloc->n_free_blocks++;
 }
 
+void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n) {
+    int pos = 0;
+    for (int i = 0; i < n; i++) {
+        if (list[i] != -1) {
+            alloc->parse_seq[pos] = list[i];
+            pos++;
+        }
+    }
+    alloc->has_parse_seq = true;
+}
+
 void ggml_allocr_reset(struct ggml_allocr * alloc) {
     alloc->n_free_blocks = 1;
     size_t align_offset = aligned_offset(alloc->data, 0, alloc->alignment);
@@ -248,6 +268,8 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
         /*.hash_table    = */ {{0}},
         /*.max_size      = */ 0,
         /*.measure       = */ false,
+        /*.parse_seq     = */ {0},
+        /*.has_parse_seq = */ false,
 #ifdef GGML_ALLOCATOR_DEBUG
         /*.allocated_tensors = */ = {0},
 #endif
@@ -275,6 +297,8 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
         /*.hash_table    = */ {{0}},
         /*.max_size      = */ 0,
         /*.measure       = */ true,
+        /*.parse_seq     = */ {0},
+        /*.has_parse_seq = */ false,
 #ifdef GGML_ALLOCATOR_DEBUG
         /*.allocated_tensors = */ = {0},
 #endif
@@ -473,7 +497,13 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
                 allocate_node(alloc, input);
             }
         }
-        for (int i = 0; i < gf->n_nodes; i++) {
+        for (int ind = 0; ind < gf->n_nodes; ind++) {
+            int i;
+            if (alloc->has_parse_seq) {
+                i = alloc->parse_seq[ind];
+            } else {
+                i = ind;
+            }
             struct ggml_tensor * node = gf->nodes[i];
 
             // allocate parents (leafs)
diff --git a/ggml-alloc.h b/ggml-alloc.h
index a5ec8f87a..14a4350ac 100644
--- a/ggml-alloc.h
+++ b/ggml-alloc.h
@@ -10,6 +10,10 @@ extern "C" {
 GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
 GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);
 
+// tell the allocator to parse nodes following the order described in the list
+// you should call this if your graph are optimized to execute out-of-order
+GGML_API void   ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n);
+
 GGML_API void   ggml_allocr_free(struct ggml_allocr * alloc);
 GGML_API bool   ggml_allocr_is_measure(struct ggml_allocr * alloc);
 GGML_API void   ggml_allocr_reset(struct ggml_allocr * alloc);
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index df0cbe18f..5b415c646 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6469,3 +6469,15 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
     func(tensor->src[0], tensor->src[1], tensor);
     return true;
 }
+
+int ggml_cuda_get_device_count() {
+    int device_count;
+    CUDA_CHECK(cudaGetDeviceCount(&device_count));
+    return device_count;
+}
+
+void ggml_cuda_get_device_description(int device, char * description, size_t description_size) {
+    cudaDeviceProp prop;
+    CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
+    snprintf(description, description_size, "%s", prop.name);
+}
diff --git a/ggml-cuda.h b/ggml-cuda.h
index 72d7afa46..cad05f5fa 100644
--- a/ggml-cuda.h
+++ b/ggml-cuda.h
@@ -8,29 +8,25 @@ extern "C" {
 
 #define GGML_CUDA_MAX_DEVICES       16
 
-void   ggml_init_cublas(void);
-void   ggml_cuda_set_tensor_split(const float * tensor_split);
+GGML_API void   ggml_init_cublas(void);
+GGML_API void * ggml_cuda_host_malloc(size_t size);
+GGML_API void   ggml_cuda_host_free(void * ptr);
 
-void   ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-bool   ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
-void   ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
+GGML_API bool   ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
+GGML_API void   ggml_cuda_set_tensor_split(const float * tensor_split);
+GGML_API void   ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_free_data(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
+GGML_API void   ggml_cuda_set_main_device(int main_device);
+GGML_API void   ggml_cuda_set_mul_mat_q(bool mul_mat_q);
+GGML_API void   ggml_cuda_set_scratch_size(size_t scratch_size);
+GGML_API void   ggml_cuda_free_scratch(void);
+GGML_API bool   ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
 
-// TODO: export these with GGML_API
-void * ggml_cuda_host_malloc(size_t size);
-void   ggml_cuda_host_free(void * ptr);
-
-void   ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
-
-void   ggml_cuda_free_data(struct ggml_tensor * tensor);
-void   ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
-void   ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
-void   ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
-void   ggml_cuda_set_main_device(int main_device);
-void   ggml_cuda_set_mul_mat_q(bool mul_mat_q);
-void   ggml_cuda_set_scratch_size(size_t scratch_size);
-void   ggml_cuda_free_scratch(void);
-bool   ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
+GGML_API int    ggml_cuda_get_device_count(void);
+GGML_API void   ggml_cuda_get_device_description(int device, char * description, size_t description_size);
 
 #ifdef  __cplusplus
 }
diff --git a/ggml-metal.h b/ggml-metal.h
index 1a5d96c33..00202b787 100644
--- a/ggml-metal.h
+++ b/ggml-metal.h
@@ -66,10 +66,13 @@ void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor *
 
 // try to find operations that can be run concurrently in the graph
 // you should run it again if the topology of your graph changes
-void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
+void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf, bool check_mem);
 
-// if the graph has been optimized for concurrently dispatch
-bool ggml_metal_if_optimized(struct ggml_metal_context * ctx);
+// if the graph has been optimized for concurrently dispatch, return length of the concur_list if optimized
+int ggml_metal_if_optimized(struct ggml_metal_context * ctx);
+
+// output the concur_list for ggml_alloc
+int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
 
 // same as ggml_graph_compute but uses Metal
 // creates gf->n_threads command buffers in parallel
diff --git a/ggml-metal.m b/ggml-metal.m
index 9cc4d9e03..835c5f297 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -5,7 +5,6 @@
 #import <Foundation/Foundation.h>
 
 #import <Metal/Metal.h>
-#import <MetalPerformanceShaders/MetalPerformanceShaders.h>
 
 #undef MIN
 #undef MAX
@@ -79,6 +78,14 @@ struct ggml_metal_context {
     GGML_METAL_DECL_KERNEL(mul_mat_q4_K_f32);
     GGML_METAL_DECL_KERNEL(mul_mat_q5_K_f32);
     GGML_METAL_DECL_KERNEL(mul_mat_q6_K_f32);
+    GGML_METAL_DECL_KERNEL(mul_mm_f16_f32);
+    GGML_METAL_DECL_KERNEL(mul_mm_q4_0_f32);
+    GGML_METAL_DECL_KERNEL(mul_mm_q4_1_f32);
+    GGML_METAL_DECL_KERNEL(mul_mm_q2_K_f32);
+    GGML_METAL_DECL_KERNEL(mul_mm_q3_K_f32);
+    GGML_METAL_DECL_KERNEL(mul_mm_q4_K_f32);
+    GGML_METAL_DECL_KERNEL(mul_mm_q5_K_f32);
+    GGML_METAL_DECL_KERNEL(mul_mm_q6_K_f32);
     GGML_METAL_DECL_KERNEL(rope);
     GGML_METAL_DECL_KERNEL(alibi_f32);
     GGML_METAL_DECL_KERNEL(cpy_f32_f16);
@@ -110,13 +117,6 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
     ctx->n_buffers = 0;
     ctx->concur_list_len = 0;
 
-    // determine if we can use MPS
-    if (MPSSupportsMTLDevice(ctx->device)) {
-        fprintf(stderr, "%s: using MPS\n", __func__);
-    } else {
-        fprintf(stderr, "%s: not using MPS\n", __func__);
-        GGML_ASSERT(false && "MPS not supported");
-    }
 
 #if 0
     // compile from source string and show compile log
@@ -163,10 +163,15 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 
     // load kernels
     {
+        NSError * error = nil;
 #define GGML_METAL_ADD_KERNEL(name) \
         ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
-        ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:nil]; \
-        fprintf(stderr, "%s: loaded %-32s %16p\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name);
+        ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
+        fprintf(stderr, "%s: loaded %-32s %16p\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name); \
+        if (error) { \
+            fprintf(stderr, "%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
+            return NULL; \
+        }
 
         GGML_METAL_ADD_KERNEL(add);
         GGML_METAL_ADD_KERNEL(add_row);
@@ -196,6 +201,14 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
         GGML_METAL_ADD_KERNEL(mul_mat_q4_K_f32);
         GGML_METAL_ADD_KERNEL(mul_mat_q5_K_f32);
         GGML_METAL_ADD_KERNEL(mul_mat_q6_K_f32);
+        GGML_METAL_ADD_KERNEL(mul_mm_f16_f32);
+        GGML_METAL_ADD_KERNEL(mul_mm_q4_0_f32);
+        GGML_METAL_ADD_KERNEL(mul_mm_q4_1_f32);
+        GGML_METAL_ADD_KERNEL(mul_mm_q2_K_f32);
+        GGML_METAL_ADD_KERNEL(mul_mm_q3_K_f32);
+        GGML_METAL_ADD_KERNEL(mul_mm_q4_K_f32);
+        GGML_METAL_ADD_KERNEL(mul_mm_q5_K_f32);
+        GGML_METAL_ADD_KERNEL(mul_mm_q6_K_f32);
         GGML_METAL_ADD_KERNEL(rope);
         GGML_METAL_ADD_KERNEL(alibi_f32);
         GGML_METAL_ADD_KERNEL(cpy_f32_f16);
@@ -243,11 +256,12 @@ void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb) {
     ctx->n_cb = n_cb;
 }
 
-bool ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
-    if (ctx->concur_list_len) {
-        return true;
-    }
-    return false;
+int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
+    return ctx->concur_list_len;
+}
+
+int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx) {
+    return ctx->concur_list;
 }
 
 // finds the Metal buffer that contains the tensor data on the GPU device
@@ -390,7 +404,7 @@ void ggml_metal_get_tensor(
 
 void ggml_metal_graph_find_concurrency(
         struct ggml_metal_context * ctx,
-        struct ggml_cgraph * gf) {
+        struct ggml_cgraph * gf, bool check_mem) {
     int search_depth = gf->n_nodes; //we only find concurrency in this range to avoid wasting too much time
     int nodes_unused[GGML_MAX_CONCUR];
 
@@ -437,7 +451,7 @@ void ggml_metal_graph_find_concurrency(
                         }
                     }
                 }
-                if (exe_flag) {
+                if (exe_flag && check_mem) {
                     // check if nodes[i]'s data will be overwritten by a node before nodes[i].
                     // if node[5] and node[3] write to the same memory region, then we can't issue node[5] before node[3]
                     int64_t data_start = (int64_t) gf->nodes[i]->data;
@@ -521,7 +535,7 @@ void ggml_metal_graph_compute(
 
             id<MTLCommandBuffer> command_buffer = command_buffers[cb_idx];
 
-            id<MTLComputeCommandEncoder> encoder = nil;
+            id<MTLComputeCommandEncoder> encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
 
             const int node_start =                                  (cb_idx + 0) * n_nodes_per_cb;
             const int node_end   = (cb_idx == n_cb - 1) ? n_nodes : (cb_idx + 1) * n_nodes_per_cb;
@@ -530,10 +544,6 @@ void ggml_metal_graph_compute(
                 const int i = has_concur ? ctx->concur_list[ind] : ind;
 
                 if (i == -1) {
-                    if (encoder == nil) {
-                        encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                        continue;
-                    }
                     [encoder memoryBarrierWithScope:MTLBarrierScopeBuffers];
                     continue;
                 }
@@ -607,10 +617,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_ADD:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             if (ggml_nelements(src1) == ne10) {
                                 // src1 is a row
                                 [encoder setComputePipelineState:ctx->pipeline_add_row];
@@ -628,10 +634,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_MUL:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             if (ggml_nelements(src1) == ne10) {
                                 // src1 is a row
                                 [encoder setComputePipelineState:ctx->pipeline_mul_row];
@@ -649,10 +651,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_SCALE:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             const float scale = *(const float *) src1->data;
 
                             [encoder setComputePipelineState:ctx->pipeline_scale];
@@ -668,10 +666,6 @@ void ggml_metal_graph_compute(
                         switch (ggml_get_unary_op(gf->nodes[i])) {
                             case GGML_UNARY_OP_SILU:
                                 {
-                                    if (encoder == nil) {
-                                        encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                                    }
-
                                     [encoder setComputePipelineState:ctx->pipeline_silu];
                                     [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
                                     [encoder setBuffer:id_dst  offset:offs_dst  atIndex:1];
@@ -682,10 +676,6 @@ void ggml_metal_graph_compute(
                                 } break;
                             case GGML_UNARY_OP_RELU:
                                 {
-                                    if (encoder == nil) {
-                                        encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                                    }
-
                                     [encoder setComputePipelineState:ctx->pipeline_relu];
                                     [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
                                     [encoder setBuffer:id_dst  offset:offs_dst  atIndex:1];
@@ -696,10 +686,6 @@ void ggml_metal_graph_compute(
                                 } break;
                             case GGML_UNARY_OP_GELU:
                                 {
-                                    if (encoder == nil) {
-                                        encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                                    }
-
                                     [encoder setComputePipelineState:ctx->pipeline_gelu];
                                     [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
                                     [encoder setBuffer:id_dst  offset:offs_dst  atIndex:1];
@@ -716,10 +702,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_SOFT_MAX:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             const int nth = 32;
 
                             [encoder setComputePipelineState:ctx->pipeline_soft_max];
@@ -734,10 +716,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_DIAG_MASK_INF:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             const int n_past = ((int32_t *)(dst->op_params))[0];
 
                             [encoder setComputePipelineState:ctx->pipeline_diag_mask_inf];
@@ -755,53 +733,43 @@ void ggml_metal_graph_compute(
 
                             GGML_ASSERT(ne00 == ne10);
                             // GGML_ASSERT(ne02 == ne12); // Should be checked on individual data types until broadcast is implemented everywhere
+                            uint gqa = ne12/ne02;
                             GGML_ASSERT(ne03 == ne13);
 
+                            // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
+                            // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
                             if (ggml_is_contiguous(src0) &&
                                 ggml_is_contiguous(src1) &&
-                                (src0t == GGML_TYPE_F32 || src0t == GGML_TYPE_F16) && ne11 > 1) {
-
-                                if (encoder != nil) {
-                                    [encoder endEncoding];
-                                    encoder = nil;
+                                src1t == GGML_TYPE_F32 &&
+                                [ctx->device supportsFamily:MTLGPUFamilyApple7] &&
+                                ne00%32 == 0 &&
+                                ne11 > 1) {
+                                    switch (src0->type) {
+                                        case GGML_TYPE_F16:  [encoder setComputePipelineState:ctx->pipeline_mul_mm_f16_f32]; break;
+                                        case GGML_TYPE_Q4_0: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_0_f32]; break;
+                                        case GGML_TYPE_Q4_1: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_1_f32]; break;
+                                        case GGML_TYPE_Q2_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q2_K_f32]; break;
+                                        case GGML_TYPE_Q3_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q3_K_f32]; break;
+                                        case GGML_TYPE_Q4_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_K_f32]; break;
+                                        case GGML_TYPE_Q5_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q5_K_f32]; break;
+                                        case GGML_TYPE_Q6_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q6_K_f32]; break;
+                                        default: GGML_ASSERT(false && "MUL MAT-MAT not implemented");
+                                    }
+                                    [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
+                                    [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
+                                    [encoder setBuffer:id_dst  offset:offs_dst  atIndex:2];
+                                    [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
+                                    [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
+                                    [encoder setBytes:&nb01 length:sizeof(nb01) atIndex:5];
+                                    [encoder setBytes:&nb02 length:sizeof(nb02) atIndex:6];
+                                    [encoder setBytes:&ne12 length:sizeof(ne12) atIndex:7];
+                                    [encoder setBytes:&ne0 length:sizeof(ne0) atIndex:8];
+                                    [encoder setBytes:&ne1 length:sizeof(ne1) atIndex:9];
+                                    [encoder setBytes:&gqa length:sizeof(gqa) atIndex:10];
+                                    [encoder setThreadgroupMemoryLength:8192 atIndex:0];
+                                    [encoder dispatchThreadgroups:MTLSizeMake( (ne11+31)/32, (ne01+63) / 64, ne12) threadsPerThreadgroup:MTLSizeMake(128, 1, 1)];
                                 }
-
-                                MPSDataType src0dt = src0t == GGML_TYPE_F32 ? MPSDataTypeFloat32 : MPSDataTypeFloat16;
-                                MPSDataType src1dt = src1t == GGML_TYPE_F32 ? MPSDataTypeFloat32 : MPSDataTypeFloat16;
-
-                                // for F32 x F32 we use MPS
-                                MPSMatrixDescriptor * desc0 = [MPSMatrixDescriptor
-                                    matrixDescriptorWithRows:ne01 columns:ne00 rowBytes:src0->nb[1] dataType:src0dt];
-
-                                MPSMatrixDescriptor * desc1 = [MPSMatrixDescriptor
-                                    matrixDescriptorWithRows:ne11 columns:ne10 rowBytes:src1->nb[1] dataType:src1dt];
-
-                                MPSMatrixDescriptor * desc  = [MPSMatrixDescriptor
-                                    matrixDescriptorWithRows:ne1 columns:ne0 rowBytes:dst->nb[1] dataType:MPSDataTypeFloat32];
-
-                                MPSMatrixMultiplication * mul = [[MPSMatrixMultiplication alloc]
-                                    initWithDevice:ctx->device transposeLeft:false transposeRight:true
-                                        resultRows:ne11 resultColumns:ne01 interiorColumns:ne00 alpha:1.0 beta:0.0];
-
-                                // we need to do ne12 multiplications
-                                // TODO: is there a way to do this in parallel - currently very slow ..
-                                // TODO: might be possible to offload part of the computation to ANE using Accelerate's CBLAS
-                                for (int64_t i02 = 0; i02 < ne12; ++i02) {
-                                    size_t offs_src0_cur = offs_src0 + i02/(ne12/ne02)*nb02; // gqa not used for now
-                                    size_t offs_src1_cur = offs_src1 + i02*nb12;
-                                    size_t offs_dst_cur  = offs_dst  + i02*nb2;
-
-                                    MPSMatrix * mat_src0 = [[MPSMatrix alloc] initWithBuffer:id_src0 offset:offs_src0_cur descriptor:desc0];
-                                    MPSMatrix * mat_src1 = [[MPSMatrix alloc] initWithBuffer:id_src1 offset:offs_src1_cur descriptor:desc1];
-                                    MPSMatrix * mat_dst  = [[MPSMatrix alloc] initWithBuffer:id_dst  offset:offs_dst_cur  descriptor:desc ];
-
-                                    [mul encodeToCommandBuffer:command_buffer leftMatrix:mat_src1 rightMatrix:mat_src0 resultMatrix:mat_dst];
-                                }
-                            } else {
-                                if (encoder == nil) {
-                                    encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                                }
-
+                            else {
                                 int nth0 = 32;
                                 int nth1 = 1;
 
@@ -900,23 +868,24 @@ void ggml_metal_graph_compute(
                                 [encoder setBytes:&nb12 length:sizeof(nb12) atIndex:14];
                                 [encoder setBytes:&ne0  length:sizeof(ne0)  atIndex:15];
                                 [encoder setBytes:&ne1  length:sizeof(ne1)  atIndex:16];
+                                [encoder setBytes:&gqa length:sizeof(gqa) atIndex:17];
 
                                 if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
                                     src0t == GGML_TYPE_Q2_K || src0t == GGML_TYPE_Q4_K) {
-                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7) / 8, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7) / 8, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
                                 }
                                 else if (src0t == GGML_TYPE_Q3_K) {
 #ifdef GGML_QKK_64
-                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01+1)/2, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01+1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
 #else
-                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01+3)/4, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01+3)/4, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
 #endif
                                 }
                                 else if (src0t == GGML_TYPE_Q5_K) {
-                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 3) / 4, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 3) / 4, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
                                 }
                                 else if (src0t == GGML_TYPE_Q6_K) {
-                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01+1)/2, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01+1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
                                 } else {
                                     [encoder setThreadgroupMemoryLength:nth0*sizeof(float) atIndex:0];
                                     [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
@@ -925,10 +894,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_GET_ROWS:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             switch (src0->type) {
                                 case GGML_TYPE_F16:  [encoder setComputePipelineState:ctx->pipeline_get_rows_f16]; break;
                                 case GGML_TYPE_Q4_0: [encoder setComputePipelineState:ctx->pipeline_get_rows_q4_0]; break;
@@ -954,10 +919,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_RMS_NORM:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             float eps;
                             memcpy(&eps, dst->op_params, sizeof(float));
 
@@ -977,10 +938,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_NORM:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             const float eps = 1e-5f;
 
                             const int nth = 256;
@@ -999,10 +956,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_ALIBI:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             GGML_ASSERT((src0t == GGML_TYPE_F32));
 
                             const int n_past = ((int32_t *) dst->op_params)[0]; UNUSED(n_past);
@@ -1042,10 +995,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_ROPE:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             const int n_past = ((int32_t *) dst->op_params)[0];
                             const int n_dims = ((int32_t *) dst->op_params)[1];
                             const int mode   = ((int32_t *) dst->op_params)[2];
@@ -1086,10 +1035,6 @@ void ggml_metal_graph_compute(
                     case GGML_OP_CPY:
                     case GGML_OP_CONT:
                         {
-                            if (encoder == nil) {
-                                encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
-                            }
-
                             const int nth = 32;
 
                             switch (src0t) {
diff --git a/ggml-metal.metal b/ggml-metal.metal
index 8d26b5ec2..3f3125236 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -18,47 +18,6 @@ typedef struct {
     uint8_t qs[QK4_1 / 2];  // nibbles / quants
 } block_q4_1;
 
-static void dequantize_row_q4_0(device const block_q4_0 * x, device float * y, int k) {
-    const int qk = QK4_0;
-
-    assert(k % qk == 0);
-
-    const int nb = k / qk;
-
-    for (int i = 0; i < nb; i++) {
-        const half d = x[i].d;
-
-        for (int j = 0; j < qk/2; ++j) {
-            const int x0 = (x[i].qs[j] & 0x0F) - 8;
-            const int x1 = (x[i].qs[j] >>   4) - 8;
-
-            y[i*qk + j + 0   ] = x0*d;
-            y[i*qk + j + qk/2] = x1*d;
-        }
-    }
-}
-
-static void dequantize_row_q4_1(device const block_q4_1 * x, device float * y, int k) {
-    const int qk = QK4_1;
-
-    assert(k % qk == 0);
-
-    const int nb = k / qk;
-
-    for (int i = 0; i < nb; i++) {
-        const half d = x[i].d;
-        const half m = x[i].m;
-
-        for (int j = 0; j < qk/2; ++j) {
-            const int x0 = (x[i].qs[j] & 0x0F);
-            const int x1 = (x[i].qs[j] >>   4);
-
-            y[i*qk + j + 0   ] = x0*d + m;
-            y[i*qk + j + qk/2] = x1*d + m;
-        }
-    }
-}
-
 kernel void kernel_add(
         device const float * src0,
         device const float * src1,
@@ -219,54 +178,6 @@ kernel void kernel_diag_mask_inf(
     }
 }
 
-kernel void kernel_get_rows_f16(
-        device const  void * src0,
-        device const   int * src1,
-        device       float * dst,
-        constant   int64_t & ne00,
-        constant  uint64_t & nb01,
-        constant  uint64_t & nb1,
-        uint tpig[[thread_position_in_grid]]) {
-    const int i = tpig;
-    const int r = ((device int32_t *) src1)[i];
-
-    for (int j = 0; j < ne00; j++) {
-        dst[i*nb1 + j] = ((device half *) ((device char *) src0 + r*nb01))[j];
-    }
-}
-
-kernel void kernel_get_rows_q4_0(
-        device const  void * src0,
-        device const   int * src1,
-        device       float * dst,
-        constant   int64_t & ne00,
-        constant  uint64_t & nb01,
-        constant  uint64_t & nb1,
-        uint tpig[[thread_position_in_grid]]) {
-    const int i = tpig;
-    const int r = ((device int32_t *) src1)[i];
-
-    dequantize_row_q4_0(
-            (device const block_q4_0 *) ((device char *) src0 + r*nb01),
-                       (device float *) ((device char *)  dst + i*nb1), ne00);
-}
-
-kernel void kernel_get_rows_q4_1(
-        device const  void * src0,
-        device const   int * src1,
-        device       float * dst,
-        constant   int64_t & ne00,
-        constant  uint64_t & nb01,
-        constant  uint64_t & nb1,
-        uint tpig[[thread_position_in_grid]]) {
-    const int i = tpig;
-    const int r = ((device int32_t *) src1)[i];
-
-    dequantize_row_q4_1(
-            (device const block_q4_1 *) ((device char *) src0 + r*nb01),
-                       (device float *) ((device char *)  dst + i*nb1), ne00);
-}
-
 kernel void kernel_norm(
         device const  void * src0,
         device       float * dst,
@@ -432,14 +343,16 @@ inline float block_q_n_dot_y(device const block_q4_1 * qb_curr, float sumy, thre
 //      N_DST, so this is another explicit assumption of the implementation.
 template<typename block_q_type, int nr, int nsg, int nw>
 void mul_vec_q_n_f32(device const void * src0, device const float * src1, device float * dst,
-                    int64_t ne00, int64_t ne10, int64_t ne0, int64_t ne01,
-                    uint2 tgpig, uint tiisg, uint sgitg) {
+                    int64_t ne00, int64_t ne01, int64_t ne02, int64_t ne10, int64_t ne12, int64_t ne0, int64_t ne1, uint gqa,
+                    uint3 tgpig, uint tiisg, uint sgitg) {
     const int nb = ne00/QK4_0;
     const int r0 = tgpig.x;
     const int r1 = tgpig.y;
+    const int im = tgpig.z;
     const int first_row = (r0 * nsg + sgitg) * nr;
-    device const block_q_type * x = (device const block_q_type *) src0 + first_row * nb;
-    device const float      * y = (device const float      *) src1 + r1*ne10;
+    const uint offset0 = first_row * nb + im/gqa*(nb*ne0);
+    device const block_q_type * x = (device const block_q_type *) src0 + offset0;
+    device const float      * y = (device const float      *) src1 + r1*ne10 + im*ne00*ne1;
     float yl[16];       // src1 vector cache
     float sumf[nr]={0.f};
 
@@ -470,7 +383,7 @@ void mul_vec_q_n_f32(device const void * src0, device const float * src1, device
     for (int row = 0; row < nr; ++row) {
         const float tot = simd_sum(sumf[row]);
         if (tiisg == 0 && first_row + row < ne01) {
-            dst[r1*ne0 + first_row + row] = tot;
+            dst[r1*ne0 + im*ne0*ne1 + first_row + row] = tot;
         }
     }
 }
@@ -480,13 +393,17 @@ kernel void kernel_mul_mat_q4_0_f32(
         device const float * src1,
         device       float * dst,
         constant   int64_t & ne00,
-        constant   int64_t & ne10,
-        constant   int64_t & ne0,
         constant   int64_t & ne01[[buffer(4)]],
-        uint2 tgpig[[threadgroup_position_in_grid]],
+        constant   int64_t & ne02[[buffer(5)]],
+        constant   int64_t & ne10[[buffer(9)]],
+        constant   int64_t & ne12[[buffer(11)]],
+        constant   int64_t & ne0[[buffer(15)]],
+        constant   int64_t & ne1[[buffer(16)]],
+        constant   uint    & gqa[[buffer(17)]],
+        uint3 tgpig[[threadgroup_position_in_grid]],
         uint tiisg[[thread_index_in_simdgroup]],
         uint sgitg[[simdgroup_index_in_threadgroup]]) {
-    mul_vec_q_n_f32<block_q4_0, N_DST, N_SIMDGROUP, N_SIMDWIDTH>(src0,src1,dst,ne00,ne10,ne0,ne01,tgpig,tiisg,sgitg);
+    mul_vec_q_n_f32<block_q4_0, N_DST, N_SIMDGROUP, N_SIMDWIDTH>(src0,src1,dst,ne00,ne01,ne02,ne10,ne12,ne0,ne1,gqa,tgpig,tiisg,sgitg);
 }
 
 kernel void kernel_mul_mat_q4_1_f32(
@@ -494,13 +411,17 @@ kernel void kernel_mul_mat_q4_1_f32(
         device const float * src1,
         device       float * dst,
         constant   int64_t & ne00,
-        constant   int64_t & ne10,
-        constant   int64_t & ne0,
         constant   int64_t & ne01[[buffer(4)]],
-        uint2 tgpig[[threadgroup_position_in_grid]],
+        constant   int64_t & ne02[[buffer(5)]],
+        constant   int64_t & ne10[[buffer(9)]],
+        constant   int64_t & ne12[[buffer(11)]],
+        constant   int64_t & ne0[[buffer(15)]],
+        constant   int64_t & ne1[[buffer(16)]],
+        constant   uint    & gqa[[buffer(17)]],
+        uint3 tgpig[[threadgroup_position_in_grid]],
         uint tiisg[[thread_index_in_simdgroup]],
         uint sgitg[[simdgroup_index_in_threadgroup]]) {
-     mul_vec_q_n_f32<block_q4_1, N_DST, N_SIMDGROUP, N_SIMDWIDTH>(src0,src1,dst,ne00,ne10,ne0,ne01,tgpig,tiisg,sgitg);
+     mul_vec_q_n_f32<block_q4_1, N_DST, N_SIMDGROUP, N_SIMDWIDTH>(src0,src1,dst,ne00,ne01,ne02,ne10,ne12,ne0,ne1,gqa,tgpig,tiisg,sgitg);
 }
 
 kernel void kernel_mul_mat_f16_f32(
@@ -869,354 +790,6 @@ static inline uchar4 get_scale_min_k4(int j, device const uint8_t * q) {
     return r;
 }
 
-//========================================== dequantization =============================
-
-static void dequantize_row_q2_K(device const block_q2_K * x, device float * y, int k) {
-    assert(k % QK_K == 0);
-    const int nb = k / QK_K;
-
-    for (int i = 0; i < nb; i++) {
-
-        const float d = x[i].d;
-        const float min = x[i].dmin;
-
-        device const uint8_t * q = x[i].qs;
-
-#if QK_K == 256
-        int is = 0;
-        float dl, ml;
-        for (int n = 0; n < QK_K; n += 128) {
-            int shift = 0;
-            for (int j = 0; j < 4; ++j) {
-
-                uint8_t sc = x[i].scales[is++];
-                dl = d * (sc & 0xF); ml = min * (sc >> 4);
-                for (int l = 0; l < 16; ++l) *y++ = dl * ((int8_t)((q[l] >> shift) & 3)) - ml;
-
-                sc = x[i].scales[is++];
-                dl = d * (sc & 0xF); ml = min * (sc >> 4);
-                for (int l = 0; l < 16; ++l) *y++ = dl * ((int8_t)((q[l+16] >> shift) & 3)) - ml;
-
-                shift += 2;
-            }
-            q += 32;
-        }
-#else
-        float dl1 = d * (x[i].scales[0] & 0xF), ml1 = min * (x[i].scales[0] >> 4);
-        float dl2 = d * (x[i].scales[1] & 0xF), ml2 = min * (x[i].scales[1] >> 4);
-        float dl3 = d * (x[i].scales[2] & 0xF), ml3 = min * (x[i].scales[2] >> 4);
-        float dl4 = d * (x[i].scales[3] & 0xF), ml4 = min * (x[i].scales[3] >> 4);
-        for (int l = 0; l < 16; ++l) {
-            y[l+ 0] = dl1 * ((q[l] >> 0) & 3) - ml1;
-            y[l+16] = dl2 * ((q[l] >> 2) & 3) - ml2;
-            y[l+32] = dl3 * ((q[l] >> 4) & 3) - ml3;
-            y[l+48] = dl4 * ((q[l] >> 6) & 3) - ml4;
-        }
-        y += QK_K;
-#endif
-
-    }
-}
-
-static void dequantize_row_q3_K(device const block_q3_K * x, device float * y, int k) {
-    assert(k % QK_K == 0);
-    const int nb = k / QK_K;
-
-#if QK_K == 256
-
-    const uint16_t kmask1 = 0x0303;
-    const uint16_t kmask2 = 0x0f0f;
-
-    uint16_t aux[8];
-    thread const int8_t * scales = (thread const int8_t*)aux;
-
-    for (int i = 0; i < nb; i++) {
-
-        const float d_all = (float)(x[i].d);
-
-        device const uint8_t * q = x[i].qs;
-        device const uint8_t * h = x[i].hmask;
-        uint8_t m = 1;
-
-        device const uint16_t * a = (device const uint16_t *)x[i].scales;
-        aux[0] = (a[0] & kmask2) | (((a[4] >> 0) & kmask1) << 4);
-        aux[1] = (a[1] & kmask2) | (((a[5] >> 0) & kmask1) << 4);
-        aux[2] = (a[2] & kmask2) | (((a[4] >> 2) & kmask1) << 4);
-        aux[3] = (a[3] & kmask2) | (((a[5] >> 2) & kmask1) << 4);
-        aux[4] = ((a[0] >> 4) & kmask2) | (((a[4] >> 4) & kmask1) << 4);
-        aux[5] = ((a[1] >> 4) & kmask2) | (((a[5] >> 4) & kmask1) << 4);
-        aux[6] = ((a[2] >> 4) & kmask2) | (((a[4] >> 6) & kmask1) << 4);
-        aux[7] = ((a[3] >> 4) & kmask2) | (((a[5] >> 6) & kmask1) << 4);
-
-        int is = 0;
-        float dl;
-        for (int n = 0; n < QK_K; n += 128) {
-            int shift = 0;
-            for (int j = 0; j < 4; ++j) {
-
-                dl = d_all * (scales[is++] - 32);
-                for (int l = 0; l < 16; ++l) {
-                    *y++ = dl * ((int8_t)((q[l+ 0] >> shift) & 3) - ((h[l+ 0] & m) ? 0 : 4));
-                }
-
-                dl = d_all * (scales[is++] - 32);
-                for (int l = 0; l < 16; ++l) {
-                    *y++ = dl * ((int8_t)((q[l+16] >> shift) & 3) - ((h[l+16] & m) ? 0 : 4));
-                }
-
-                shift += 2;
-                m <<= 1;
-            }
-            q += 32;
-        }
-    }
-#else
-    for (int i = 0; i < nb; i++) {
-
-        const float d_all = (float)(x[i].d);
-
-        device const uint8_t * q = x[i].qs;
-        device const uint8_t * hm = x[i].hmask;
-
-        const float d1 = d_all * ((x[i].scales[0] & 0xF) - 8);
-        const float d2 = d_all * ((x[i].scales[0] >>  4) - 8);
-        const float d3 = d_all * ((x[i].scales[1] & 0xF) - 8);
-        const float d4 = d_all * ((x[i].scales[1] >>  4) - 8);
-
-        for (int l = 0; l < 8; ++l) {
-            uint8_t h = hm[l];
-            y[l+ 0] = d1 * ((int8_t)((q[l+0] >> 0) & 3) - ((h & 0x01) ? 0 : 4));
-            y[l+ 8] = d1 * ((int8_t)((q[l+8] >> 0) & 3) - ((h & 0x02) ? 0 : 4));
-            y[l+16] = d2 * ((int8_t)((q[l+0] >> 2) & 3) - ((h & 0x04) ? 0 : 4));
-            y[l+24] = d2 * ((int8_t)((q[l+8] >> 2) & 3) - ((h & 0x08) ? 0 : 4));
-            y[l+32] = d3 * ((int8_t)((q[l+0] >> 4) & 3) - ((h & 0x10) ? 0 : 4));
-            y[l+40] = d3 * ((int8_t)((q[l+8] >> 4) & 3) - ((h & 0x20) ? 0 : 4));
-            y[l+48] = d4 * ((int8_t)((q[l+0] >> 6) & 3) - ((h & 0x40) ? 0 : 4));
-            y[l+56] = d4 * ((int8_t)((q[l+8] >> 6) & 3) - ((h & 0x80) ? 0 : 4));
-        }
-        y += QK_K;
-    }
-#endif
-
-}
-
-static void dequantize_row_q4_K(device const block_q4_K * x, device float * y, int k) {
-    assert(k % QK_K == 0);
-    const int nb = k / QK_K;
-
-    for (int i = 0; i < nb; i++) {
-
-        device const uint8_t * q = x[i].qs;
-
-#if QK_K == 256
-        const float d = x[i].d;
-        const float min = x[i].dmin;
-
-        device const uint8_t * scales = x[i].scales;
-
-        int is = 0;
-        for (int j = 0; j < QK_K; j += 64) {
-            const uchar4 sc = get_scale_min_k4(is, scales);
-            const float d1 = d * sc[0]; const float m1 = min * sc[1];
-            const float d2 = d * sc[2]; const float m2 = min * sc[3];
-            for (int l = 0; l < 32; ++l) *y++ = d1 * (q[l] & 0xF) - m1;
-            for (int l = 0; l < 32; ++l) *y++ = d2 * (q[l]  >> 4) - m2;
-            q += 32; is += 2;
-        }
-#else
-        device const uint8_t * s = x[i].scales;
-        device const half2 * dh = (device const half2 *)x[i].d;
-        const float2 d = (float2)dh[0];
-        const float d1 = d[0] * (s[0] & 0xF);
-        const float d2 = d[0] * (s[1] & 0xF);
-        const float m1 = d[1] * (s[0] >>  4);
-        const float m2 = d[1] * (s[1] >>  4);
-        for (int l = 0; l < 32; ++l) {
-            y[l+ 0] = d1 * (q[l] & 0xF) - m1;
-            y[l+32] = d2 * (q[l] >>  4) - m2;
-        }
-        y += QK_K;
-#endif
-
-    }
-}
-
-static void dequantize_row_q5_K(device const block_q5_K * x, device float * y, int k) {
-    assert(k % QK_K == 0);
-    const int nb = k / QK_K;
-
-#if QK_K == 256
-   for (int i = 0; i < nb; i++) {
-
-        const float d = (float)(x[i].d);
-        const float min = (float)(x[i].dmin);
-
-        device const uint8_t * ql = x[i].qs;
-        device const uint8_t * qh = x[i].qh;
-
-        int is = 0;
-        uint8_t u1 = 1, u2 = 2;
-        for (int j = 0; j < QK_K; j += 64) {
-            const uchar4 sc = get_scale_min_k4(is, x[i].scales);
-            const float d1 = d * sc[0]; const float m1 = min * sc[1];
-            const float d2 = d * sc[2]; const float m2 = min * sc[3];
-            for (int l = 0; l < 32; ++l) *y++ = d1 * ((ql[l] & 0xF) + (qh[l] & u1 ? 16 : 0)) - m1;
-            for (int l = 0; l < 32; ++l) *y++ = d2 * ((ql[l]  >> 4) + (qh[l] & u2 ? 16 : 0)) - m2;
-            ql += 32; is += 2;
-            u1 <<= 2; u2 <<= 2;
-        }
-    }
-#else
-    for (int i = 0; i < nb; i++) {
-
-        const float d = (float)x[i].d;
-
-        device const uint8_t * ql = x[i].qs;
-        device const uint8_t * qh = x[i].qh;
-        device const int8_t  * sc = x[i].scales;
-
-        for (int l = 0; l < 8; ++l) {
-            y[l+ 0] = d * sc[0] * ((ql[l+ 0] & 0xF) - (qh[l] & 0x01 ? 0 : 16));
-            y[l+ 8] = d * sc[0] * ((ql[l+ 8] & 0xF) - (qh[l] & 0x02 ? 0 : 16));
-            y[l+16] = d * sc[1] * ((ql[l+16] & 0xF) - (qh[l] & 0x04 ? 0 : 16));
-            y[l+24] = d * sc[1] * ((ql[l+24] & 0xF) - (qh[l] & 0x08 ? 0 : 16));
-            y[l+32] = d * sc[2] * ((ql[l+ 0] >>  4) - (qh[l] & 0x10 ? 0 : 16));
-            y[l+40] = d * sc[2] * ((ql[l+ 8] >>  4) - (qh[l] & 0x20 ? 0 : 16));
-            y[l+48] = d * sc[3] * ((ql[l+16] >>  4) - (qh[l] & 0x40 ? 0 : 16));
-            y[l+56] = d * sc[3] * ((ql[l+24] >>  4) - (qh[l] & 0x80 ? 0 : 16));
-        }
-        y += QK_K;
-    }
-#endif
-
-}
-
-static void dequantize_row_q6_K(device const block_q6_K * x, device float * y, int k) {
-    assert(k % QK_K == 0);
-    const int nb = k / QK_K;
-
-    for (int i = 0; i < nb; i++) {
-
-        device const uint8_t * ql = x[i].ql;
-        device const uint8_t * qh = x[i].qh;
-        device const int8_t  * sc = x[i].scales;
-
-        const float d = x[i].d;
-
-#if QK_K == 256
-        for (int n = 0; n < QK_K; n += 128) {
-            for (int l = 0; l < 32; ++l) {
-                int is = l/16;
-                const int8_t q1 = (int8_t)((ql[l +  0] & 0xF) | (((qh[l] >> 0) & 3) << 4)) - 32;
-                const int8_t q2 = (int8_t)((ql[l + 32] & 0xF) | (((qh[l] >> 2) & 3) << 4)) - 32;
-                const int8_t q3 = (int8_t)((ql[l +  0]  >> 4) | (((qh[l] >> 4) & 3) << 4)) - 32;
-                const int8_t q4 = (int8_t)((ql[l + 32]  >> 4) | (((qh[l] >> 6) & 3) << 4)) - 32;
-                y[l +  0] = d * sc[is + 0] * q1;
-                y[l + 32] = d * sc[is + 2] * q2;
-                y[l + 64] = d * sc[is + 4] * q3;
-                y[l + 96] = d * sc[is + 6] * q4;
-            }
-            y  += 128;
-            ql += 64;
-            qh += 32;
-            sc += 8;
-        }
-#else
-        for (int l = 0; l < 16; ++l) {
-            const int8_t q1 = (int8_t)((ql[l+ 0] & 0xF) | (((qh[l] >> 0) & 3) << 4)) - 32;
-            const int8_t q2 = (int8_t)((ql[l+16] & 0xF) | (((qh[l] >> 2) & 3) << 4)) - 32;
-            const int8_t q3 = (int8_t)((ql[l+ 0]  >> 4) | (((qh[l] >> 4) & 3) << 4)) - 32;
-            const int8_t q4 = (int8_t)((ql[l+16]  >> 4) | (((qh[l] >> 6) & 3) << 4)) - 32;
-            y[l+ 0] = d * sc[0] * q1;
-            y[l+16] = d * sc[1] * q2;
-            y[l+32] = d * sc[2] * q3;
-            y[l+48] = d * sc[3] * q4;
-        }
-        y  += 64;
-#endif
-    }
-}
-
-kernel void kernel_get_rows_q2_K(
-        device const  void * src0,
-        device const   int * src1,
-        device       float * dst,
-        constant   int64_t & ne00,
-        constant  uint64_t & nb01,
-        constant  uint64_t & nb1,
-        uint tpig[[thread_position_in_grid]]) {
-    const int i = tpig;
-    const int r = ((device int32_t *) src1)[i];
-
-    dequantize_row_q2_K(
-            (device const block_q2_K *) ((device char *) src0 + r*nb01),
-                       (device float *) ((device char *)  dst + i*nb1), ne00);
-}
-
-kernel void kernel_get_rows_q3_K(
-        device const  void * src0,
-        device const   int * src1,
-        device       float * dst,
-        constant   int64_t & ne00,
-        constant  uint64_t & nb01,
-        constant  uint64_t & nb1,
-        uint tpig[[thread_position_in_grid]]) {
-    const int i = tpig;
-    const int r = ((device int32_t *) src1)[i];
-
-    dequantize_row_q3_K(
-            (device const block_q3_K *) ((device char *) src0 + r*nb01),
-                       (device float *) ((device char *)  dst + i*nb1), ne00);
-}
-
-kernel void kernel_get_rows_q4_K(
-        device const  void * src0,
-        device const   int * src1,
-        device       float * dst,
-        constant   int64_t & ne00,
-        constant  uint64_t & nb01,
-        constant  uint64_t & nb1,
-        uint tpig[[thread_position_in_grid]]) {
-    const int i = tpig;
-    const int r = ((device int32_t *) src1)[i];
-
-    dequantize_row_q4_K(
-            (device const block_q4_K *) ((device char *) src0 + r*nb01),
-                       (device float *) ((device char *)  dst + i*nb1), ne00);
-}
-
-kernel void kernel_get_rows_q5_K(
-        device const  void * src0,
-        device const   int * src1,
-        device       float * dst,
-        constant   int64_t & ne00,
-        constant  uint64_t & nb01,
-        constant  uint64_t & nb1,
-        uint tpig[[thread_position_in_grid]]) {
-    const int i = tpig;
-    const int r = ((device int32_t *) src1)[i];
-
-    dequantize_row_q5_K(
-            (device const block_q5_K *) ((device char *) src0 + r*nb01),
-                       (device float *) ((device char *)  dst + i*nb1), ne00);
-}
-
-kernel void kernel_get_rows_q6_K(
-        device const  void * src0,
-        device const   int * src1,
-        device       float * dst,
-        constant   int64_t & ne00,
-        constant  uint64_t & nb01,
-        constant  uint64_t & nb1,
-        uint tpig[[thread_position_in_grid]]) {
-    const int i = tpig;
-    const int r = ((device int32_t *) src1)[i];
-
-    dequantize_row_q6_K(
-            (device const block_q6_K *) ((device char *) src0 + r*nb01),
-                       (device float *) ((device char *)  dst + i*nb1), ne00);
-}
-
 //====================================== dot products =========================
 
 kernel void kernel_mul_mat_q2_K_f32(
@@ -1224,21 +797,27 @@ kernel void kernel_mul_mat_q2_K_f32(
         device const float * src1,
         device       float * dst,
         constant   int64_t & ne00,
-        constant   int64_t & ne10,
-        constant   int64_t & ne0,
         constant   int64_t & ne01[[buffer(4)]],
-        uint2 tgpig[[threadgroup_position_in_grid]],
+        constant   int64_t & ne02[[buffer(5)]],
+        constant   int64_t & ne10[[buffer(9)]],
+        constant   int64_t & ne12[[buffer(11)]],
+        constant   int64_t & ne0[[buffer(15)]],
+        constant   int64_t & ne1[[buffer(16)]],
+        constant   uint    & gqa[[buffer(17)]],
+        uint3 tgpig[[threadgroup_position_in_grid]],
         uint tiisg[[thread_index_in_simdgroup]],
         uint sgitg[[simdgroup_index_in_threadgroup]]) {
 
     const int nb = ne00/QK_K;
     const int r0 = tgpig.x;
     const int r1 = tgpig.y;
+    const int r2 = tgpig.z;
 
     const int first_row = (r0 * N_SIMDGROUP + sgitg) * N_DST;
     const int ib_row = first_row * nb;
-    device const block_q2_K * x = (device const block_q2_K *) src0 + ib_row;
-    device const float      * y = (device const float      *) src1 + r1*ne10;
+    const uint offset0 = r2/gqa*(nb*ne0);
+    device const block_q2_K * x = (device const block_q2_K *) src0 + ib_row + offset0;
+    device const float      * y = (device const float      *) src1 + r1*ne10 + r2*ne00*ne1;
     float yl[32];
     float sumf[N_DST]={0.f}, all_sum;
 
@@ -1351,7 +930,7 @@ kernel void kernel_mul_mat_q2_K_f32(
     for (int row = 0; row < N_DST; ++row) {
         all_sum = simd_sum(sumf[row]);
         if (tiisg == 0) {
-            dst[r1*ne0 + first_row + row] = all_sum;
+            dst[r1*ne0 + r2*ne0*ne1 + first_row + row] = all_sum;
         }
     }
 }
@@ -1362,10 +941,14 @@ kernel void kernel_mul_mat_q3_K_f32(
         device const float * src1,
         device       float * dst,
         constant   int64_t & ne00,
-        constant   int64_t & ne10,
-        constant   int64_t & ne0,
-        constant   int64_t & ne1,
-        uint2 tgpig[[threadgroup_position_in_grid]],
+        constant   int64_t & ne01[[buffer(4)]],
+        constant   int64_t & ne02[[buffer(5)]],
+        constant   int64_t & ne10[[buffer(9)]],
+        constant   int64_t & ne12[[buffer(11)]],
+        constant   int64_t & ne0[[buffer(15)]],
+        constant   int64_t & ne1[[buffer(16)]],
+        constant   uint    & gqa[[buffer(17)]],
+        uint3 tgpig[[threadgroup_position_in_grid]],
         uint tiisg[[thread_index_in_simdgroup]],
         uint sgitg[[simdgroup_index_in_threadgroup]]) {
 
@@ -1373,11 +956,12 @@ kernel void kernel_mul_mat_q3_K_f32(
 
     const int64_t r0 = tgpig.x;
     const int64_t r1 = tgpig.y;
+    const int64_t r2 = tgpig.z;
 
     const int first_row = (r0 * N_SIMDGROUP + sgitg) * 2;
-
-    device const block_q3_K * x = (device const block_q3_K *) src0 + first_row*nb;
-    device const float     * yy = (device const float      *) src1 + r1*ne10;
+    const uint offset0 = r2/gqa*(nb*ne0);
+    device const block_q3_K * x = (device const block_q3_K *) src0 + first_row*nb + offset0;
+    device const float     * yy = (device const float      *) src1 + r1*ne10 + r2*ne00*ne1;
 
     float yl[16];
 
@@ -1465,7 +1049,7 @@ kernel void kernel_mul_mat_q3_K_f32(
         const float sumf = (sumf1[row] - 32.f*sumf2[row]) / (1 << shift);
         const float tot = simd_sum(sumf);
         if (tiisg == 0) {
-            dst[r1*ne0 + first_row + row] = tot;
+            dst[r1*ne0 + r2*ne0*ne1 + first_row + row] = tot;
         }
     }
 }
@@ -1475,10 +1059,14 @@ kernel void kernel_mul_mat_q3_K_f32(
         device const float * src1,
         device       float * dst,
         constant   int64_t & ne00,
-        constant   int64_t & ne10,
-        constant   int64_t & ne0,
-        constant   int64_t & ne1,
-        uint2 tgpig[[threadgroup_position_in_grid]],
+        constant   int64_t & ne01[[buffer(4)]],
+        constant   int64_t & ne02[[buffer(5)]],
+        constant   int64_t & ne10[[buffer(9)]],
+        constant   int64_t & ne12[[buffer(11)]],
+        constant   int64_t & ne0[[buffer(15)]],
+        constant   int64_t & ne1[[buffer(16)]],
+        constant   uint    & gqa[[buffer(17)]],
+        uint3 tgpig[[threadgroup_position_in_grid]],
         uint tiisg[[thread_index_in_simdgroup]],
         uint sgitg[[simdgroup_index_in_threadgroup]]) {
 
@@ -1486,11 +1074,12 @@ kernel void kernel_mul_mat_q3_K_f32(
 
     const int64_t r0 = tgpig.x;
     const int64_t r1 = tgpig.y;
+    const int64_t r2 = tgpig.z;
 
     const int row = 2 * r0 + sgitg;
-
-    device const block_q3_K * x = (device const block_q3_K *) src0 + row*nb;
-    device const float     * yy = (device const float      *) src1 + r1*ne10;
+    const uint offset0 = r2/gqa*(nb*ne0);
+    device const block_q3_K * x = (device const block_q3_K *) src0 + row*nb + offset0;
+    device const float     * yy = (device const float      *) src1 + r1*ne10 + r2*ne00*ne1;
     const int ix = tiisg/4;
     const int il = 4 * (tiisg%4);// 0, 4, 8, 12
     const int im = il/8;         // 0, 0, 1, 1
@@ -1529,7 +1118,7 @@ kernel void kernel_mul_mat_q3_K_f32(
 
     const float tot = simd_sum(sumf);
     if (tiisg == 0) {
-        dst[r1*ne0 + row] = tot;
+        dst[r1*ne0 + r2*ne0*ne1 + row] = tot;
     }
 
 }
@@ -1541,10 +1130,14 @@ kernel void kernel_mul_mat_q4_K_f32(
         device const float * src1,
         device       float * dst,
         constant   int64_t & ne00,
-        constant   int64_t & ne10,
-        constant   int64_t & ne0,
         constant   int64_t & ne01[[buffer(4)]],
-        uint2 tgpig[[threadgroup_position_in_grid]],
+        constant   int64_t & ne02[[buffer(5)]],
+        constant   int64_t & ne10[[buffer(9)]],
+        constant   int64_t & ne12[[buffer(11)]],
+        constant   int64_t & ne0[[buffer(15)]],
+        constant   int64_t & ne1[[buffer(16)]],
+        constant   uint    & gqa[[buffer(17)]],
+        uint3 tgpig[[threadgroup_position_in_grid]],
         uint tiisg[[thread_index_in_simdgroup]],
         uint sgitg[[simdgroup_index_in_threadgroup]]) {
 
@@ -1560,10 +1153,12 @@ kernel void kernel_mul_mat_q4_K_f32(
     const int nb = ne00/QK_K;
     const int r0 = tgpig.x;
     const int r1 = tgpig.y;
+    const int r2 = tgpig.z;
     const int first_row = (r0 * N_SIMDGROUP + sgitg) * N_DST;
     const int ib_row = first_row * nb;
-    device const block_q4_K * x = (device const block_q4_K *) src0 + ib_row;
-    device const float      * y = (device const float      *) src1 + r1*ne10;
+    const uint offset0 = r2/gqa*(nb*ne0);
+    device const block_q4_K * x = (device const block_q4_K *) src0 + ib_row + offset0;
+    device const float      * y = (device const float      *) src1 + r1*ne10 + r2*ne00*ne1;
     float yl[16];
     float yh[16];
     float sumf[N_DST]={0.f}, all_sum;
@@ -1630,7 +1225,7 @@ kernel void kernel_mul_mat_q4_K_f32(
     for (int row = 0; row < N_DST; ++row) {
         all_sum = simd_sum(sumf[row]);
         if (tiisg == 0) {
-            dst[r1*ne0 + first_row + row] = all_sum;
+            dst[r1*ne0 + r2*ne0*ne1 + first_row + row] = all_sum;
         }
     }
 }
@@ -1640,10 +1235,14 @@ kernel void kernel_mul_mat_q4_K_f32(
         device const float * src1,
         device       float * dst,
         constant   int64_t & ne00,
-        constant   int64_t & ne10,
-        constant   int64_t & ne0,
         constant   int64_t & ne01[[buffer(4)]],
-        uint2 tgpig[[threadgroup_position_in_grid]],
+        constant   int64_t & ne02[[buffer(5)]],
+        constant   int64_t & ne10[[buffer(9)]],
+        constant   int64_t & ne12[[buffer(11)]],
+        constant   int64_t & ne0[[buffer(15)]],
+        constant   int64_t & ne1[[buffer(16)]],
+        constant   uint    & gqa[[buffer(17)]],
+        uint3 tgpig[[threadgroup_position_in_grid]],
         uint tiisg[[thread_index_in_simdgroup]],
         uint sgitg[[simdgroup_index_in_threadgroup]]) {
 
@@ -1653,10 +1252,12 @@ kernel void kernel_mul_mat_q4_K_f32(
     const int nb = ne00/QK_K;
     const int r0 = tgpig.x;
     const int r1 = tgpig.y;
+    const int r2 = tgpig.z;
     const int first_row = (r0 * N_SIMDGROUP + sgitg) * N_DST;
     const int ib_row = first_row * nb;
-    device const block_q4_K * x = (device const block_q4_K *) src0 + ib_row;
-    device const float      * y = (device const float      *) src1 + r1*ne10;
+    const uint offset0 = r2/gqa*(nb*ne0);
+    device const block_q4_K * x = (device const block_q4_K *) src0 + ib_row + offset0;
+    device const float      * y = (device const float      *) src1 + r1*ne10 + r2*ne00*ne1;
     float yl[8];
     float yh[8];
     float sumf[N_DST]={0.f}, all_sum;
@@ -1712,7 +1313,7 @@ kernel void kernel_mul_mat_q4_K_f32(
     for (int row = 0; row < N_DST; ++row) {
         all_sum = simd_sum(sumf[row]);
         if (tiisg == 0) {
-            dst[r1*ne0 + first_row + row] = all_sum;
+            dst[r1*ne0+ r2*ne0*ne1 + first_row + row] = all_sum;
         }
     }
 }
@@ -1723,9 +1324,14 @@ kernel void kernel_mul_mat_q5_K_f32(
         device const float * src1,
         device       float * dst,
         constant   int64_t & ne00,
-        constant   int64_t & ne10,
-        constant   int64_t & ne0,
-        uint2 tgpig[[threadgroup_position_in_grid]],
+        constant   int64_t & ne01[[buffer(4)]],
+        constant   int64_t & ne02[[buffer(5)]],
+        constant   int64_t & ne10[[buffer(9)]],
+        constant   int64_t & ne12[[buffer(11)]],
+        constant   int64_t & ne0[[buffer(15)]],
+        constant   int64_t & ne1[[buffer(16)]],
+        constant   uint    & gqa[[buffer(17)]],
+        uint3 tgpig[[threadgroup_position_in_grid]],
         uint tiisg[[thread_index_in_simdgroup]],
         uint sgitg[[simdgroup_index_in_threadgroup]]) {
 
@@ -1733,11 +1339,12 @@ kernel void kernel_mul_mat_q5_K_f32(
 
     const int64_t r0 = tgpig.x;
     const int64_t r1 = tgpig.y;
+    const int r2 = tgpig.z;
 
     const int first_row = (r0 * N_SIMDGROUP + sgitg) * 2;
-
-    device const block_q5_K * x = (device const block_q5_K *) src0 + first_row*nb;
-    device const float     * yy = (device const float      *) src1 + r1*ne10;
+    const uint offset0 = r2/gqa*(nb*ne0);
+    device const block_q5_K * x = (device const block_q5_K *) src0 + first_row*nb + offset0;
+    device const float     * yy = (device const float      *) src1 + r1*ne10 + r2*ne00*ne1;
 
     float sumf[2]={0.f};
 
@@ -1871,7 +1478,7 @@ kernel void kernel_mul_mat_q5_K_f32(
     for (int row = 0; row < 2; ++row) {
         const float tot = simd_sum(sumf[row]);
         if (tiisg == 0) {
-            dst[r1*ne0 + first_row + row] = tot;
+            dst[r1*ne0 + r2*ne0*ne1 + first_row + row] = tot;
         }
     }
 
@@ -1882,9 +1489,14 @@ kernel void kernel_mul_mat_q6_K_f32(
         device const float * src1,
         device       float * dst,
         constant   int64_t & ne00,
-        constant   int64_t & ne10,
-        constant   int64_t & ne0,
-        uint2 tgpig[[threadgroup_position_in_grid]],
+        constant   int64_t & ne01[[buffer(4)]],
+        constant   int64_t & ne02[[buffer(5)]],
+        constant   int64_t & ne10[[buffer(9)]],
+        constant   int64_t & ne12[[buffer(11)]],
+        constant   int64_t & ne0[[buffer(15)]],
+        constant   int64_t & ne1[[buffer(16)]],
+        constant   uint    & gqa[[buffer(17)]],
+        uint3 tgpig[[threadgroup_position_in_grid]],
         uint tiisg[[thread_index_in_simdgroup]],
         uint sgitg[[simdgroup_index_in_threadgroup]]) {
 
@@ -1897,11 +1509,12 @@ kernel void kernel_mul_mat_q6_K_f32(
 
     const int64_t r0 = tgpig.x;
     const int64_t r1 = tgpig.y;
+    const int r2 = tgpig.z;
 
     const int row = 2 * r0 + sgitg;
-
-    device const block_q6_K * x = (device const block_q6_K *) src0 + row * nb; //r0*nb;
-    device const float     * yy = (device const float      *) src1 + r1*ne10;
+    const uint offset0 = r2/gqa*(nb*ne0);
+    device const block_q6_K * x = (device const block_q6_K *) src0 + row * nb + offset0;
+    device const float     * yy = (device const float      *) src1 + r1*ne10 + r2*ne00*ne1;
 
     float sumf = 0;
 
@@ -1967,6 +1580,366 @@ kernel void kernel_mul_mat_q6_K_f32(
 
     const float tot = simd_sum(sumf);
     if (tiisg == 0) {
-        dst[r1*ne0 + row] = tot;
+        dst[r1*ne0 + r2*ne0*ne1 + row] = tot;
     }
 }
+
+//============================= templates and their specializations =============================
+
+template <typename type4x4>
+void dequantize_f16(device const half4x4 * src, short il, thread type4x4 & reg) {
+    half4x4 temp = *(((device half4x4 *)src));
+    for (int i = 0; i < 16; i++){
+        reg[i/4][i%4] = temp[i/4][i%4];
+    }
+}
+
+template <typename type4x4>
+void dequantize_q4_0(device const block_q4_0 *xb, short il, thread type4x4 & reg) {
+    device const uint16_t * qs = ((device const uint16_t *)xb + 1);
+    const half d = il ? (xb->d / 16.h) : xb->d;
+    const half m = il ? (-8.h * 16.h) : -8.h;
+    const ushort mask0 = il ? 0x00F0 : 0x000F;
+    const ushort mask1 = il ? 0xF000 : 0x0F00;
+
+    for (int i=0;i<8;i++) {
+        reg[i/2][2*(i%2)] = (((qs[i] & mask0)) + m) * d;
+        reg[i/2][2*(i%2)+1] = (((qs[i] & mask1) >> 8) + m) * d;
+    }
+}
+
+template <typename type4x4>
+void dequantize_q4_1(device const block_q4_1 *xb, short il, thread type4x4 & reg) {
+    device const uint16_t * qs = ((device const uint16_t *)xb + 2);
+    const half d = il ? (xb->d / 16.h) : xb->d;
+    const half m = xb->m;
+    const ushort mask0 = il ? 0x00F0 : 0x000F;
+    const ushort mask1 = il ? 0xF000 : 0x0F00;
+
+    for (int i=0;i<8;i++) {
+        reg[i/2][2*(i%2)] = (((qs[i] & mask0)) * d) + m;
+        reg[i/2][2*(i%2)+1] = (((qs[i] & mask1) >> 8) * d) + m;
+    }
+}
+
+template <typename type4x4>
+void dequantize_q2_K(device const block_q2_K *xb, short il, thread type4x4 & reg) {
+    const half d = xb->d;
+    const half min = xb->dmin;
+    device const uint8_t * q = (device const uint8_t *)xb->qs;
+    half dl, ml;
+    uint8_t sc = xb->scales[il];
+
+#if QK_K == 256
+    q = q + 32*(il/8) + 16*(il&1);
+    il = (il/2)%4;
+#endif
+    half  coef = il>1 ? (il>2 ? 1/64.h : 1/16.h) : (il>0 ? 1/4.h : 1.h);
+    uchar mask = il>1 ? (il>2 ? 192    : 48)     : (il>0 ? 12    : 3);
+    dl = d * (sc & 0xF) * coef, ml = min * (sc >> 4);
+    for (int i = 0; i < 16; ++i) {
+        reg[i/4][i%4] = dl * (q[i] & mask) - ml;
+    }
+}
+
+template <typename type4x4>
+void dequantize_q3_K(device const block_q3_K *xb, short il, thread type4x4 & reg) {
+    const float d_all = (float)(xb->d);
+    device const uint8_t * q = (device const uint8_t *)xb->qs;
+    device const uint8_t * h = (device const uint8_t *)xb->hmask;
+    device const int8_t * scales = (device const int8_t *)xb->scales;
+
+#if QK_K == 256
+    q = q + 32 * (il/8) + 16 * (il&1);
+    h = h + 16 * (il&1);
+    uint8_t m = 1 << (il/2);
+    uint16_t kmask1 = (il/4)>1 ? ((il/4)>2 ? 192 : 48) : \
+                                 ((il/4)>0 ? 12  : 3);
+    uint16_t kmask2 = il/8 ? 0xF0 : 0x0F;
+    uint16_t scale_2 = scales[il%8], scale_1 = scales[8 + il%4];
+    int16_t  dl_int = (il/4)&1 ? (scale_2&kmask2) | ((scale_1&kmask1) << 2) : \
+                                 (scale_2&kmask2) | ((scale_1&kmask1) << 4);
+    float dl = il<8 ? d_all * (dl_int - 32.f) : d_all * (dl_int / 16.f - 32.f);
+
+    il = (il/2)%4;
+    float   coef = il>1 ? (il>2 ? 1/64.h : 1/16.h) : (il>0 ? 1/4.h : 1.h);
+    uint8_t mask = il>1 ? (il>2 ? 192    : 48)     : (il>0 ? 12    : 3);
+
+    for (int i = 0; i < 16; ++i) {
+        reg[i/4][i%4] = coef * dl * ((q[i] & mask) - ((h[i] & m) ? 0 : 4.f/coef));
+    }
+#else
+    float    kcoef = il&1 ? 1.f/16.f : 1.f;
+    uint16_t kmask = il&1 ? 0xF0     : 0x0F;
+    float    dl = d_all * ((scales[il/2] & kmask) * kcoef - 8);
+    float    coef = il>1 ? (il>2 ? 1/64.h : 1/16.h) : (il>0 ? 1/4.h : 1.h);
+    uint8_t  mask = il>1 ? (il>2 ? 192    : 48)     : (il>0 ? 12    : 3);
+    uint8_t  m = 1<<(il*2);
+    for (int i = 0; i < 16; ++i) {
+        reg[i/4][i%4] = coef * dl * ((q[i] & mask) - ((h[i%8] & (m * (1 + i/8))) ? 0 : 4.f/coef));
+    }
+#endif
+}
+
+template <typename type4x4>
+void dequantize_q4_K(device const block_q4_K *xb, short il, thread type4x4 & reg) {
+    device const uint8_t * q = xb->qs;
+
+#if QK_K == 256
+    const float d = (float)(xb->d);
+    const float min = (float)(xb->dmin);
+    short is = (il/4) * 2;
+    q = q + (il/4) * 32 + 16 * (il&1);
+    il = il%4;
+    const uchar4 sc = get_scale_min_k4(is, xb->scales);
+    const float dl = il<2 ? d * sc[0]   : d * sc[2]/16.h;
+    const float ml = il<2 ? min * sc[1] : min * sc[3];
+#else
+    q = q + 16 * (il&1);
+    device const uint8_t * s = xb->scales;
+    device const half2 * dh = (device const half2 *)xb->d;
+    const float2 d = (float2)dh[0];
+    const float dl = il<2 ? d[0] * (s[0]&0xF) : d[0] * (s[1]&0xF)/16.h;
+    const float ml = il<2 ? d[1] * (s[0]>>4)  : d[1 ]* (s[1]>>4);
+#endif
+    const ushort mask = il<2 ? 0x0F : 0xF0;
+    for (int i = 0; i < 16; ++i) {
+        reg[i/4][i%4] = dl * (q[i] & mask) - ml;
+    }
+}
+
+template <typename type4x4>
+void dequantize_q5_K(device const block_q5_K *xb, short il, thread type4x4 & reg) {
+    device const uint8_t * q  = xb->qs;
+    device const uint8_t * qh = xb->qh;
+
+#if QK_K == 256
+    const float d = (float)(xb->d);
+    const float min = (float)(xb->dmin);
+    short is = (il/4) * 2;
+    q  = q + 32 * (il/4) + 16 * (il&1);
+    qh = qh + 16 * (il&1);
+    uint8_t ul = 1 << (il/2);
+    il = il%4;
+    const uchar4 sc = get_scale_min_k4(is, xb->scales);
+    const float dl = il<2 ? d * sc[0]   : d * sc[2]/16.h;
+    const float ml = il<2 ? min * sc[1] : min * sc[3];
+
+    const ushort mask   = il<2 ? 0x0F : 0xF0;
+    const float  qh_val = il<2 ? 16.f : 256.f;
+    for (int i = 0; i < 16; ++i) {
+        reg[i/4][i%4] = dl * ((q[i] & mask) + (qh[i] & ul ? qh_val : 0)) - ml;
+    }
+#else
+    q = q + 16 * (il&1);
+    device const int8_t * s = xb->scales;
+    const float dl = xb->d * s[il];
+    uint8_t m = 1<<(il*2);
+    const float  coef = il<2 ? 1.f  : 1.f/16.f;
+    const ushort mask = il<2 ? 0x0F : 0xF0;
+    for (int i = 0; i < 16; ++i) {
+        reg[i/4][i%4] = coef * dl * ((q[i] & mask) - (qh[i%8] & (m*(1+i/8)) ? 0.f : 16.f/coef));
+    }
+#endif
+}
+
+template <typename type4x4>
+void dequantize_q6_K(device const block_q6_K *xb, short il, thread type4x4 & reg) {
+    const float d_all = (float)(xb->d);
+    device const uint8_t * ql = (device const uint8_t *)xb->ql;
+    device const uint8_t * qh = (device const uint8_t *)xb->qh;
+    device const int8_t * scales = (device const int8_t *)xb->scales;
+
+#if QK_K == 256
+    ql = ql + 64*(il/8) + 32*((il/2)&1) + 16*(il&1);
+    qh = qh + 32*(il/8) + 16*(il&1);
+    float sc = scales[(il%2) + 2 * ((il/2))];
+    il = (il/2)%4;
+#else
+    ql = ql + 16 * (il&1);
+    float sc = scales[il];
+#endif
+    for (int i = 0; i < 16; ++i) {
+        uint16_t  kmask1 = il>1 ? (il>2 ? 192 : 48) : (il>0 ? 12 : 3);
+        uint16_t  kmask2 = il>1 ? 0xF0              : 0x0F;
+        const float coef = il>1 ? 1.f/16.f          : 1.f;
+        float q = il&1 ? ((ql[i]&kmask2)|((qh[i]&kmask1)<<2)) - 32.f/coef : \
+                         ((ql[i]&kmask2)|((qh[i]&kmask1)<<4)) - 32.f/coef;
+        reg[i/4][i%4] = d_all * sc * q * coef;
+    }
+}
+
+template<typename block_q, short nl, void (*dequantize_func)(device const block_q *, short, thread float4x4 &)>
+kernel void kernel_get_rows(
+        device const  void * src0,
+        device const   int * src1,
+        device       float * dst,
+        constant   int64_t & ne00,
+        constant  uint64_t & nb01,
+        constant  uint64_t & nb1,
+        uint                 tgpig[[threadgroup_position_in_grid]],
+        uint                 tiitg[[thread_index_in_threadgroup]],
+        uint                 tptg[[threads_per_threadgroup]]) {
+    const int i = tgpig;
+    const int r = ((device int32_t *) src1)[i];
+
+    for (int ind = tiitg; ind < ne00/16; ind += tptg) {
+        float4x4 temp;
+        dequantize_func(
+            ((device const block_q *) ((device char *) src0 + r*nb01)) + ind/nl, ind%nl, temp);
+        *(((device float4x4 *) ((device char *) dst + i*nb1)) + ind) = temp;
+    }
+}
+
+#define BLOCK_SIZE_M 64 // 8 simdgroup matrices from matrix A
+#define BLOCK_SIZE_N 32 // 4 simdgroup matrices from matrix A
+#define BLOCK_SIZE_K 32
+#define THREAD_MAT_M 4 // each thread take 4 simdgroup matrices from matrix A
+#define THREAD_MAT_N 2 // each thread take 2 simdgroup matrices from matrix B
+#define THREAD_PER_BLOCK 128
+#define THREAD_PER_ROW 2 // 2 thread for each row in matrix A to load numbers
+#define THREAD_PER_COL 4 // 4 thread for each row in matrix B to load numbers
+#define SG_MAT_SIZE 64 // simdgroup matrix is of shape 8x8
+#define SG_MAT_ROW 8
+
+// each block_q contains 16*nl weights
+template<typename block_q, short nl, void (*dequantize_func)(device const block_q *, short, thread half4x4 &)>
+kernel void kernel_mul_mm(device const  uchar * src0,
+                           device const  float * src1,
+                           device        float * dst,
+                           constant    int64_t & ne00,
+                           constant    int64_t & ne02,
+                           constant    int64_t & nb01,
+                           constant    int64_t & nb02,
+                           constant    int64_t & ne12,
+                           constant    int64_t & ne0,
+                           constant    int64_t & ne1,
+                           constant    uint & gqa,
+                           threadgroup   uchar * shared_memory [[threadgroup(0)]],
+                           uint3                 tgpig[[threadgroup_position_in_grid]],
+                           uint                  tiitg[[thread_index_in_threadgroup]],
+                           uint                  sgitg[[simdgroup_index_in_threadgroup]]) {
+
+    threadgroup half * sa = ((threadgroup half *)shared_memory);
+    threadgroup float * sb = (threadgroup float *)(shared_memory + 4096);
+
+    const uint r0 = tgpig.y;
+    const uint r1 = tgpig.x;
+    const uint im = tgpig.z;
+    // if this block is of 64x32 shape or smaller
+    short n_rows = (ne0 - r0 * BLOCK_SIZE_M < BLOCK_SIZE_M) ? (ne0 - r0 * BLOCK_SIZE_M) : BLOCK_SIZE_M;
+    short n_cols = (ne1 - r1 * BLOCK_SIZE_N < BLOCK_SIZE_N) ? (ne1 - r1 * BLOCK_SIZE_N) : BLOCK_SIZE_N;
+    // a thread shouldn't load data outside of the matrix
+    short thread_row = ((short)tiitg/THREAD_PER_ROW) < n_rows ? ((short)tiitg/THREAD_PER_ROW) : n_rows - 1;
+    short thread_col = ((short)tiitg/THREAD_PER_COL) < n_cols ? ((short)tiitg/THREAD_PER_COL) : n_cols - 1;
+
+    simdgroup_half8x8 ma[4];
+    simdgroup_float8x8 mb[2];
+    simdgroup_float8x8 c_res[8];
+    for (int i = 0; i < 8; i++){
+        c_res[i] = make_filled_simdgroup_matrix<float, 8>(0.f);
+    }
+
+    short il = (tiitg % THREAD_PER_ROW);
+    uint offset0 = im/gqa*nb02; ushort offset1 = il/nl;
+    device const block_q  * x = (device const block_q  *)(src0 + (r0 * BLOCK_SIZE_M + thread_row) * nb01 + offset0) + offset1;
+    device const float * y = src1 + (r1 * BLOCK_SIZE_N + thread_col) * ne00 \
+                             + BLOCK_SIZE_K / THREAD_PER_COL * (tiitg % THREAD_PER_COL) + im * ne00 * ne1;
+
+    for (int loop_k = 0; loop_k < ne00; loop_k += BLOCK_SIZE_K) {
+        //load data and store to threadgroup memory
+        half4x4 temp_a;
+        dequantize_func(x, il, temp_a);
+        #pragma unroll(16)
+        for (int i = 0; i < 16; i++) {
+            *(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \
+            + 16 * (tiitg % THREAD_PER_ROW) + 8 * (i / 8)) \
+            + (tiitg / THREAD_PER_ROW) % 8 + (i & 7) * 8) = temp_a[i/4][i%4];
+        }
+        *(threadgroup float2x4 *)(sb + (tiitg % THREAD_PER_COL) * 8 * 32 + 8 * (tiitg / THREAD_PER_COL)) \
+                = *((device float2x4 *)y);
+        il = (il + 2 < nl) ? il + 2 : il % 2;
+        x  = (il < 2) ? x + (2+nl-1)/nl : x;
+        y += BLOCK_SIZE_K;
+
+        threadgroup_barrier(mem_flags::mem_threadgroup);
+        //load matrices from threadgroup memory and conduct outer products
+        threadgroup half  * lsma = (sa + THREAD_MAT_M * SG_MAT_SIZE * (sgitg % 2));
+        threadgroup float * lsmb = (sb + THREAD_MAT_N * SG_MAT_SIZE * (sgitg / 2));
+        #pragma unroll(4)
+        for (int ik = 0; ik < BLOCK_SIZE_K / 8; ik++) {
+            #pragma unroll(4)
+            for (int i = 0; i < 4; i++) {
+                simdgroup_load(ma[i],lsma + SG_MAT_SIZE * i);
+            }
+            simdgroup_barrier(mem_flags::mem_none);
+            #pragma unroll(2)
+            for (int i = 0; i < 2; i++) {
+                simdgroup_load(mb[i],lsmb + SG_MAT_SIZE * i);
+            }
+
+            lsma += BLOCK_SIZE_M / SG_MAT_ROW * SG_MAT_SIZE;
+            lsmb += BLOCK_SIZE_N / SG_MAT_ROW * SG_MAT_SIZE;
+            #pragma unroll(8)
+            for (int i = 0; i < 8; i++){
+                simdgroup_multiply_accumulate(c_res[i], mb[i/4], ma[i%4], c_res[i]);
+            }
+        }
+    }
+
+    if ((r0 + 1) * BLOCK_SIZE_M <= ne0 && (r1 + 1) * BLOCK_SIZE_N <= ne1) {
+        device float *C = dst + BLOCK_SIZE_M * r0 + 32 * (sgitg&1) \
+                          + (BLOCK_SIZE_N * r1 + 16 * (sgitg>>1)) * ne0 + im*ne1*ne0;
+        for (int i = 0; i < 8; i++) {
+            simdgroup_store(c_res[i], C + 8 * (i%4) + 8 * ne0 * (i/4), ne0);
+        }
+    } else {
+        // block is smaller than 64x32, we should avoid writing data outside of the matrix
+        threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
+                                      + 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
+        for (int i = 0; i < 8; i++) {
+            simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M);
+        }
+
+        threadgroup_barrier(mem_flags::mem_threadgroup);
+        device float *C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
+        if (sgitg==0) {
+            for (int i = 0; i < n_rows; i++) {
+                for (int j = tiitg; j< n_cols; j += BLOCK_SIZE_N) {
+                    *(C + i + j * ne0) = *(temp_str + i + j * BLOCK_SIZE_M);
+                }
+            }
+        }
+    }
+}
+
+#if QK_K == 256
+#define QK_NL 16
+#else
+#define QK_NL 4
+#endif
+
+typedef void (get_rows_t)(device const void *, device const int *, device float *, constant int64_t &, \
+                          constant uint64_t &, constant uint64_t &, uint, uint, uint);
+
+template [[host_name("kernel_get_rows_f16")]] kernel get_rows_t kernel_get_rows<half4x4, 1, dequantize_f16>;
+template [[host_name("kernel_get_rows_q4_0")]] kernel get_rows_t kernel_get_rows<block_q4_0, 2, dequantize_q4_0>;
+template [[host_name("kernel_get_rows_q4_1")]] kernel get_rows_t kernel_get_rows<block_q4_1, 2, dequantize_q4_1>;
+template [[host_name("kernel_get_rows_q2_K")]] kernel get_rows_t kernel_get_rows<block_q2_K, QK_NL, dequantize_q2_K>;
+template [[host_name("kernel_get_rows_q3_K")]] kernel get_rows_t kernel_get_rows<block_q3_K, QK_NL, dequantize_q3_K>;
+template [[host_name("kernel_get_rows_q4_K")]] kernel get_rows_t kernel_get_rows<block_q4_K, QK_NL, dequantize_q4_K>;
+template [[host_name("kernel_get_rows_q5_K")]] kernel get_rows_t kernel_get_rows<block_q5_K, QK_NL, dequantize_q5_K>;
+template [[host_name("kernel_get_rows_q6_K")]] kernel get_rows_t kernel_get_rows<block_q6_K, QK_NL, dequantize_q6_K>;
+
+typedef void (mat_mm_t)(device const uchar *, device const float *, device float *, constant int64_t &,\
+                             constant int64_t &, constant int64_t &, constant int64_t &, constant int64_t &, \
+                             constant int64_t &, constant int64_t &, constant uint &, threadgroup uchar *, uint3, uint, uint);
+
+template [[host_name("kernel_mul_mm_f16_f32")]] kernel mat_mm_t kernel_mul_mm<half4x4, 1, dequantize_f16>;
+template [[host_name("kernel_mul_mm_q4_0_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_0, 2, dequantize_q4_0>;
+template [[host_name("kernel_mul_mm_q4_1_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_1, 2, dequantize_q4_1>;
+template [[host_name("kernel_mul_mm_q2_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q2_K, QK_NL, dequantize_q2_K>;
+template [[host_name("kernel_mul_mm_q3_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q3_K, QK_NL, dequantize_q3_K>;
+template [[host_name("kernel_mul_mm_q4_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_K, QK_NL, dequantize_q4_K>;
+template [[host_name("kernel_mul_mm_q5_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q5_K, QK_NL, dequantize_q5_K>;
+template [[host_name("kernel_mul_mm_q6_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q6_K, QK_NL, dequantize_q6_K>;
diff --git a/ggml.c b/ggml.c
index f7e72ed84..228271fc2 100644
--- a/ggml.c
+++ b/ggml.c
@@ -19290,7 +19290,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
                         }
                         gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
                         free(data);
-                    } if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
+                    } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
                         GGML_ASSERT(false && "nested arrays not supported");
                     } else {
                         gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
diff --git a/ggml.h b/ggml.h
index ad12c133e..27f4b0ed8 100644
--- a/ggml.h
+++ b/ggml.h
@@ -214,7 +214,7 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1
 
-#define GGUF_MAGIC   0x47475546 // "GGUF"
+#define GGUF_MAGIC   0x46554747 // "GGUF"
 #define GGUF_VERSION 1
 
 #define GGUF_DEFAULT_ALIGNMENT 32
diff --git a/gguf.py b/gguf.py
index e7f6f0ac8..e5eb85ded 100644
--- a/gguf.py
+++ b/gguf.py
@@ -1,21 +1,17 @@
-"""TODOs
-1. Implement writers for known architectures, LLaMA in particular.
-2. Add docstrings from the format specs.
-3. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org.
-"""
-
+import shutil
 import sys
 import struct
+import tempfile
 import numpy as np
 
-from enum import IntEnum
+from enum import IntEnum, auto
 from typing import Any, IO, List
 
 #
 # constants
 #
 
-GGUF_MAGIC             = 0x47475546
+GGUF_MAGIC             = 0x46554747
 GGUF_VERSION           = 1
 GGUF_DEFAULT_ALIGNMENT = 32
 
@@ -27,30 +23,29 @@ KEY_GENERAL_NAME                 = "general.name"
 KEY_GENERAL_AUTHOR               = "general.author"
 KEY_GENERAL_URL                  = "general.url"
 KEY_GENERAL_DESCRIPTION          = "general.description"
-KEY_GENERAL_FILE_TYPE            = "general.file_type"
 KEY_GENERAL_LICENSE              = "general.license"
 KEY_GENERAL_SOURCE_URL           = "general.source.url"
 KEY_GENERAL_SOURCE_HF_REPO       = "general.source.hugginface.repository"
 
 # LLM
-KEY_LLM_CONTEXT_LENGTH           = "{llm}.context_length"
-KEY_LLM_EMBEDDING_LENGTH         = "{llm}.embedding_length"
-KEY_LLM_BLOCK_COUNT              = "{llm}.block_count"
-KEY_LLM_FEED_FORWARD_LENGTH      = "{llm}.feed_forward_length"
-KEY_LLM_USE_PARALLEL_RESIDUAL    = "{llm}.use_parallel_residual"
-KEY_LLM_TENSOR_DATA_LAYOUT       = "{llm}.tensor_data_layout"
+KEY_LLM_CONTEXT_LENGTH        = "{arch}.context_length"
+KEY_LLM_EMBEDDING_LENGTH      = "{arch}.embedding_length"
+KEY_LLM_BLOCK_COUNT           = "{arch}.block_count"
+KEY_LLM_FEED_FORWARD_LENGTH   = "{arch}.feed_forward_length"
+KEY_LLM_USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
+KEY_LLM_TENSOR_DATA_LAYOUT    = "{arch}.tensor_data_layout"
 
 # attention
-KEY_ATTENTION_HEAD_COUNT         = "{llm}.attention.head_count"
-KEY_ATTENTION_HEAD_COUNT_KV      = "{llm}.attention.head_count_kv"
-KEY_ATTENTION_MAX_ALIBI_BIAS     = "{llm}.attention.max_alibi_bias"
-KEY_ATTENTION_CLAMP_KQV          = "{llm}.attention.clamp_kqv"
-KEY_ATTENTION_LAYERNORM_EPS      = "{llm}.attention.layer_norm_epsilon"
-KEY_ATTENTION_LAYERNORM_RMS_EPS  = "{llm}.attention.layer_norm_rms_epsilon"
+KEY_ATTENTION_HEAD_COUNT        = "{arch}.attention.head_count"
+KEY_ATTENTION_HEAD_COUNT_KV     = "{arch}.attention.head_count_kv"
+KEY_ATTENTION_MAX_ALIBI_BIAS    = "{arch}.attention.max_alibi_bias"
+KEY_ATTENTION_CLAMP_KQV         = "{arch}.attention.clamp_kqv"
+KEY_ATTENTION_LAYERNORM_EPS     = "{arch}.attention.layer_norm_epsilon"
+KEY_ATTENTION_LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
 
 # RoPE
-KEY_ROPE_DIMENSION_COUNT         = "{llm}.rope.dimension_count"
-KEY_ROPE_SCALE                   = "{llm}.rope.scale"
+KEY_ROPE_DIMENSION_COUNT = "{arch}.rope.dimension_count"
+KEY_ROPE_SCALE           = "{arch}.rope.scale"
 
 # tokenization
 KEY_TOKENIZER_MODEL      = "tokenizer.ggml.model"
@@ -70,97 +65,257 @@ KEY_TOKENIZER_RWKV       = "tokenizer.rwkv.world"
 # recommended mapping of model tensor names for storage in gguf
 #
 
-def get_tensor_name_map(n_blocks : int):
+
+class MODEL_ARCH(IntEnum):
+    LLAMA   = auto()
+    FALCON  = auto()
+    GPT2    = auto()
+    GPTJ    = auto()
+    GPTNEOX = auto()
+    MPT     = auto()
+
+
+class MODEL_TENSOR(IntEnum):
+    TOKEN_EMBD    = auto()
+    POS_EMBD      = auto()
+    OUTPUT        = auto()
+    OUTPUT_NORM   = auto()
+    ROPE_FREQS    = auto()
+    ATTN_Q        = auto()
+    ATTN_K        = auto()
+    ATTN_V        = auto()
+    ATTN_QKV      = auto()
+    ATTN_OUT      = auto()
+    ATTN_NORM     = auto()
+    ATTN_NORM_2   = auto()
+    ATTN_ROT_EMBD = auto()
+    FFN_GATE      = auto()
+    FFN_DOWN      = auto()
+    FFN_UP        = auto()
+    FFN_NORM      = auto()
+
+
+MODEL_ARCH_NAMES = {
+    MODEL_ARCH.LLAMA:   "llama",
+    MODEL_ARCH.FALCON:  "falcon",
+    MODEL_ARCH.GPT2:    "gpt2",
+    MODEL_ARCH.GPTJ:    "gptj",
+    MODEL_ARCH.GPTNEOX: "gptneox",
+    MODEL_ARCH.MPT:     "mpt",
+}
+
+MODEL_TENSOR_NAMES = {
+    MODEL_ARCH.LLAMA: {
+        MODEL_TENSOR.TOKEN_EMBD:    "token_embd",
+        MODEL_TENSOR.OUTPUT_NORM:   "output_norm",
+        MODEL_TENSOR.OUTPUT:        "output",
+        MODEL_TENSOR.ROPE_FREQS:    "rope_freqs",
+        MODEL_TENSOR.ATTN_NORM:     "blk.{bid}.attn_norm",
+        MODEL_TENSOR.ATTN_Q:        "blk.{bid}.attn_q",
+        MODEL_TENSOR.ATTN_K:        "blk.{bid}.attn_k",
+        MODEL_TENSOR.ATTN_V:        "blk.{bid}.attn_v",
+        MODEL_TENSOR.ATTN_OUT:      "blk.{bid}.attn_output",
+        MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
+        MODEL_TENSOR.FFN_NORM:      "blk.{bid}.ffn_norm",
+        MODEL_TENSOR.FFN_GATE:      "blk.{bid}.ffn_gate",
+        MODEL_TENSOR.FFN_DOWN:      "blk.{bid}.ffn_down",
+        MODEL_TENSOR.FFN_UP:        "blk.{bid}.ffn_up",
+    },
+    MODEL_ARCH.GPTNEOX: {
+        MODEL_TENSOR.TOKEN_EMBD:    "token_embd",
+        MODEL_TENSOR.OUTPUT_NORM:   "output_norm",
+        MODEL_TENSOR.OUTPUT:        "output",
+        MODEL_TENSOR.ATTN_NORM:     "blk.{bid}.attn_norm",
+        MODEL_TENSOR.ATTN_QKV:      "blk.{bid}.attn_qkv",
+        MODEL_TENSOR.ATTN_OUT:      "blk.{bid}.attn_output",
+        MODEL_TENSOR.FFN_NORM:      "blk.{bid}.ffn_norm",
+        MODEL_TENSOR.FFN_DOWN:      "blk.{bid}.ffn_down",
+        MODEL_TENSOR.FFN_UP:        "blk.{bid}.ffn_up",
+    },
+    MODEL_ARCH.FALCON: {
+        MODEL_TENSOR.TOKEN_EMBD:  "token_embd",
+        MODEL_TENSOR.OUTPUT_NORM: "output_norm",
+        MODEL_TENSOR.OUTPUT:      "output",
+        MODEL_TENSOR.ATTN_NORM:   "blk.{bid}.attn_norm",
+        MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
+        MODEL_TENSOR.ATTN_QKV:    "blk.{bid}.attn_qkv",
+        MODEL_TENSOR.ATTN_OUT:    "blk.{bid}.attn_output",
+        MODEL_TENSOR.FFN_DOWN:    "blk.{bid}.ffn_down",
+        MODEL_TENSOR.FFN_UP:      "blk.{bid}.ffn_up",
+    },
+    MODEL_ARCH.GPT2: {
+        # TODO
+    },
+    # TODO
+}
+
+# tensors that will not be serialized
+MODEL_TENSOR_SKIP = {
+    MODEL_ARCH.LLAMA: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
+}
+
+
+# TODO: the following helper functions should be removed
+#       instead, get_tensor_name_map should return tuples of (name, MODEL_TENSOR)
+#       however, my Python is very bad, and I couldn't figure out how to do this, hence these functions
+# REMOVE
+def should_skip_tensor_TMP(arch: MODEL_ARCH, n_blocks: int, name: str) -> bool:
+    for skip in MODEL_TENSOR_SKIP.get(arch, []):
+        for i in range(n_blocks):
+            if name == MODEL_TENSOR_NAMES[arch][skip].format(bid=i):
+                return True
+
+    return False
+
+
+def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> dict:
     tensor_map = {}
+
     # Token embeddings
-    mapped_to = "token_embd"
-    tensor_map["gpt_neox.embed_in"]           = mapped_to # gptneox
-    tensor_map["transformer.wte"]             = mapped_to # gpt2 mpt
-    tensor_map["transformer.word_embeddings"] = mapped_to # falcon
-    tensor_map["model.embed_tokens"]          = mapped_to # llama-hf
-    tensor_map["tok_embeddings"]              = mapped_to # llama-pth
+    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.TOKEN_EMBD, None)
+
+    tensor_map["gpt_neox.embed_in"]           = mapped_to  # gptneox
+    tensor_map["transformer.wte"]             = mapped_to  # gpt2 mpt
+    tensor_map["transformer.word_embeddings"] = mapped_to  # falcon
+    tensor_map["model.embed_tokens"]          = mapped_to  # llama-hf
+    tensor_map["tok_embeddings"]              = mapped_to  # llama-pth
+
     # Position embeddings
-    mapped_to = "pos_embd"
-    tensor_map["transformer.wpe"] = mapped_to # gpt2
-    # Output norm
-    mapped_to = "output_norm"
-    tensor_map["gpt_neox.final_layer_norm"] = mapped_to # gptneox
-    tensor_map["transformer.ln_f"]          = mapped_to # gpt2 falcon
-    tensor_map["transformer.norm_f"]        = mapped_to # mpt
-    tensor_map["model.norm"]                = mapped_to # llama-hf
-    tensor_map["norm"]                      = mapped_to # llama-pth
+    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.POS_EMBD, None)
+
+    tensor_map["transformer.wpe"] = mapped_to  # gpt2
+
     # Output
-    mapped_to = "output"
-    tensor_map["embed_out"] = mapped_to # gptneox
-    tensor_map["lm_head"]   = mapped_to # gpt2 mpt falcon llama-hf
-    tensor_map["output"]    = mapped_to # llama-pth
-    # Attention and fee-forward layer blocks
-    for i in range(0,n_blocks):
+    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.OUTPUT, None)
+
+    tensor_map["embed_out"] = mapped_to  # gptneox
+    tensor_map["lm_head"]   = mapped_to  # gpt2 mpt falcon llama-hf
+    tensor_map["output"]    = mapped_to  # llama-pth
+
+    # Output norm
+    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.OUTPUT_NORM, None)
+
+    tensor_map["gpt_neox.final_layer_norm"] = mapped_to  # gptneox
+    tensor_map["transformer.ln_f"]          = mapped_to  # gpt2 falcon
+    tensor_map["transformer.norm_f"]        = mapped_to  # mpt
+    tensor_map["model.norm"]                = mapped_to  # llama-hf
+    tensor_map["norm"]                      = mapped_to  # llama-pth
+
+    # Rope frequencies
+    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ROPE_FREQS, None)
+
+    tensor_map["rope.freqs"] = mapped_to  # llama-pth
+
+    # Attention and feed-forward blocks
+    for i in range(0, n_blocks):
         # Attention norm
-        mapped_to = "blk."+str(i)+".attn_norm"
-        tensor_map["gpt_neox.layers."+str(i)+".input_layernorm"] = mapped_to # gptneox
-        tensor_map["transformer.h."+str(i)+".ln_1"]              = mapped_to # gpt2
-        tensor_map["transformer.blocks."+str(i)+".norm_1"]       = mapped_to # mpt
-        tensor_map["transformer.h."+str(i)+".input_layernorm"]   = mapped_to # falcon7b
-        tensor_map["transformer.h."+str(i)+".ln_attn"]           = mapped_to # falcon40b
-        tensor_map["model.layers."+str(i)+".input_layernorm"]    = mapped_to # llama-hf
-        tensor_map["layers."+str(i)+".attention_norm"]           = mapped_to # llama-pth
+        # TODO: is there are simpler way to write these 2 lines in Python?
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_NORM, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to else None
+
+        tensor_map["gpt_neox.layers."+str(i)+".input_layernorm"] = mapped_to  # gptneox
+        tensor_map["transformer.h."+str(i)+".ln_1"]              = mapped_to  # gpt2
+        tensor_map["transformer.blocks."+str(i)+".norm_1"]       = mapped_to  # mpt
+        tensor_map["transformer.h."+str(i)+".input_layernorm"]   = mapped_to  # falcon7b
+        tensor_map["transformer.h."+str(i)+".ln_mlp"]            = mapped_to  # falcon40b
+        tensor_map["model.layers."+str(i)+".input_layernorm"]    = mapped_to  # llama-hf
+        tensor_map["layers."+str(i)+".attention_norm"]           = mapped_to  # llama-pth
+
         # Attention norm 2
-        mapped_to = "blk."+str(i)+".attn_norm_2"
-        tensor_map["transformer.h."+str(i)+".ln_mlp"] = mapped_to # falcon40b
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_NORM_2, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["transformer.h."+str(i)+".ln_attn"] = mapped_to  # falcon40b
+
         # Attention query-key-value
-        mapped_to = "blk."+str(i)+".attn_qkv"
-        tensor_map["gpt_neox.layers."+str(i)+".attention.query_key_value"]    = mapped_to # gptneox
-        tensor_map["transformer.h."+str(i)+".attn.c_attn"]                    = mapped_to # gpt2
-        tensor_map["transformer.blocks."+str(i)+".attn.Wqkv"]                 = mapped_to # mpt
-        tensor_map["transformer.h."+str(i)+".self_attention.query_key_value"] = mapped_to # falcon
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_QKV, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["gpt_neox.layers."+str(i)+".attention.query_key_value"]    = mapped_to  # gptneox
+        tensor_map["transformer.h."+str(i)+".attn.c_attn"]                    = mapped_to  # gpt2
+        tensor_map["transformer.blocks."+str(i)+".attn.Wqkv"]                 = mapped_to  # mpt
+        tensor_map["transformer.h."+str(i)+".self_attention.query_key_value"] = mapped_to  # falcon
+
         # Attention query
-        mapped_to = "blk."+str(i)+".attn_q"
-        tensor_map["model.layers."+str(i)+".self_attn.q_proj"] = mapped_to # llama-hf
-        tensor_map["layers."+str(i)+".attention.wq"]           = mapped_to # llama-pth
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_Q, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["model.layers."+str(i)+".self_attn.q_proj"] = mapped_to  # llama-hf
+        tensor_map["layers."+str(i)+".attention.wq"]           = mapped_to  # llama-pth
+
         # Attention key
-        mapped_to = "blk."+str(i)+".attn_k"
-        tensor_map["model.layers."+str(i)+".self_attn.k_proj"] = mapped_to # llama-hf
-        tensor_map["layers."+str(i)+".attention.wk"]           = mapped_to # llama-pth
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_K, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["model.layers."+str(i)+".self_attn.k_proj"] = mapped_to  # llama-hf
+        tensor_map["layers."+str(i)+".attention.wk"]           = mapped_to  # llama-pth
+
         # Attention value
-        mapped_to = "blk."+str(i)+".attn_v"
-        tensor_map["model.layers."+str(i)+".self_attn.v_proj"] = mapped_to # llama-hf
-        tensor_map["layers."+str(i)+".attention.wv"]           = mapped_to # llama-pth
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_V, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["model.layers."+str(i)+".self_attn.v_proj"] = mapped_to  # llama-hf
+        tensor_map["layers."+str(i)+".attention.wv"]           = mapped_to  # llama-pth
+
         # Attention output
-        mapped_to = "blk."+str(i)+".attn_output"
-        tensor_map["gpt_neox.layers."+str(i)+".attention.dense"]    = mapped_to # gptneox
-        tensor_map["transformer.h."+str(i)+".attn.c_proj"]          = mapped_to # gpt2
-        tensor_map["transformer.blocks."+str(i)+".attn.out_proj"]   = mapped_to # mpt
-        tensor_map["transformer.h."+str(i)+".self_attention.dense"] = mapped_to # falcon
-        tensor_map["model.layers."+str(i)+".self_attn.o_proj"]      = mapped_to # llama-hf
-        tensor_map["layers."+str(i)+".attention.wo"]                = mapped_to # llama-pth
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_OUT, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["gpt_neox.layers."+str(i)+".attention.dense"]    = mapped_to  # gptneox
+        tensor_map["transformer.h."+str(i)+".attn.c_proj"]          = mapped_to  # gpt2
+        tensor_map["transformer.blocks."+str(i)+".attn.out_proj"]   = mapped_to  # mpt
+        tensor_map["transformer.h."+str(i)+".self_attention.dense"] = mapped_to  # falcon
+        tensor_map["model.layers."+str(i)+".self_attn.o_proj"]      = mapped_to  # llama-hf
+        tensor_map["layers."+str(i)+".attention.wo"]                = mapped_to  # llama-pth
+
+        # Rotary embeddings
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_ROT_EMBD, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["model.layers."+str(i)+".self_attn.rotary_emb.inv_freq"]  = mapped_to  # llama-hf
+        tensor_map["layers."+str(i)+".attention.inner_attention.rope.freqs"] = mapped_to  # llama-pth
+
         # Feed-forward norm
-        mapped_to = "blk."+str(i)+".ffn_norm"
-        tensor_map["gpt_neox.layers."+str(i)+".post_attention_layernorm"] = mapped_to # gptneox
-        tensor_map["transformer.h."+str(i)+".ln_2"]                       = mapped_to # gpt2
-        tensor_map["transformer.blocks."+str(i)+".norm_2"]                = mapped_to # mpt
-        tensor_map["model.layers."+str(i)+".post_attention_layernorm"]    = mapped_to # llama-hf
-        tensor_map["layers."+str(i)+".ffn_norm"]                          = mapped_to # llama-pth
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_NORM, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["gpt_neox.layers."+str(i)+".post_attention_layernorm"] = mapped_to  # gptneox
+        tensor_map["transformer.h."+str(i)+".ln_2"]                       = mapped_to  # gpt2
+        tensor_map["transformer.blocks."+str(i)+".norm_2"]                = mapped_to  # mpt
+        tensor_map["model.layers."+str(i)+".post_attention_layernorm"]    = mapped_to  # llama-hf
+        tensor_map["layers."+str(i)+".ffn_norm"]                          = mapped_to  # llama-pth
+
         # Feed-forward up
-        mapped_to = "blk."+str(i)+".ffn_up"
-        tensor_map["gpt_neox.layers."+str(i)+".mlp.dense_h_to_4h"] = mapped_to # gptneox
-        tensor_map["transformer.h."+str(i)+".mlp.c_fc"]            = mapped_to # gpt2
-        tensor_map["transformer.blocks."+str(i)+".ffn.up_proj"]    = mapped_to # mpt
-        tensor_map["transformer.h."+str(i)+".mlp.dense_h_to_4h"]   = mapped_to # falcon
-        tensor_map["model.layers."+str(i)+".mlp.up_proj"]          = mapped_to # llama-hf
-        tensor_map["layers."+str(i)+".feed_forward.w3"]            = mapped_to # llama-pth
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_UP, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["gpt_neox.layers."+str(i)+".mlp.dense_h_to_4h"] = mapped_to  # gptneox
+        tensor_map["transformer.h."+str(i)+".mlp.c_fc"]            = mapped_to  # gpt2
+        tensor_map["transformer.blocks."+str(i)+".ffn.up_proj"]    = mapped_to  # mpt
+        tensor_map["transformer.h."+str(i)+".mlp.dense_h_to_4h"]   = mapped_to  # falcon
+        tensor_map["model.layers."+str(i)+".mlp.up_proj"]          = mapped_to  # llama-hf
+        tensor_map["layers."+str(i)+".feed_forward.w3"]            = mapped_to  # llama-pth
+
         # Feed-forward gate
-        mapped_to = "blk."+str(i)+".ffn_gate"
-        tensor_map["model.layers."+str(i)+".mlp.gate_proj"] = mapped_to # llama-hf
-        tensor_map["layers."+str(i)+".feed_forward.w1"]     = mapped_to # llama-pth
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_GATE, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["model.layers."+str(i)+".mlp.gate_proj"] = mapped_to  # llama-hf
+        tensor_map["layers."+str(i)+".feed_forward.w1"]     = mapped_to  # llama-pth
+
         # Feed-forward down
-        mapped_to = "blk."+str(i)+".ffn_down"
-        tensor_map["gpt_neox.layers."+str(i)+".mlp.dense_4h_to_h"] = mapped_to # gptneox
-        tensor_map["transformer.h."+str(i)+".mlp.c_proj"]          = mapped_to # gpt2
-        tensor_map["transformer.blocks."+str(i)+".ffn.down_proj"]  = mapped_to # mpt
-        tensor_map["transformer.h."+str(i)+".mlp.dense_4h_to_h"]   = mapped_to # falcon
-        tensor_map["model.layers."+str(i)+".mlp.down_proj"]        = mapped_to # llama-hf
-        tensor_map["layers."+str(i)+".feed_forward.w2"]            = mapped_to # llama-pth
+        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_DOWN, None)
+        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None
+
+        tensor_map["gpt_neox.layers."+str(i)+".mlp.dense_4h_to_h"] = mapped_to  # gptneox
+        tensor_map["transformer.h."+str(i)+".mlp.c_proj"]          = mapped_to  # gpt2
+        tensor_map["transformer.blocks."+str(i)+".ffn.down_proj"]  = mapped_to  # mpt
+        tensor_map["transformer.h."+str(i)+".mlp.dense_4h_to_h"]   = mapped_to  # falcon
+        tensor_map["model.layers."+str(i)+".mlp.down_proj"]        = mapped_to  # llama-hf
+        tensor_map["layers."+str(i)+".feed_forward.w2"]            = mapped_to  # llama-pth
 
     return tensor_map
 
@@ -168,6 +323,7 @@ def get_tensor_name_map(n_blocks : int):
 # implementation
 #
 
+
 class GGMLQuantizationType(IntEnum):
     F32 = 0
     F16 = 1
@@ -203,14 +359,16 @@ class GGUFValueType(IntEnum):
 
 
 class GGUFWriter:
-    def __init__(self, fout: IO):
-        self.fout = fout
+    def __init__(self, path: str, arch: str):
+        self.fout = open(path, "wb")
+        self.arch = arch
         self.offset_tensor = 0
         self.data_alignment = GGUF_DEFAULT_ALIGNMENT
         self.kv_data = b""
         self.kv_data_count = 0
         self.ti_data = b""
         self.ti_data_count = 0
+        self.add_architecture()
 
     def write_header_to_file(self):
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
@@ -228,11 +386,6 @@ class GGUFWriter:
         self.fout.write(self.ti_data)
         self.flush()
 
-    @classmethod
-    def open(cls, path: str) -> "GGUFWriter":
-        f = open(path, "wb")
-        return cls(f)
-
     def add_key(self, key: str):
         self.add_val(key, GGUFValueType.STRING, add_vtype=False)
 
@@ -269,7 +422,8 @@ class GGUFWriter:
         self.add_val(val, GGUFValueType.BOOL)
 
     def add_string(self, key: str, val: str):
-        if len(val) == 0: return
+        if len(val) == 0:
+            return
         self.add_key(key)
         self.add_val(val, GGUFValueType.STRING)
 
@@ -323,6 +477,8 @@ class GGUFWriter:
         return ((x + n - 1) // n) * n
 
     def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.dtype, tensor_nbytes: int):
+        assert tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
+
         encoded_name = name.encode("utf8")
         self.ti_data += struct.pack("<I", len(encoded_name))
         self.ti_data += encoded_name
@@ -331,14 +487,26 @@ class GGUFWriter:
         for i in range(n_dims):
             self.ti_data += struct.pack("<I", tensor_shape[n_dims - 1 - i])
 
-        assert tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
         dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         self.ti_data += struct.pack("<I", dtype)
         self.ti_data += struct.pack("<Q", self.offset_tensor)
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1
 
-    def write_tensor_to_file(self, tensor: np.ndarray):
+    def add_tensor(self, name: str, tensor: np.ndarray):
+        if not hasattr(self, "temp_file"):
+            self.temp_file = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
+            self.temp_file.seek(0)
+
+        self.add_tensor_info(name, tensor.shape, tensor.dtype, tensor.nbytes)
+
+        tensor.tofile(self.temp_file)
+
+        pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes
+        if pad != 0:
+            self.temp_file.write(bytes([0] * pad))
+
+    def write_tensor_data(self, tensor: np.ndarray):
         pad = GGUFWriter.ggml_pad(self.fout.tell(), self.data_alignment) - self.fout.tell()
         if pad != 0:
             self.fout.write(bytes([0] * pad))
@@ -349,21 +517,33 @@ class GGUFWriter:
         if pad != 0:
             self.fout.write(bytes([0] * pad))
 
+    def write_tensors_to_file(self):
+        self.write_ti_data_to_file()
+
+        pad = GGUFWriter.ggml_pad(self.fout.tell(), self.data_alignment) - self.fout.tell()
+        if pad != 0:
+            self.fout.write(bytes([0] * pad))
+
+        self.temp_file.seek(0)
+
+        shutil.copyfileobj(self.temp_file, self.fout)
+        self.flush()
+        self.temp_file.close()
+
     def flush(self):
         self.fout.flush()
 
     def close(self):
         self.fout.close()
 
-    def add_architecture(self, architecture: str):
-        self.add_string(KEY_GENERAL_ARCHITECTURE,
-                        architecture)
+    def add_architecture(self):
+        self.add_string(KEY_GENERAL_ARCHITECTURE, self.arch)
 
     def add_author(self, author: str):
         self.add_string(KEY_GENERAL_AUTHOR, author)
 
     def add_tensor_data_layout(self, layout: str):
-        self.add_string(KEY_LLM_TENSOR_DATA_LAYOUT , layout)
+        self.add_string(KEY_LLM_TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
 
     def add_url(self, url: str):
         self.add_string(KEY_GENERAL_URL, url)
@@ -371,9 +551,6 @@ class GGUFWriter:
     def add_description(self, description: str):
         self.add_string(KEY_GENERAL_DESCRIPTION, description)
 
-    def add_file_type(self, file_type: str):
-        self.add_string(KEY_GENERAL_FILE_TYPE, file_type)
-
     def add_source_url(self, url: str):
         self.add_string(KEY_GENERAL_SOURCE_URL, url)
 
@@ -391,60 +568,60 @@ class GGUFWriter:
         self.data_alignment = alignment
         self.add_uint32(KEY_GENERAL_ALIGNMENT, alignment)
 
-    def add_context_length(self, llm: str, length: int):
+    def add_context_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_CONTEXT_LENGTH.format(llm=llm), length)
+            KEY_LLM_CONTEXT_LENGTH.format(arch=self.arch), length)
 
-    def add_embedding_length(self, llm: str, length: int):
+    def add_embedding_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_EMBEDDING_LENGTH.format(llm=llm), length)
+            KEY_LLM_EMBEDDING_LENGTH.format(arch=self.arch), length)
 
-    def add_block_count(self, llm: str, length: int):
+    def add_block_count(self, length: int):
         self.add_uint32(
-            KEY_LLM_BLOCK_COUNT.format(llm=llm), length)
+            KEY_LLM_BLOCK_COUNT.format(arch=self.arch), length)
 
-    def add_feed_forward_length(self, llm: str, length: int):
+    def add_feed_forward_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_FEED_FORWARD_LENGTH.format(llm=llm), length)
+            KEY_LLM_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
 
-    def add_parallel_residual(self, llm: str, use: bool):
+    def add_parallel_residual(self, use: bool):
         self.add_bool(
-            KEY_LLM_USE_PARALLEL_RESIDUAL.format(llm=llm), use)
+            KEY_LLM_USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
 
-    def add_tensor_data_layout(self, llm: str, layout: str):
+    def add_tensor_data_layout(self, layout: str):
         self.add_string(
-            KEY_LLM_TENSOR_DATA_LAYOUT.format(llm=llm), layout)
+            KEY_LLM_TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
 
-    def add_head_count(self, llm: str, count: int):
+    def add_head_count(self, count: int):
         self.add_uint32(
-            KEY_ATTENTION_HEAD_COUNT.format(llm=llm), count)
+            KEY_ATTENTION_HEAD_COUNT.format(arch=self.arch), count)
 
-    def add_head_count_kv(self, llm: str, count: int):
+    def add_head_count_kv(self, count: int):
         self.add_uint32(
-            KEY_ATTENTION_HEAD_COUNT_KV.format(llm=llm), count)
+            KEY_ATTENTION_HEAD_COUNT_KV.format(arch=self.arch), count)
 
-    def add_max_alibi_bias(self, llm: str, bias: float):
+    def add_max_alibi_bias(self, bias: float):
         self.add_float32(
-            KEY_ATTENTION_MAX_ALIBI_BIAS.format(llm=llm), bias)
+            KEY_ATTENTION_MAX_ALIBI_BIAS.format(arch=self.arch), bias)
 
-    def add_clamp_kqv(self, llm: str, value: float):
+    def add_clamp_kqv(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_CLAMP_KQV.format(llm=llm), value)
+            KEY_ATTENTION_CLAMP_KQV.format(arch=self.arch), value)
 
-    def add_layer_norm_eps(self, llm: str, value: float):
+    def add_layer_norm_eps(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_LAYERNORM_EPS.format(llm=llm), value)
+            KEY_ATTENTION_LAYERNORM_EPS.format(arch=self.arch), value)
 
-    def add_layer_norm_rms_eps(self, llm: str, value: float):
+    def add_layer_norm_rms_eps(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_LAYERNORM_RMS_EPS.format(llm=llm), value)
+            KEY_ATTENTION_LAYERNORM_RMS_EPS.format(arch=self.arch), value)
 
-    def add_rope_dimension_count(self, llm: str, count: int):
+    def add_rope_dimension_count(self, count: int):
         self.add_uint32(
-            KEY_ROPE_DIMENSION_COUNT.format(llm=llm), count)
+            KEY_ROPE_DIMENSION_COUNT.format(arch=self.arch), count)
 
-    def add_rope_scale(self, llm: str, value:  float):
-        self.add_float32(KEY_ROPE_SCALE.format(llm=llm), value)
+    def add_rope_scale(self, value:  float):
+        self.add_float32(KEY_ROPE_SCALE.format(arch=self.arch), value)
 
     def add_tokenizer_model(self, model: str):
         self.add_string(KEY_TOKENIZER_MODEL, model)
@@ -476,24 +653,28 @@ class GGUFWriter:
     def add_pad_token_id(self, id: int):
         self.add_uint32(KEY_TOKENIZER_PAD_ID, id)
 
+
 # Example usage:
 if __name__ == "__main__":
     # Example usage with a file
-    gguf_writer = GGUFWriter.open("example.gguf")
+    gguf_writer = GGUFWriter("example.gguf", "llama")
 
-    gguf_writer.add_architecture("llama")
+    gguf_writer.add_architecture()
+    gguf_writer.add_block_count(12)
     gguf_writer.add_uint32("answer", 42)  # Write a 32-bit integer
     gguf_writer.add_float32("answer_in_float", 42.0)  # Write a 32-bit float
     gguf_writer.add_custom_alignment(64)
+
     tensor1 = np.ones((32,), dtype=np.float32) * 100.0
-    tensor2 = np.ones((32,), dtype=np.float32) * 101.0
-    gguf_writer.add_tensor_info("tensor0", tensor1)
-    gguf_writer.add_tensor_info("tensor1", tensor2)
+    tensor2 = np.ones((64,), dtype=np.float32) * 101.0
+    tensor3 = np.ones((96,), dtype=np.float32) * 102.0
+
+    gguf_writer.add_tensor("tensor1", tensor1)
+    gguf_writer.add_tensor("tensor2", tensor2)
+    gguf_writer.add_tensor("tensor3", tensor3)
 
     gguf_writer.write_header_to_file()
     gguf_writer.write_kv_data_to_file()
-    gguf_writer.write_ti_data_to_file()
-    gguf_writer.write_tensor_to_file(tensor1)
-    gguf_writer.write_tensor_to_file(tensor2)
+    gguf_writer.write_tensors_to_file()
 
     gguf_writer.close()
diff --git a/llama.cpp b/llama.cpp
index 38a2d5ba8..86c943fc6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6,12 +6,11 @@
 #include <cstdio>
 #endif
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
 
 #include "ggml.h"
 
-#if !defined(GGML_USE_CUBLAS) && !defined(GGML_USE_METAL)
+#if !defined(GGML_USE_CUBLAS)
 #  include "ggml-alloc.h"
 #  define LLAMA_USE_ALLOCATOR
 #else
@@ -126,13 +125,6 @@ static void llama_log_callback_default(llama_log_level level, const char * text,
 // helpers
 //
 
-template<typename T>
-static std::string to_string(const T & val) {
-    std::stringstream ss;
-    ss << val;
-    return ss.str();
-}
-
 static void zeros(std::ofstream & file, size_t n) {
     char zero = 0;
     for (size_t i = 0; i < n; ++i) {
@@ -284,7 +276,7 @@ struct llama_file {
         }
     }
 
-    uint32_t read_u32() {
+    uint32_t read_u32() const {
         uint32_t ret;
         read_raw(&ret, sizeof(ret));
         return ret;
@@ -566,10 +558,24 @@ struct llama_mlock {
 
 typedef void (*offload_func_t)(struct ggml_tensor * tensor);
 
-void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
+static void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
     (void) tensor;
 }
 
+static std::string llama_token_to_text(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_str(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
 //
 // globals
 //
@@ -604,9 +610,9 @@ enum e_model {
 static const size_t kB = 1024;
 static const size_t MB = 1024*1024;
 
-static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0(int n_ctx)
+static std::map<e_model, size_t> MEM_REQ_SCRATCH0(int n_ctx)
 {
-    static std::map<e_model, size_t> k_sizes = {
+    std::map<e_model, size_t> k_sizes = {
         { MODEL_3B,   ((size_t) n_ctx / 16ull +  92ull) * MB },
         { MODEL_7B,   ((size_t) n_ctx / 16ull + 100ull) * MB },
         { MODEL_13B,  ((size_t) n_ctx / 12ull + 120ull) * MB },
@@ -676,22 +682,21 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_PER_CONTEXT()
 
 // default hparams (LLaMA 7B)
 struct llama_hparams {
-    uint32_t n_vocab   = 32000;
-    uint32_t n_ctx     = 512;
-    uint32_t n_embd    = 4096;
-    uint32_t n_head    = 32;
-    uint32_t n_head_kv = 32;
-    uint32_t n_layer   = 32;
-    uint32_t n_rot     = 64;
-    uint32_t n_ff      = 11008;
+    uint32_t n_vocab     = 32000;
+    uint32_t n_ctx_train = 2048;  // the context size used during training
+    uint32_t n_ctx       = 512;   // the context size used during inference
+    uint32_t n_embd      = 4096;
+    uint32_t n_head      = 32;
+    uint32_t n_head_kv   = 32;
+    uint32_t n_layer     = 32;
+    uint32_t n_rot       = 64;
+    uint32_t n_ff        = 11008;
 
     float f_norm_rms_eps = 1e-5;
 
     float rope_freq_base  = 10000.0f;
     float rope_freq_scale = 1.0f;
 
-    enum llama_ftype ftype = LLAMA_FTYPE_MOSTLY_F16;
-
     bool operator!=(const llama_hparams & other) const {
         return static_cast<bool>(memcmp(this, &other, sizeof(llama_hparams))); // NOLINT
     }
@@ -762,7 +767,6 @@ struct llama_kv_cache {
 struct llama_vocab {
     // TODO:
     // - add a vector of merges
-    // - add members for bos/eos/pad/sep tokens
     //   so that we can pass it to different types of tokenizers with a common interface
 
     using id    = int32_t;
@@ -773,12 +777,24 @@ struct llama_vocab {
         float score;
     };
 
+    llama_vocab_type type = LLAMA_VOCAB_TYPE_SPM;
+
     std::unordered_map<token, id> token_to_id;
-    std::vector<token_score> id_to_token;
+    std::vector<token_score>      id_to_token;
+
+    // default LLaMA special tokens
+    id special_bos_id = 1;
+    id special_eos_id = 2;
+    id special_unk_id = -1;
+    id special_sep_id = -1;
+    id special_pad_id = -1;
+
+    id linefeed_id = 13;
 };
 
 struct llama_model {
-    e_model type = MODEL_UNKNOWN;
+    e_model     type  = MODEL_UNKNOWN;
+    llama_ftype ftype = LLAMA_FTYPE_ALL_F32;
 
     llama_hparams hparams;
     llama_vocab   vocab;
@@ -881,7 +897,6 @@ struct llama_context {
     std::vector<uint8_t> work_buffer;
 
     // memory buffers used to evaluate the model
-    // TODO: move in llama_state
     llama_buffer buf_compute;
 
 #ifdef LLAMA_USE_ALLOCATOR
@@ -1024,10 +1039,13 @@ struct llama_model_loader {
     int n_tensors = 0;
     int n_created = 0;
 
+    int64_t n_elements = 0;
+
     bool use_mmap = false;
 
     llama_file file;
-    llama_file_version file_version;
+    llama_ftype ftype;
+    llama_file_version fver;
 
     std::unique_ptr<llama_mmap> mapping;
 
@@ -1041,31 +1059,81 @@ struct llama_model_loader {
         };
 
         ctx_gguf = gguf_init_from_file(fname.c_str(), params);
+        if (!ctx_gguf) {
+            throw std::runtime_error(format("%s: failed to load model from %s\n", __func__, fname.c_str()));
+        }
 
         n_kv      = gguf_get_n_kv(ctx_gguf);
         n_tensors = gguf_get_n_tensors(ctx_gguf);
 
-        file_version = (enum llama_file_version) gguf_get_version(ctx_gguf);
+        fver = (enum llama_file_version) gguf_get_version(ctx_gguf);
 
-        // print meta data
+        for (int i = 0; i < n_tensors; i++) {
+            const char * name = gguf_get_tensor_name(ctx_gguf, i);
+            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name);
+            n_elements += ggml_nelements(t);
+        }
+
+        LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
+                __func__, n_kv, n_tensors, fname.c_str(), llama_file_version_name(fver));
+
+        // determine file type based on the number of tensors for each quantization and print meta data
         // TODO: make optional
         {
-            LLAMA_LOG_INFO("%s: loaded meta data with %d key-value paris and %d tensors from %s (version %s)\n",
-                    __func__, n_kv, n_tensors, fname.c_str(), llama_file_version_name(file_version));
+            std::map<enum ggml_type, uint32_t> n_type;
+
+            uint32_t n_type_max = 0;
+            enum ggml_type type_max = GGML_TYPE_F32;
 
             for (int i = 0; i < n_tensors; i++) {
                 const char * name = gguf_get_tensor_name(ctx_gguf, i);
                 struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, name);
 
+                n_type[meta->type]++;
+
+                if (n_type_max < n_type[meta->type]) {
+                    n_type_max = n_type[meta->type];
+                    type_max   = meta->type;
+                }
+
                 LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, name, ggml_type_name(meta->type), llama_format_tensor_shape(meta).c_str());
             }
 
+            switch (type_max) {
+                case GGML_TYPE_F32:  ftype = LLAMA_FTYPE_ALL_F32;       break;
+                case GGML_TYPE_F16:  ftype = LLAMA_FTYPE_MOSTLY_F16;    break;
+                case GGML_TYPE_Q4_0: ftype = LLAMA_FTYPE_MOSTLY_Q4_0;   break;
+                case GGML_TYPE_Q4_1: ftype = LLAMA_FTYPE_MOSTLY_Q4_1;   break;
+                case GGML_TYPE_Q5_0: ftype = LLAMA_FTYPE_MOSTLY_Q5_0;   break;
+                case GGML_TYPE_Q5_1: ftype = LLAMA_FTYPE_MOSTLY_Q5_1;   break;
+                case GGML_TYPE_Q8_0: ftype = LLAMA_FTYPE_MOSTLY_Q8_0;   break;
+                case GGML_TYPE_Q2_K: ftype = LLAMA_FTYPE_MOSTLY_Q2_K;   break;
+                case GGML_TYPE_Q3_K: ftype = LLAMA_FTYPE_MOSTLY_Q3_K_M; break;
+                case GGML_TYPE_Q4_K: ftype = LLAMA_FTYPE_MOSTLY_Q4_K_M; break;
+                case GGML_TYPE_Q5_K: ftype = LLAMA_FTYPE_MOSTLY_Q5_K_M; break;
+                case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K;   break;
+                default:
+                     {
+                         LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max));
+                         ftype = LLAMA_FTYPE_ALL_F32;
+                     } break;
+            }
+
             for (int i = 0; i < n_kv; i++) {
                 const char * name         = gguf_get_key(ctx_gguf, i);
                 const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
 
                 LLAMA_LOG_INFO("%s: - kv %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type));
             }
+
+            // print type counts
+            for (auto & kv : n_type) {
+                if (kv.second == 0) {
+                    continue;
+                }
+
+                LLAMA_LOG_INFO("%s: - type %4s: %4d tensors\n", __func__, ggml_type_name(kv.first), kv.second);
+            }
         }
 
         if (!llama_mmap::SUPPORTED) {
@@ -1076,6 +1144,15 @@ struct llama_model_loader {
         this->use_mmap = use_mmap;
     }
 
+    ~llama_model_loader() {
+        if (ctx_gguf) {
+            gguf_free(ctx_gguf);
+        }
+        if (ctx_meta) {
+            ggml_free(ctx_meta);
+        }
+    }
+
     const char * get_tensor_name(int i) const {
         return gguf_get_tensor_name(ctx_gguf, i);
     }
@@ -1116,6 +1193,10 @@ struct llama_model_loader {
     struct ggml_tensor * create_tensor(struct ggml_context * ctx, const std::string & name, const std::vector<uint32_t> & ne, ggml_backend backend) {
         struct ggml_tensor * cur = ggml_get_tensor(ctx_meta, name.c_str());
 
+        if (cur == NULL) {
+            throw std::runtime_error(format("%s: tensor '%s' not found", __func__, name.c_str()));
+        }
+
         {
             bool is_ok = true;
             for (size_t i = 0; i < ne.size(); ++i) {
@@ -1240,7 +1321,7 @@ struct llama_model_loader {
 // load LLaMA models
 //
 
-static const char * llama_ftype_name(enum llama_ftype ftype) {
+const char * llama_model_ftype_name(enum llama_ftype ftype) {
     switch (ftype) {
         case LLAMA_FTYPE_ALL_F32:     return "all F32";
         case LLAMA_FTYPE_MOSTLY_F16:  return "mostly F16";
@@ -1306,6 +1387,9 @@ static void llama_model_load_internal(
 
     auto & hparams = model.hparams;
 
+    std::string general_name = "n/a";
+    std::string general_arch = "n/a";
+
     // read hparams
     {
         struct gguf_context * ctx = ml->ctx_gguf;
@@ -1324,8 +1408,22 @@ static void llama_model_load_internal(
             } \
         }
 
+        std::string tokenizer_name;
+        GGUF_GET(tokenizer_name, gguf_get_val_str, GGUF_TYPE_STRING, true, "tokenizer.ggml.model");
+
+        if (tokenizer_name == "llama") {
+            vocab.type = LLAMA_VOCAB_TYPE_SPM;
+        } else if (tokenizer_name == "gpt2") {
+            vocab.type = LLAMA_VOCAB_TYPE_BPE;
+        } else {
+            LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_name.c_str());
+            LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__);
+            vocab.type = LLAMA_VOCAB_TYPE_SPM;
+        }
+
+        // get hparams kv
         GGUF_GET(hparams.n_vocab,        gguf_get_arr_n,   GGUF_TYPE_ARRAY,   true, "tokenizer.ggml.tokens");
-        GGUF_GET(hparams.n_ctx,          gguf_get_val_u32, GGUF_TYPE_UINT32,  true, "llama.context_length");
+        GGUF_GET(hparams.n_ctx_train,    gguf_get_val_u32, GGUF_TYPE_UINT32,  true, "llama.context_length");
         GGUF_GET(hparams.n_embd,         gguf_get_val_u32, GGUF_TYPE_UINT32,  true, "llama.embedding_length");
         GGUF_GET(hparams.n_ff,           gguf_get_val_u32, GGUF_TYPE_UINT32,  true, "llama.feed_forward_length");
         GGUF_GET(hparams.n_head,         gguf_get_val_u32, GGUF_TYPE_UINT32,  true, "llama.attention.head_count");
@@ -1336,6 +1434,18 @@ static void llama_model_load_internal(
         // n_head_kv is optional, default to n_head
         hparams.n_head_kv = hparams.n_head;
         GGUF_GET(hparams.n_head_kv, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "llama.attention.head_count_kv");
+
+        // get general kv
+        GGUF_GET(general_name, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.name");
+        GGUF_GET(general_arch, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.architecture");
+
+        // special tokens
+        GGUF_GET(vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.bos_token_id");
+        GGUF_GET(vocab.special_eos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.eos_token_id");
+        GGUF_GET(vocab.special_unk_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.unknown_token_id");
+        GGUF_GET(vocab.special_sep_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.separator_token_id");
+        GGUF_GET(vocab.special_pad_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.padding_token_id");
+
 #undef GGUF_GET
 
         switch (hparams.n_layer) {
@@ -1352,6 +1462,8 @@ static void llama_model_load_internal(
                 } break;
         }
 
+        model.ftype = ml->ftype;
+
         hparams.n_ctx = n_ctx;
 
         // LLaMAv2
@@ -1377,12 +1489,12 @@ static void llama_model_load_internal(
 
         const int token_idx = gguf_find_key(ctx, "tokenizer.ggml.tokens");
         if (token_idx == -1) {
-            throw std::runtime_error("cannot find token list in GGUF file\n");
+            throw std::runtime_error("cannot find tokenizer vocab in model file\n");
         }
 
         const int score_idx = gguf_find_key(ctx, "tokenizer.ggml.scores");
         if (score_idx == -1) {
-            throw std::runtime_error("cannot find token scores list in GGUF file\n");
+            throw std::runtime_error("cannot find tokenizer scores in model file\n");
         }
 
         const float * scores = (const float * ) gguf_get_arr_data(ctx, score_idx);
@@ -1395,28 +1507,50 @@ static void llama_model_load_internal(
             auto & tok_score = vocab.id_to_token[i];
             tok_score.tok = std::move(word);
             tok_score.score = scores[i];
+
+            // determine the newline token: 0x0A == 10 == '\n'
+            if (tok_score.tok == "<0x0A>") {
+                vocab.linefeed_id = i;
+            }
         }
     }
 
     {
-        LLAMA_LOG_INFO("%s: format     = %s\n",   __func__, llama_file_version_name(ml->file_version));
-        LLAMA_LOG_INFO("%s: n_vocab    = %u\n",   __func__, hparams.n_vocab);
-        LLAMA_LOG_INFO("%s: n_ctx      = %u\n",   __func__, hparams.n_ctx);
-        LLAMA_LOG_INFO("%s: n_embd     = %u\n",   __func__, hparams.n_embd);
-        LLAMA_LOG_INFO("%s: n_head     = %u\n",   __func__, hparams.n_head);
-        LLAMA_LOG_INFO("%s: n_head_kv  = %u\n",   __func__, hparams.n_head_kv);
-        LLAMA_LOG_INFO("%s: n_layer    = %u\n",   __func__, hparams.n_layer);
-        LLAMA_LOG_INFO("%s: n_rot      = %u\n",   __func__, hparams.n_rot); // a.k.a. n_embd_head, n_head_dim
-        LLAMA_LOG_INFO("%s: n_gqa      = %u\n",   __func__, hparams.n_gqa());
-        LLAMA_LOG_INFO("%s: f_norm_eps = %.1e\n", __func__, hparams.f_norm_rms_eps);
-        LLAMA_LOG_INFO("%s: n_ff       = %u\n",   __func__, hparams.n_ff);
-        LLAMA_LOG_INFO("%s: freq_base  = %.1f\n", __func__, hparams.rope_freq_base);
-        LLAMA_LOG_INFO("%s: freq_scale = %g\n",   __func__, hparams.rope_freq_scale);
-        LLAMA_LOG_INFO("%s: ftype      = %u (%s)\n", __func__, hparams.ftype, llama_ftype_name(hparams.ftype));
-        LLAMA_LOG_INFO("%s: model size = %s\n",   __func__, llama_model_type_name(model.type));
+        // hparams
+        LLAMA_LOG_INFO("%s: format       = %s\n",     __func__, llama_file_version_name(ml->fver));
+        LLAMA_LOG_INFO("%s: arch         = %s\n",     __func__, general_arch.c_str());
+        LLAMA_LOG_INFO("%s: vocab type   = %s\n",     __func__, vocab.type == LLAMA_VOCAB_TYPE_SPM ? "SPM" : "BPE"); // TODO: fix
+        LLAMA_LOG_INFO("%s: n_vocab      = %u\n",     __func__, hparams.n_vocab);
+        LLAMA_LOG_INFO("%s: n_ctx_train  = %u\n",     __func__, hparams.n_ctx_train);
+        LLAMA_LOG_INFO("%s: n_ctx        = %u\n",     __func__, hparams.n_ctx);
+        LLAMA_LOG_INFO("%s: n_embd       = %u\n",     __func__, hparams.n_embd);
+        LLAMA_LOG_INFO("%s: n_head       = %u\n",     __func__, hparams.n_head);
+        LLAMA_LOG_INFO("%s: n_head_kv    = %u\n",     __func__, hparams.n_head_kv);
+        LLAMA_LOG_INFO("%s: n_layer      = %u\n",     __func__, hparams.n_layer);
+        LLAMA_LOG_INFO("%s: n_rot        = %u\n",     __func__, hparams.n_rot); // a.k.a. n_embd_head, n_head_dim
+        LLAMA_LOG_INFO("%s: n_gqa        = %u\n",     __func__, hparams.n_gqa());
+        LLAMA_LOG_INFO("%s: f_norm_eps   = %.1e\n",   __func__, hparams.f_norm_rms_eps);
+        LLAMA_LOG_INFO("%s: n_ff         = %u\n",     __func__, hparams.n_ff);
+        LLAMA_LOG_INFO("%s: freq_base    = %.1f\n",   __func__, hparams.rope_freq_base);
+        LLAMA_LOG_INFO("%s: freq_scale   = %g\n",     __func__, hparams.rope_freq_scale);
+        LLAMA_LOG_INFO("%s: model type   = %s\n",     __func__, llama_model_type_name(model.type));
+        LLAMA_LOG_INFO("%s: model ftype  = %s\n",     __func__, llama_model_ftype_name(model.ftype));
+        LLAMA_LOG_INFO("%s: model size   = %.2f B\n", __func__, ml->n_elements*1e-9);
+
+        // general kv
+        LLAMA_LOG_INFO("%s: general.name = %s\n",    __func__, general_name.c_str());
+
+        // special tokens
+        if (vocab.special_bos_id != -1) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].tok.c_str() ); }
+        if (vocab.special_eos_id != -1) { LLAMA_LOG_INFO( "%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].tok.c_str() ); }
+        if (vocab.special_unk_id != -1) { LLAMA_LOG_INFO( "%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].tok.c_str() ); }
+        if (vocab.special_sep_id != -1) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].tok.c_str() ); }
+        if (vocab.special_pad_id != -1) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].tok.c_str() ); }
+        if (vocab.linefeed_id    != -1) { LLAMA_LOG_INFO( "%s: LF token  = %d '%s'\n", __func__, vocab.linefeed_id,    vocab.id_to_token[vocab.linefeed_id].tok.c_str() );    }
     }
 
     if (vocab_only) {
+        LLAMA_LOG_INFO("%s: vocab only - skipping tensors\n", __func__);
         return;
     }
 
@@ -1881,11 +2015,11 @@ static struct ggml_cgraph * llama_build_graph(
             ggml_set_name(Q, "Q");
 
             struct ggml_tensor * K =
-                ggml_permute(ctx0,
-                        ggml_reshape_3d(ctx0,
-                            ggml_view_1d(ctx0, kv_self.k, (n_past + N)*n_embd_gqa, il*n_ctx*ggml_element_size(kv_self.k)*n_embd_gqa),
-                            n_embd_head, n_head_kv, n_past + N),
-                        0, 2, 1, 3);
+                ggml_view_3d(ctx0, kv_self.k,
+                        n_embd_head, n_past + N, n_head_kv,
+                        ggml_element_size(kv_self.k)*n_embd_gqa,
+                        ggml_element_size(kv_self.k)*n_embd_head,
+                        ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il);
             offload_func_kq(K);
             ggml_set_name(K, "K");
 
@@ -1914,9 +2048,9 @@ static struct ggml_cgraph * llama_build_graph(
             struct ggml_tensor * V =
                 ggml_view_3d(ctx0, kv_self.v,
                         n_past + N, n_embd_head, n_head_kv,
-                        n_ctx*ggml_element_size(kv_self.v),
-                        n_ctx*ggml_element_size(kv_self.v)*n_embd_head,
-                        n_ctx*ggml_element_size(kv_self.v)*n_embd_gqa*il);
+                        ggml_element_size(kv_self.v)*n_ctx,
+                        ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
+                        ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
             offload_func_v(V);
             ggml_set_name(V, "V");
 
@@ -2071,6 +2205,13 @@ static bool llama_eval_internal(
 
     GGML_ASSERT((!tokens && embd) || (tokens && !embd)); // NOLINT
 
+    GGML_ASSERT(n_tokens > 0);
+    GGML_ASSERT(n_past >= 0);
+    GGML_ASSERT(n_threads > 0);
+    // TODO: keep the values of n_batch and n_ctx
+    // GGML_ASSERT(n_tokens <= n_batch);
+    // GGML_ASSERT(n_past + n_tokens <= n_ctx);
+
     const int64_t t_start_us = ggml_time_us();
 
 #ifdef GGML_USE_MPI
@@ -2117,11 +2258,7 @@ static bool llama_eval_internal(
 #endif
 
 #ifdef GGML_USE_METAL
-    if (lctx.ctx_metal && N == 1) {
-        // TODO: disabled until #2413 is resolved
-        //if (!ggml_metal_if_optimized(lctx.ctx_metal)) {
-        //    ggml_metal_graph_find_concurrency(lctx.ctx_metal, gf);
-        //}
+    if (lctx.ctx_metal) {
         ggml_metal_set_n_cb     (lctx.ctx_metal, n_threads);
         ggml_metal_graph_compute(lctx.ctx_metal, gf);
         ggml_metal_get_tensor   (lctx.ctx_metal, res);
@@ -2129,22 +2266,6 @@ static bool llama_eval_internal(
             ggml_metal_get_tensor(lctx.ctx_metal, embeddings);
         }
     } else {
-        // IMPORTANT:
-        // Since we don't have efficient Matrix x Matrix Metal multiplication yet, we fallback to vanilla
-        // ggml_graph_compute(). It uses Apple's Accelerate CBLAS API which takes advantage of the ANE or the AMX
-        // coprocessor.
-        //
-        // When we implement Matrix x Matrix Metal multiplication, we can avoid this branch.
-        // But for now, we have focused only on Matrix x Vector Metal multiplication.
-        //
-        // TODO: avoid these syncs via shared memory (ref #1696)
-        //
-        if (lctx.ctx_metal) {
-            // We need to sync the GPU KV cache with the CPU KV cache
-            ggml_metal_get_tensor(lctx.ctx_metal, kv_self.k);
-            ggml_metal_get_tensor(lctx.ctx_metal, kv_self.v);
-        }
-
         ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
     }
 #else
@@ -2212,52 +2333,42 @@ static bool llama_eval_internal(
 // tokenizer
 //
 
-static std::string llama_vocab_type(const llama_vocab & vocab) {
-    return vocab.token_to_id.size() == 32000 ? "spm": "bpe";
+static enum llama_vocab_type llama_vocab_get_type(const llama_vocab & vocab) {
+    return vocab.type;
 }
 
 static bool llama_is_normal_token(const llama_vocab & vocab, llama_token token) {
-    if (llama_vocab_type(vocab) == "spm") {
+    if (llama_vocab_get_type(vocab) == LLAMA_VOCAB_TYPE_SPM) {
         return token >= 259;
     }
 
-    if (llama_vocab_type(vocab) == "bpe") {
+    if (llama_vocab_get_type(vocab) == LLAMA_VOCAB_TYPE_BPE) {
         return token >= 95;
     }
 
     return false;
 }
 
-static bool llama_is_unknown_token(const llama_vocab & vocab, llama_token token) {
-    if (llama_vocab_type(vocab) == "spm") {
-        return token == 0;
-    }
-
-    // TODO: improve?
-    return false;
-}
-
-static bool llama_is_control_token(const llama_vocab & vocab, llama_token token) {
-    if (llama_vocab_type(vocab) == "spm") {
-        return token == 1 || token == 2;
-    }
-
-    // TODO: improve?
-    return false;
-}
-
 static bool llama_is_bos_token(const llama_vocab & vocab, llama_token token) {
-    if (llama_vocab_type(vocab) == "spm") {
-        return token == 1;
-    }
-
-    // TODO: improve?
-    return false;
+    return token == vocab.special_bos_id;
 }
 
 static bool llama_is_eos_token(const llama_vocab & vocab, llama_token token) {
-    if (llama_vocab_type(vocab) == "spm") {
-        return token == 2;
+    return token == vocab.special_eos_id;
+}
+
+static bool llama_is_control_token(const llama_vocab & vocab, llama_token token) {
+    if (llama_vocab_get_type(vocab) == LLAMA_VOCAB_TYPE_SPM) {
+        return token == llama_is_bos_token(vocab, token) || token == llama_is_eos_token(vocab, token);
+    }
+
+    // TODO: improve?
+    return false;
+}
+
+static bool llama_is_unknown_token(const llama_vocab & vocab, llama_token token) {
+    if (llama_vocab_get_type(vocab) == LLAMA_VOCAB_TYPE_SPM) {
+        return token == 0;
     }
 
     // TODO: improve?
@@ -2279,11 +2390,11 @@ static bool llama_is_unused_token(const llama_vocab & vocab, llama_token token)
 }
 
 static bool llama_is_byte_token(const llama_vocab & vocab, llama_token token) {
-    if (llama_vocab_type(vocab) == "spm") {
+    if (llama_vocab_get_type(vocab) == LLAMA_VOCAB_TYPE_SPM) {
         return 3 <= token && token < 259;
     }
 
-    if (llama_vocab_type(vocab) == "bpe") {
+    if (llama_vocab_get_type(vocab) == LLAMA_VOCAB_TYPE_BPE) {
         return 1 <= token && token < 95;
     }
 
@@ -2291,17 +2402,29 @@ static bool llama_is_byte_token(const llama_vocab & vocab, llama_token token) {
 }
 
 static uint8_t llama_byte_to_char(const llama_vocab & vocab, uint8_t byte) {
-    if (llama_vocab_type(vocab) == "spm") {
+    if (llama_vocab_get_type(vocab) == LLAMA_VOCAB_TYPE_SPM) {
         return byte - 3;
     }
 
-    if (llama_vocab_type(vocab) == "bpe") {
+    if (llama_vocab_get_type(vocab) == LLAMA_VOCAB_TYPE_BPE) {
         return byte + 32;
     }
 
     return false;
 }
 
+static uint8_t llama_char_to_byte(const llama_vocab & vocab, uint8_t ch) {
+    if (llama_vocab_get_type(vocab) == LLAMA_VOCAB_TYPE_SPM) {
+        return ch + 3;
+    }
+
+    if (llama_vocab_get_type(vocab) == LLAMA_VOCAB_TYPE_BPE) {
+        return ch - 32;
+    }
+
+    return false;
+}
+
 static std::string llama_escape_whitespace(const std::string& text) {
     std::string result;
     bool escaping = false;
@@ -2438,7 +2561,7 @@ private:
         if (p == rev_merge.end()) {
             // output any symbols that did not form tokens as bytes.
             for (int j = 0; j < (int)symbol.n; ++j) {
-                llama_vocab::id token_id = llama_byte_to_char(vocab_, symbol.text[j]);
+                llama_vocab::id token_id = llama_char_to_byte(vocab_, symbol.text[j]);
                 output.push_back(token_id);
             }
             return;
@@ -2483,7 +2606,7 @@ private:
     std::map<std::string, std::pair<int, int> > rev_merge;
 };
 
-static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, const std::string & raw_text, bool bos, bool escape) {
+static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, const std::string & raw_text, bool bos, bool escape) {
     llama_tokenizer tokenizer(vocab);
     std::vector<llama_vocab::id> output;
 
@@ -2492,7 +2615,7 @@ static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, co
     }
 
     if (bos) {
-        output.push_back(llama_token_bos());
+        output.push_back(vocab.special_bos_id);
     }
 
     std::string text;
@@ -2510,37 +2633,81 @@ static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, co
 // grammar - internal
 //
 
+struct llama_partial_utf8 {
+    uint32_t value;    // bit value so far (unshifted)
+    int      n_remain; // num bytes remaining; -1 indicates invalid sequence
+};
+
 struct llama_grammar {
     const std::vector<std::vector<llama_grammar_element>>   rules;
     std::vector<std::vector<const llama_grammar_element *>> stacks;
+
+    // buffer for partially generated UTF-8 sequence from accepted tokens
+    llama_partial_utf8                                      partial_utf8;
 };
 
 struct llama_grammar_candidate {
-    size_t           index;
-    const uint32_t * code_points;
+    size_t               index;
+    const uint32_t     * code_points;
+    llama_partial_utf8   partial_utf8;
 };
 
-// NOTE: assumes valid utf8 (but checks for overrun)
-// adds a terminating 0 for use as pointer
-std::vector<uint32_t> decode_utf8(const char * src) {
-    static const int      lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
+// Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
+// pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
+std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
+        const char         * src,
+        llama_partial_utf8   partial_start) {
+    static const int      lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
     const char          * pos      = src;
     std::vector<uint32_t> code_points;
+    uint32_t              value    = partial_start.value;
+    int                   n_remain = partial_start.n_remain;
+
+    // continue previous decode, if applicable
+    while (*pos != 0 && n_remain > 0) {
+        uint8_t next_byte = static_cast<uint8_t>(*pos);
+        if ((next_byte >> 6) != 2) {
+            // invalid sequence, abort
+            code_points.push_back(0);
+            return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, -1 });
+        }
+        value = (value << 6) + (next_byte & 0x3F);
+        ++pos;
+        --n_remain;
+    }
+
+    if (partial_start.n_remain > 0 && n_remain == 0) {
+        code_points.push_back(value);
+    }
+
+    // decode any subsequent utf-8 sequences, which may end in an incomplete one
     while (*pos != 0) {
         uint8_t  first_byte = static_cast<uint8_t>(*pos);
         uint8_t  highbits   = first_byte >> 4;
-        int      len        = lookup[highbits];
-        uint8_t  mask       = (1 << (8 - len)) - 1;
-        uint32_t value      = first_byte & mask;
-        const char * end    = pos + len; // may overrun!
-        ++pos;
-        for ( ; pos < end && *pos != 0; ++pos) {
-            value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
+                 n_remain   = lookup[highbits] - 1;
+
+        if (n_remain < 0) {
+            // invalid sequence, abort
+            code_points.clear();
+            code_points.push_back(0);
+            return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, n_remain });
+        }
+
+        uint8_t  mask       = (1 << (7 - n_remain)) - 1;
+                 value      = first_byte & mask;
+        ++pos;
+        while (*pos != 0 && n_remain > 0) {
+            value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
+            ++pos;
+            --n_remain;
+        }
+        if (n_remain == 0) {
+            code_points.push_back(value);
         }
-        code_points.push_back(value);
     }
     code_points.push_back(0);
-    return code_points;
+
+    return std::make_pair(std::move(code_points), llama_partial_utf8{ value, n_remain });
 }
 
 // returns true iff pos points to the end of one of the definitions of a rule
@@ -2577,6 +2744,56 @@ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
     return std::make_pair(found == is_positive_char, pos);
 }
 
+// returns true iff some continuation of the given partial UTF-8 sequence could satisfy the char
+// range at pos (regular or inverse range)
+// asserts that pos is pointing to a char range element
+static bool llama_grammar_match_partial_char(
+        const llama_grammar_element * pos,
+        const llama_partial_utf8      partial_utf8) {
+
+    bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR;
+    GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
+
+    uint32_t partial_value = partial_utf8.value;
+    int      n_remain      = partial_utf8.n_remain;
+
+    // invalid sequence or 7-bit char split across 2 bytes (overlong)
+    if (n_remain < 0 || (n_remain == 1 && partial_value < 2)) {
+        return false;
+    }
+
+    // range of possible code points this partial UTF-8 sequence could complete to
+    uint32_t low  = partial_value << (n_remain * 6);
+    uint32_t high = low | ((1 << (n_remain * 6)) - 1);
+
+    if (low == 0) {
+        if (n_remain == 2) {
+            low = 1 << 11;
+        } else if (n_remain == 3) {
+            low = 1 << 16;
+        }
+    }
+
+    do {
+        if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
+            // inclusive range, e.g. [a-z]
+            if (pos->value <= high && low <= pos[1].value) {
+                return is_positive_char;
+            }
+            pos += 2;
+        } else {
+            // exact char match, e.g. [a] or "a"
+            if (low <= pos->value && pos->value <= high) {
+                return is_positive_char;
+            }
+            pos += 1;
+        }
+    } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
+
+    return !is_positive_char;
+}
+
+
 // transforms a grammar pushdown stack into N possible stacks, all ending
 // at a character range (terminal element)
 static void llama_grammar_advance_stack(
@@ -2677,8 +2894,11 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
     std::vector<llama_grammar_candidate> rejects;
 
     if (stack.empty()) {
-        // accept nothing; EOS is handled elsewhere
-        rejects.insert(rejects.end(), candidates.begin(), candidates.end());
+        for (auto tok : candidates) {
+            if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
+                rejects.push_back(tok);
+            }
+        }
         return rejects;
     }
 
@@ -2686,10 +2906,15 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
 
     std::vector<llama_grammar_candidate> next_candidates;
     for (auto tok : candidates) {
-        if (llama_grammar_match_char(stack_pos, tok.code_points[0]).first) {
-            if (tok.code_points[1] != 0) {
-                next_candidates.push_back({ tok.index, tok.code_points + 1 });
+        if (*tok.code_points == 0) {
+            // reached end of full codepoints in token, reject iff it ended in a partial sequence
+            // that cannot satisfy this position in grammar
+            if (tok.partial_utf8.n_remain != 0 &&
+                    !llama_grammar_match_partial_char(stack_pos, tok.partial_utf8)) {
+                rejects.push_back(tok);
             }
+        } else if (llama_grammar_match_char(stack_pos, *tok.code_points).first) {
+            next_candidates.push_back({ tok.index, tok.code_points + 1, tok.partial_utf8 });
         } else {
             rejects.push_back(tok);
         }
@@ -2707,7 +2932,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
 
     auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
     for (auto tok : next_rejects) {
-        rejects.push_back({ tok.index, tok.code_points - 1 });
+        rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
     }
 
     return rejects;
@@ -2772,7 +2997,7 @@ struct llama_grammar * llama_grammar_init(
         }
     } while (true);
 
-    return new llama_grammar{ std::move(vec_rules), std::move(stacks) };
+    return new llama_grammar{ std::move(vec_rules), std::move(stacks), {} };
 }
 
 void llama_grammar_free(struct llama_grammar * grammar) {
@@ -2784,7 +3009,7 @@ void llama_grammar_free(struct llama_grammar * grammar) {
 //
 
 void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * candidates) {
-    assert(candidates->size > 0);
+    GGML_ASSERT(candidates->size > 0);
 
     const int64_t t_start_sample_us = ggml_time_us();
 
@@ -3064,7 +3289,7 @@ void llama_sample_frequency_and_presence_penalties(struct llama_context * ctx, l
 }
 
 void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * candidates, const struct llama_grammar * grammar) {
-    assert(ctx);
+    GGML_ASSERT(ctx);
     const int64_t t_start_sample_us = ggml_time_us();
 
     bool allow_eos = false;
@@ -3075,23 +3300,23 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
         }
     }
 
-    const llama_token eos = llama_token_eos();
+    const llama_token eos = llama_token_eos(ctx);
 
-    std::vector<std::vector<uint32_t>>   candidates_decoded;
-    std::vector<llama_grammar_candidate> candidates_grammar;
+    std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
+    std::vector<llama_grammar_candidate>                              candidates_grammar;
 
     for (size_t i = 0; i < candidates->size; ++i) {
-        const llama_token id  = candidates->data[i].id;
-        std::string       str = llama_token_to_str(ctx, id);
+        const llama_token id   = candidates->data[i].id;
+        const std::string text = llama_token_to_text(ctx, id);
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;
             }
-        } else if (str.empty()) {
+        } else if (text.empty()) {
             candidates->data[i].logit = -INFINITY;
         } else {
-            candidates_decoded.push_back(decode_utf8(str.c_str()));
-            candidates_grammar.push_back({ i, candidates_decoded.back().data() });
+            candidates_decoded.push_back(decode_utf8(text.c_str(), grammar->partial_utf8));
+            candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
         }
     }
 
@@ -3124,10 +3349,12 @@ void llama_sample_classifier_free_guidance(
                          float   scale) {
     int64_t t_start_sample_us = ggml_time_us();
 
-    assert(ctx);
+    GGML_ASSERT(ctx);
+
     auto n_vocab = llama_n_vocab(ctx);
-    assert(n_vocab == (int)candidates->size);
-    assert(!candidates->sorted);
+
+    GGML_ASSERT(n_vocab == (int)candidates->size);
+    GGML_ASSERT(!candidates->sorted);
 
     std::vector<float> logits_base;
     logits_base.reserve(candidates->size);
@@ -3151,7 +3378,8 @@ void llama_sample_classifier_free_guidance(
 }
 
 llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int m, float * mu) {
-    assert(ctx);
+    GGML_ASSERT(ctx);
+
     auto N = float(llama_n_vocab(ctx));
     int64_t t_start_sample_us;
     t_start_sample_us = ggml_time_us();
@@ -3257,7 +3485,8 @@ llama_token llama_sample_token_greedy(struct llama_context * ctx, llama_token_da
 }
 
 llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates) {
-    assert(ctx);
+    GGML_ASSERT(ctx);
+
     const int64_t t_start_sample_us = ggml_time_us();
     llama_sample_softmax(nullptr, candidates);
 
@@ -3281,7 +3510,7 @@ llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_arra
 void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar * grammar, llama_token token) {
     const int64_t t_start_sample_us = ggml_time_us();
 
-    if (token == llama_token_eos()) {
+    if (token == llama_token_eos(ctx)) {
         for (const auto & stack : grammar->stacks) {
             if (stack.empty()) {
                 return;
@@ -3290,12 +3519,15 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
         GGML_ASSERT(false);
     }
 
-    std::string str = llama_token_to_str(ctx, token);
+    const std::string text = llama_token_to_text(ctx, token);
+
     // Note terminating 0 in decoded string
-    auto code_points = decode_utf8(str.c_str());
+    const auto   decoded     = decode_utf8(text.c_str(), grammar->partial_utf8);
+    const auto & code_points = decoded.first;
     for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
         grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
     }
+    grammar->partial_utf8 = decoded.second;
     GGML_ASSERT(!grammar->stacks.empty());
 
     ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
@@ -3365,7 +3597,6 @@ static void llama_convert_tensor_internal(struct ggml_tensor * tensor, std::vect
 static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
     ggml_type quantized_type;
     llama_ftype ftype = params->ftype;
-    int nthread = params->nthread;
 
     switch (params->ftype) {
         case LLAMA_FTYPE_MOSTLY_Q4_0: quantized_type = GGML_TYPE_Q4_0; break;
@@ -3391,6 +3622,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
     }
 
+    int nthread = params->nthread;
+
     if (nthread <= 0) {
         nthread = std::thread::hardware_concurrency();
     }
@@ -3413,6 +3646,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
         const std::string name = ggml_get_name(meta);
 
+        // TODO: avoid hardcoded tensor names - use the TN_* constants
         if (name.find("attn_v.weight") != std::string::npos) {
             ++n_attention_wv;
         }
@@ -3491,6 +3725,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         } else {
             new_type = quantized_type;
 #ifdef GGML_USE_K_QUANTS
+            // TODO: avoid hardcoded tensor names - use the TN_* constants
             if (name == TN_OUTPUT) {
                 int nx = tensor->ne[0];
                 int ny = tensor->ne[1];
@@ -3505,7 +3740,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 else if (QK_K == 64 && (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S) &&
                         (i_attention_wv < n_attention_wv/8 || i_attention_wv >= 7*n_attention_wv/8)) new_type = GGML_TYPE_Q6_K;
                 ++i_attention_wv;
-            } else if (name.find("feed_forward.w2.weight") != std::string::npos) {
+            } else if (name.find("ffn_down.weight") != std::string::npos) {
                 if      (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q4_K;
                 else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
                 else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
@@ -3560,7 +3795,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             new_data = work.data();
             std::vector<int64_t> hist_cur(1 << 4, 0);
 
-            const int chunk_size = 32 * 512;
+            static const int chunk_size = 32 * 512;
             const int nchunk = (nelements + chunk_size - 1)/chunk_size;
             const int nthread_use = nthread > 1 ? std::max(1, std::min(nthread, nchunk)) : 1;
             if (nthread_use < 2) {
@@ -3568,7 +3803,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             } else {
                 size_t counter = 0;
                 new_size = 0;
-                auto compute = [&mutex, &counter, &hist_cur, &new_size, new_type, f32_data, new_data, nelements] () {
+                auto compute = [&mutex, &counter, &hist_cur, &new_size, new_type, f32_data, new_data, nelements]() {
                     std::vector<int64_t> local_hist;
                     size_t local_size = 0;
                     while (true) {
@@ -3661,6 +3896,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     }
 }
 
+// TODO: after the GGUF PR, this likely won't work and needs to be updated
 int llama_apply_lora_from_file_internal(const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads) {
     LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
 
@@ -4111,9 +4347,20 @@ struct llama_context * llama_new_context_with_model(
             // build worst-case graph
             int n_tokens = std::min((int)hparams.n_ctx, params.n_batch);
             int n_past = hparams.n_ctx - n_tokens;
-            llama_token token = llama_token_bos(); // not actually used by llama_build_graph, but required to choose between token and embedding inputs graph
+            llama_token token = llama_token_bos(ctx); // not actually used by llama_build_graph, but required to choose between token and embedding inputs graph
             ggml_cgraph * gf = llama_build_graph(*ctx, &token, NULL, n_tokens, n_past);
-
+#ifdef GGML_USE_METAL
+            if (params.n_gpu_layers > 0) {
+                ctx->ctx_metal = ggml_metal_init(1);
+                if (!ctx->ctx_metal) {
+                    LLAMA_LOG_ERROR("%s: ggml_metal_init() failed\n", __func__);
+                    llama_free(ctx);
+                    return NULL;
+                }
+                ggml_metal_graph_find_concurrency(ctx->ctx_metal, gf, false);
+                ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
+            }
+#endif
             // measure memory requirements for the graph
             size_t alloc_size = ggml_allocr_alloc_graph(ctx->alloc, gf) + tensor_alignment;
 
@@ -4131,6 +4378,11 @@ struct llama_context * llama_new_context_with_model(
 
             ctx->buf_alloc.resize(alloc_size);
             ctx->alloc = ggml_allocr_new(ctx->buf_alloc.data, ctx->buf_alloc.size, tensor_alignment);
+#ifdef GGML_USE_METAL
+            if (ctx->ctx_metal) {
+                ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
+            }
+#endif
         }
 #else
         ctx->buf_compute.resize(MEM_REQ_EVAL().at(ctx->model.type) + ggml_graph_overhead());
@@ -4145,13 +4397,6 @@ struct llama_context * llama_new_context_with_model(
 #ifdef GGML_USE_METAL
     if (params.n_gpu_layers > 0) {
         // this allocates all Metal resources and memory buffers
-        ctx->ctx_metal = ggml_metal_init(1);
-
-        if (!ctx->ctx_metal) {
-            LLAMA_LOG_ERROR("%s: ggml_metal_init() failed\n", __func__);
-            llama_free(ctx);
-            return NULL;
-        }
 
         void * data_ptr  = NULL;
         size_t data_size = 0;
@@ -4180,8 +4425,7 @@ struct llama_context * llama_new_context_with_model(
         LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "eval", ctx->buf_compute.data, ctx->buf_compute.size, 0));
         LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "kv",   ctx->kv_self.buf.data, ctx->kv_self.buf.size, 0));
 
-        LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr0", ctx->buf_scratch[0].data, ctx->buf_scratch[0].size, 0));
-        LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr1", ctx->buf_scratch[1].data, ctx->buf_scratch[1].size, 0));
+        LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "alloc", ctx->buf_alloc.data, ctx->buf_alloc.size, 0));
 #undef LLAMA_METAL_CHECK_BUF
     }
 #endif
@@ -4191,7 +4435,7 @@ struct llama_context * llama_new_context_with_model(
 
     if (ggml_mpi_rank(ctx->ctx_mpi) > 0) {
         // Enter a blocking eval loop with dummy input, letting rank=0 drive the process
-        const std::vector<llama_token> tmp(ctx->model.hparams.n_ctx, llama_token_bos());
+        const std::vector<llama_token> tmp(ctx->model.hparams.n_ctx, llama_token_bos(ctx));
         while (!llama_eval(ctx, tmp.data(), tmp.size(), 0, 0)) {};
         llama_backend_free();
         exit(1);
@@ -4218,6 +4462,34 @@ void llama_free(struct llama_context * ctx) {
     delete ctx;
 }
 
+int llama_n_vocab(const struct llama_context * ctx) {
+    return ctx->model.vocab.id_to_token.size();
+}
+
+int llama_n_ctx(const struct llama_context * ctx) {
+    return ctx->model.hparams.n_ctx;
+}
+
+int llama_n_embd(const struct llama_context * ctx) {
+    return ctx->model.hparams.n_embd;
+}
+
+int llama_model_n_vocab(const struct llama_model * model) {
+    return model->vocab.id_to_token.size();
+}
+
+int llama_model_n_ctx(const struct llama_model * model) {
+    return model->hparams.n_ctx;
+}
+
+int llama_model_n_embd(const struct llama_model * model) {
+    return model->hparams.n_embd;
+}
+
+int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size) {
+    return snprintf(buf, buf_size, "LLaMA %s %s", llama_model_type_name(model->type), llama_model_ftype_name(model->ftype));
+}
+
 int llama_model_quantize(
         const char * fname_inp,
         const char * fname_out,
@@ -4685,7 +4957,7 @@ int llama_eval_export(struct llama_context * ctx, const char * fname) {
     const int n_batch = 1;
     const int n_ctx   = 512 - n_batch;
 
-    const std::vector<llama_token> tmp(n_batch, llama_token_bos());
+    const std::vector<llama_token> tmp(n_batch, llama_token_bos(ctx));
 
     if (!llama_eval_internal(*ctx, tmp.data(), nullptr, tmp.size(), n_ctx, 1, fname)) {
         LLAMA_LOG_ERROR("%s: failed to eval\n", __func__);
@@ -4695,117 +4967,23 @@ int llama_eval_export(struct llama_context * ctx, const char * fname) {
     return 0;
 }
 
-int llama_tokenize_with_model(
-    const struct llama_model * model,
-                  const char * text,
-                 llama_token * tokens,
-                         int   n_max_tokens,
-                        bool   add_bos) {
-    auto escape = llama_vocab_type(model->vocab) == "spm";
-    auto res = llama_tokenize(model->vocab, text, add_bos, escape);
-
-    if (n_max_tokens < (int) res.size()) {
-        LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
-        return -((int) res.size());
-    }
-
-    for (size_t i = 0; i < res.size(); i++) {
-        tokens[i] = res[i];
-    }
-
-    return res.size();
+float * llama_get_logits(struct llama_context * ctx) {
+    return ctx->logits.data();
 }
 
-int llama_tokenize(
-        struct llama_context * ctx,
-                  const char * text,
-                 llama_token * tokens,
-                         int   n_max_tokens,
-                        bool   add_bos) {
-    return llama_tokenize_with_model(&ctx->model, text, tokens, n_max_tokens, add_bos);
+float * llama_get_embeddings(struct llama_context * ctx) {
+    return ctx->embedding.data();
 }
 
-std::vector<llama_token> llama_tokenize(
-        struct llama_context * ctx,
-           const std::string & text,
-                        bool   add_bos) {
-    int length = text.length() + add_bos;
-    std::vector<llama_token> result(length);
-    length = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
-        assert(check == -length);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(length);
-    }
-    return result;
+int llama_get_vocab(
+        const struct llama_context * ctx,
+        const char * * strings,
+        float  * scores,
+        int capacity) {
+    return llama_model_get_vocab(&ctx->model, strings, scores, capacity);
 }
 
-int llama_tokenize_bpe(
-        struct llama_context * ctx,
-                  const char * text,
-                 llama_token * tokens,
-                         int   n_max_tokens,
-                        bool   add_bos) {
-    auto res = llama_tokenize(ctx->model.vocab, text, add_bos, false);
-
-    if (n_max_tokens < (int) res.size()) {
-        LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
-        return -((int) res.size());
-    }
-
-    for (size_t i = 0; i < res.size(); i++) {
-        tokens[i] = res[i];
-    }
-
-    return res.size();
-}
-
-std::vector<llama_token> llama_tokenize_bpe(
-        struct llama_context * ctx,
-           const std::string & text,
-                        bool   add_bos) {
-    int length = text.length() + add_bos;
-    std::vector<llama_token> result(length);
-    length = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
-        assert(check == -length);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(length);
-    }
-    return result;
-}
-
-int llama_n_vocab_from_model(const struct llama_model * model) {
-    return model->vocab.id_to_token.size();
-}
-
-int llama_n_ctx_from_model(const struct llama_model * model) {
-    return model->hparams.n_ctx;
-}
-
-int llama_n_embd_from_model(const struct llama_model * model) {
-    return model->hparams.n_embd;
-}
-
-int llama_n_vocab(const struct llama_context * ctx) {
-    return ctx->model.vocab.id_to_token.size();
-}
-
-int llama_n_ctx(const struct llama_context * ctx) {
-    return ctx->model.hparams.n_ctx;
-}
-
-int llama_n_embd(const struct llama_context * ctx) {
-    return ctx->model.hparams.n_embd;
-}
-
-int llama_get_vocab_from_model(
+int llama_model_get_vocab(
         const struct llama_model * model,
         const char * * strings,
         float  * scores,
@@ -4818,28 +4996,90 @@ int llama_get_vocab_from_model(
     return n;
 }
 
-int llama_get_vocab(
-        const struct llama_context * ctx,
-        const char * * strings,
-        float  * scores,
-        int capacity) {
-    return llama_get_vocab_from_model(&ctx->model, strings, scores, capacity);
+llama_token llama_token_bos(const struct llama_context * ctx) {
+    return ctx->model.vocab.special_bos_id;
 }
 
-float * llama_get_logits(struct llama_context * ctx) {
-    return ctx->logits.data();
+llama_token llama_token_eos(const struct llama_context * ctx) {
+    return ctx->model.vocab.special_eos_id;
 }
 
-float * llama_get_embeddings(struct llama_context * ctx) {
-    return ctx->embedding.data();
+llama_token llama_token_nl(const struct llama_context * ctx) {
+    return ctx->model.vocab.linefeed_id;
+}
+
+int llama_tokenize(
+        struct llama_context * ctx,
+                  const char * text,
+                 llama_token * tokens,
+                         int   n_max_tokens,
+                        bool   add_bos) {
+    return llama_tokenize_with_model(&ctx->model, text, tokens, n_max_tokens, add_bos);
+}
+
+int llama_tokenize_bpe(
+        struct llama_context * ctx,
+                  const char * text,
+                 llama_token * tokens,
+                         int   n_max_tokens,
+                        bool   add_bos) {
+    auto res = llama_tokenize_internal(ctx->model.vocab, text, add_bos, false);
+
+    if (n_max_tokens < (int) res.size()) {
+        LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
+        return -((int) res.size());
+    }
+
+    for (size_t i = 0; i < res.size(); i++) {
+        tokens[i] = res[i];
+    }
+
+    return res.size();
+}
+
+int llama_tokenize_with_model(
+    const struct llama_model * model,
+                  const char * text,
+                 llama_token * tokens,
+                         int   n_max_tokens,
+                        bool   add_bos) {
+    auto escape = llama_vocab_get_type(model->vocab) == LLAMA_VOCAB_TYPE_SPM;
+    auto res = llama_tokenize_internal(model->vocab, text, add_bos, escape);
+
+    if (n_max_tokens < (int) res.size()) {
+        LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
+        return -((int) res.size());
+    }
+
+    for (size_t i = 0; i < res.size(); i++) {
+        tokens[i] = res[i];
+    }
+
+    return res.size();
+}
+
+int llama_token_to_str(const struct llama_context * ctx, llama_token token, char * buf, int length) {
+    return llama_token_to_str_with_model(&ctx->model, token, buf, length);
+}
+
+int llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token, char * buf, int length) {
+    if (0 <= token && token < llama_model_n_vocab(&ctx->model)) {
+        std::string result = ctx->model.vocab.id_to_token[token].tok;
+        if (length < (int) result.length()) {
+            return -result.length();
+        }
+        memcpy(buf, result.c_str(), result.length());
+        return result.length();
+    }
+    return 0;
 }
 
 // does not write null-terminator to str
 int llama_token_to_str_with_model(const struct llama_model * model, llama_token token, char * buf, int length) {
-    if (0 <= token && token < llama_n_vocab_from_model(model)) {
+    if (0 <= token && token < llama_model_n_vocab(model)) {
         if (llama_is_normal_token(model->vocab, token)) {
             std::string result = model->vocab.id_to_token[token].tok;
-            if (llama_vocab_type(model->vocab) == "spm") {
+            if (llama_vocab_get_type(model->vocab) == LLAMA_VOCAB_TYPE_SPM) {
                 result = llama_unescape_whitespace(result);
             }
             if (length < (int) result.length()) {
@@ -4868,62 +5108,6 @@ int llama_token_to_str_with_model(const struct llama_model * model, llama_token
     return 0;
 }
 
-int llama_token_to_str(const struct llama_context * ctx, llama_token token, char * str, int length) {
-    return llama_token_to_str_with_model(&ctx->model, token, str, length);
-}
-
-std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
-    std::vector<char> result(8, 0);
-    const int length = llama_token_to_str(ctx, token, result.data(), result.size());
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_token_to_str(ctx, token, result.data(), result.size());
-        GGML_ASSERT(check == -length);
-    } else {
-        result.resize(length);
-    }
-
-    return std::string(result.data(), result.size());
-}
-
-int llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token, char * str, int length) {
-    if (0 <= token && token < llama_n_vocab_from_model(&ctx->model)) {
-        std::string result = ctx->model.vocab.id_to_token[token].tok;
-        if (length < (int) result.length()) {
-            return -result.length();
-        }
-        memcpy(str, result.c_str(), result.length());
-        return result.length();
-    }
-    return 0;
-}
-
-std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
-    std::vector<char> result(8, 0);
-    const int length = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
-    if (length < 0) {
-        result.resize(-length);
-        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
-        GGML_ASSERT(check == -length);
-    } else {
-        result.resize(length);
-    }
-
-    return std::string(result.data(), result.size());
-}
-
-llama_token llama_token_bos(void) {
-    return 1;
-}
-
-llama_token llama_token_eos(void) {
-    return 2;
-}
-
-llama_token llama_token_nl(void) {
-    return 13;
-}
-
 struct llama_timings llama_get_timings(struct llama_context * ctx) {
     struct llama_timings result = {
         /*.t_start_ms  =*/ 1e-3 * ctx->t_start_us,
diff --git a/llama.h b/llama.h
index 2e407a1db..0ea65c1b5 100644
--- a/llama.h
+++ b/llama.h
@@ -61,6 +61,40 @@ extern "C" {
 
     typedef int llama_token;
 
+    enum llama_log_level {
+        LLAMA_LOG_LEVEL_ERROR = 2,
+        LLAMA_LOG_LEVEL_WARN  = 3,
+        LLAMA_LOG_LEVEL_INFO  = 4
+    };
+
+    enum llama_vocab_type {
+        LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
+        LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
+    };
+
+    // model file types
+    enum llama_ftype {
+        LLAMA_FTYPE_ALL_F32              = 0,
+        LLAMA_FTYPE_MOSTLY_F16           = 1, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_0          = 2, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_1          = 3, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
+        // LLAMA_FTYPE_MOSTLY_Q4_2       = 5, // support has been removed
+        // LLAMA_FTYPE_MOSTLY_Q4_3       = 6, // support has been removed
+        LLAMA_FTYPE_MOSTLY_Q8_0          = 7, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_0          = 8, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_1          = 9, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q2_K          = 10,// except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q3_K_S        = 11,// except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q3_K_M        = 12,// except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q3_K_L        = 13,// except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_K_S        = 14,// except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_K_M        = 15,// except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_K_S        = 16,// except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_K_M        = 17,// except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q6_K          = 18,// except 1d tensors
+    };
+
     typedef struct llama_token_data {
         llama_token id; // token id
         float logit;    // log-odds of the token
@@ -75,19 +109,6 @@ extern "C" {
 
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
-    enum llama_log_level {
-        LLAMA_LOG_LEVEL_ERROR = 2,
-        LLAMA_LOG_LEVEL_WARN  = 3,
-        LLAMA_LOG_LEVEL_INFO  = 4
-    };
-
-    // Signature for logging events
-    // Note that text includes the new line character at the end for most events.
-    // If your logging mechanism cannot handle that, check if the last character is '\n' and strip it
-    // if it exists.
-    // It might not exist for progress report where '.' is output repeatedly.
-    typedef void (*llama_log_callback)(enum llama_log_level level, const char * text, void * user_data);
-
     struct llama_context_params {
         uint32_t seed;         // RNG seed, -1 for random
         int32_t  n_ctx;        // text context
@@ -117,28 +138,12 @@ extern "C" {
         bool embedding;  // embedding mode only
     };
 
-    // model file types
-    enum llama_ftype {
-        LLAMA_FTYPE_ALL_F32              = 0,
-        LLAMA_FTYPE_MOSTLY_F16           = 1, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_0          = 2, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_1          = 3, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
-        // LLAMA_FTYPE_MOSTLY_Q4_2       = 5, // support has been removed
-        // LLAMA_FTYPE_MOSTLY_Q4_3       = 6, // support has been removed
-        LLAMA_FTYPE_MOSTLY_Q8_0          = 7, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_0          = 8, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_1          = 9, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q2_K          = 10,// except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q3_K_S        = 11,// except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q3_K_M        = 12,// except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q3_K_L        = 13,// except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_K_S        = 14,// except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_K_M        = 15,// except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_K_S        = 16,// except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_K_M        = 17,// except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q6_K          = 18,// except 1d tensors
-    };
+    // Signature for logging events
+    // Note that text includes the new line character at the end for most events.
+    // If your logging mechanism cannot handle that, check if the last character is '\n' and strip it
+    // if it exists.
+    // It might not exist for progress report where '.' is output repeatedly.
+    typedef void (*llama_log_callback)(enum llama_log_level level, const char * text, void * user_data);
 
     // model quantization parameters
     typedef struct llama_model_quantize_params {
@@ -199,20 +204,14 @@ extern "C" {
     LLAMA_API struct llama_context_params llama_context_default_params(void);
     LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params(void);
 
-    LLAMA_API int  llama_max_devices(void);
-    LLAMA_API bool llama_mmap_supported(void);
-    LLAMA_API bool llama_mlock_supported(void);
-
-    // TODO: not great API - very likely to change
     // Initialize the llama + ggml backend
     // If numa is true, use NUMA optimizations
     // Call once at the start of the program
     LLAMA_API void llama_backend_init(bool numa);
+
     // Call once at the end of the program - currently only used for MPI
     LLAMA_API void llama_backend_free(void);
 
-    LLAMA_API int64_t llama_time_us(void);
-
     LLAMA_API struct llama_model * llama_load_model_from_file(
                              const char * path_model,
             struct llama_context_params   params);
@@ -223,10 +222,26 @@ extern "C" {
                      struct llama_model * model,
             struct llama_context_params   params);
 
-
     // Frees all allocated memory
     LLAMA_API void llama_free(struct llama_context * ctx);
 
+    LLAMA_API int64_t llama_time_us(void);
+
+    LLAMA_API int  llama_max_devices    (void);
+    LLAMA_API bool llama_mmap_supported (void);
+    LLAMA_API bool llama_mlock_supported(void);
+
+    LLAMA_API int llama_n_vocab(const struct llama_context * ctx);
+    LLAMA_API int llama_n_ctx  (const struct llama_context * ctx);
+    LLAMA_API int llama_n_embd (const struct llama_context * ctx);
+
+    LLAMA_API int llama_model_n_vocab(const struct llama_model * model);
+    LLAMA_API int llama_model_n_ctx  (const struct llama_model * model);
+    LLAMA_API int llama_model_n_embd (const struct llama_model * model);
+
+    // Get a string describing the model type
+    LLAMA_API int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size);
+
     // Returns 0 on success
     LLAMA_API int llama_model_quantize(
             const char * fname_inp,
@@ -248,9 +263,9 @@ extern "C" {
 
     LLAMA_API int llama_model_apply_lora_from_file(
             const struct llama_model * model,
-                      const char * path_lora,
-                      const char * path_base_model,
-                             int   n_threads);
+                          const char * path_lora,
+                          const char * path_base_model,
+                                 int   n_threads);
 
     // Returns the number of tokens in the KV cache
     LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
@@ -300,11 +315,48 @@ extern "C" {
     // IMPORTANT: do not use for anything else other than debugging and testing!
     LLAMA_API int llama_eval_export(struct llama_context * ctx, const char * fname);
 
+    // Token logits obtained from the last call to llama_eval()
+    // The logits for the last token are stored in the last row
+    // Can be mutated in order to change the probabilities of the next token
+    // Rows: n_tokens
+    // Cols: n_vocab
+    LLAMA_API float * llama_get_logits(struct llama_context * ctx);
+
+    // Get the embeddings for the input
+    // shape: [n_embd] (1-dimensional)
+    LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
+
+    //
+    // Vocab
+    //
+
+    // Get the vocabulary as output parameters.
+    // Returns number of results.
+    LLAMA_API int llama_get_vocab(
+            const struct llama_context * ctx,
+                          const char * * strings,
+                                 float * scores,
+                                   int   capacity);
+
+    LLAMA_API int llama_model_get_vocab(
+              const struct llama_model * model,
+                          const char * * strings,
+                                 float * scores,
+                                   int   capacity);
+
+    // Special tokens
+    LLAMA_API llama_token llama_token_bos(const struct llama_context * ctx);  // beginning-of-sentence
+    LLAMA_API llama_token llama_token_eos(const struct llama_context * ctx);  // end-of-sentence
+    LLAMA_API llama_token llama_token_nl (const struct llama_context * ctx);  // next-line
+
+    //
+    // Tokenization
+    //
+
     // Convert the provided text into tokens.
     // The tokens pointer must be large enough to hold the resulting tokens.
     // Returns the number of tokens on success, no more than n_max_tokens
     // Returns a negative number on failure - the number of tokens that would have been returned
-    // TODO: not sure if correct
     LLAMA_API int llama_tokenize(
             struct llama_context * ctx,
                       const char * text,
@@ -326,39 +378,6 @@ extern "C" {
                              int   n_max_tokens,
                             bool   add_bos);
 
-    LLAMA_API int llama_n_vocab(const struct llama_context * ctx);
-    LLAMA_API int llama_n_ctx  (const struct llama_context * ctx);
-    LLAMA_API int llama_n_embd (const struct llama_context * ctx);
-
-    LLAMA_API int llama_n_vocab_from_model(const struct llama_model * model);
-    LLAMA_API int llama_n_ctx_from_model  (const struct llama_model * model);
-    LLAMA_API int llama_n_embd_from_model (const struct llama_model * model);
-
-    // Get the vocabulary as output parameters.
-    // Returns number of results.
-    LLAMA_API int llama_get_vocab(
-            const struct llama_context * ctx,
-                          const char * * strings,
-                                 float * scores,
-                                   int   capacity);
-
-    LLAMA_API int llama_get_vocab_from_model(
-              const struct llama_model * model,
-                          const char * * strings,
-                                 float * scores,
-                                   int   capacity);
-
-    // Token logits obtained from the last call to llama_eval()
-    // The logits for the last token are stored in the last row
-    // Can be mutated in order to change the probabilities of the next token
-    // Rows: n_tokens
-    // Cols: n_vocab
-    LLAMA_API float * llama_get_logits(struct llama_context * ctx);
-
-    // Get the embeddings for the input
-    // shape: [n_embd] (1-dimensional)
-    LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
-
     // Token Id -> String. Uses the vocabulary in the provided context
     // Does not write null terminator to the buffer
     LLAMA_API int llama_token_to_str(
@@ -378,13 +397,11 @@ extern "C" {
                            llama_token   token,
                                   char * buf,
                                   int    length);
-    // Special tokens
-    LLAMA_API llama_token llama_token_bos(void);  // beginning-of-sentence
-    LLAMA_API llama_token llama_token_eos(void);  // end-of-sentence
-    LLAMA_API llama_token llama_token_nl(void);   // next-line
 
+    //
     // Grammar
     //
+
     LLAMA_API struct llama_grammar * llama_grammar_init(
             const llama_grammar_element ** rules,
                                  size_t    n_rules,
@@ -392,7 +409,9 @@ extern "C" {
 
     LLAMA_API void llama_grammar_free(struct llama_grammar * grammar);
 
+    //
     // Sampling functions
+    //
 
     /// @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
     LLAMA_API void llama_sample_repetition_penalty(struct llama_context * ctx, llama_token_data_array * candidates, const llama_token * last_tokens, size_t last_tokens_size, float penalty);
@@ -469,43 +488,16 @@ extern "C" {
 }
 #endif
 
-// C++ API, will be moving to common.h soon (TM)
-#ifdef LLAMA_API_CPP
+// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
+#ifdef LLAMA_API_INTERNAL
 
 #include <vector>
 #include <string>
 
-//
-// Vocab utils
-//
-
-std::vector<llama_token> llama_tokenize(
-        struct llama_context * ctx,
-           const std::string & text,
-                        bool   add_bos);
-
-std::vector<llama_token> llama_tokenize_bpe(
-        struct llama_context * ctx,
-           const std::string & text,
-                        bool   add_bos);
-
-std::string llama_token_to_str(
-        const struct llama_context * ctx,
-                       llama_token   token);
-
-std::string llama_token_to_str_bpe(
-    const struct llama_context * ctx,
-                   llama_token   token);
-
-// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
-#ifdef LLAMA_API_INTERNAL
-
 struct ggml_tensor;
 
 const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
 
-#endif // LLAMA_API_CPP
-
 #endif // LLAMA_API_INTERNAL
 
 #endif // LLAMA_H
diff --git a/models/.editorconfig b/models/.editorconfig
new file mode 100644
index 000000000..78b36ca08
--- /dev/null
+++ b/models/.editorconfig
@@ -0,0 +1 @@
+root = true
diff --git a/models/ggml-vocab-aquila.bin b/models/ggml-vocab-aquila.bin
deleted file mode 100644
index e06b39b5a..000000000
Binary files a/models/ggml-vocab-aquila.bin and /dev/null differ
diff --git a/models/ggml-vocab-llama.bin b/models/ggml-vocab-llama.bin
deleted file mode 100644
index c1a3194c4..000000000
Binary files a/models/ggml-vocab-llama.bin and /dev/null differ
diff --git a/models/ggml-vocab-llama.gguf b/models/ggml-vocab-llama.gguf
new file mode 100644
index 000000000..c50db67dc
Binary files /dev/null and b/models/ggml-vocab-llama.gguf differ
diff --git a/scripts/get-wikitext-2.sh b/scripts/get-wikitext-2.sh
new file mode 100644
index 000000000..98aec3e3e
--- /dev/null
+++ b/scripts/get-wikitext-2.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 9831b13d4..4ccefe932 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -2,7 +2,7 @@ function(llama_build_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE llama)
+    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
 endfunction()
 
 function(llama_test_executable name source)
@@ -17,7 +17,7 @@ function(llama_build_and_test_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE llama)
+    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
 endfunction()
 
@@ -26,10 +26,11 @@ llama_build_and_test_executable(test-quantize-fns.cpp)
 llama_build_and_test_executable(test-quantize-perf.cpp)
 llama_build_and_test_executable(test-sampling.cpp)
 llama_build_executable(test-tokenizer-0.cpp)
-llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.bin)
+llama_test_executable (test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 llama_build_executable(test-tokenizer-1.cpp)
-llama_test_executable(test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.bin)
-llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.bin)
-llama_build_and_test_executable(test-grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp)
+llama_test_executable (test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+#llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
+llama_build_and_test_executable(test-grammar-parser.cpp)
+llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp) # SLOW
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp
index 7022988b4..a0b5b043d 100644
--- a/tests/test-grammar-parser.cpp
+++ b/tests/test-grammar-parser.cpp
@@ -3,7 +3,8 @@
 #endif
 
 #include "llama.h"
-#include "examples/grammar-parser.cpp"
+#include "grammar-parser.h"
+
 #include <cassert>
 
 int main()
diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp
new file mode 100644
index 000000000..73dd33dd2
--- /dev/null
+++ b/tests/test-llama-grammar.cpp
@@ -0,0 +1,403 @@
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#include "llama.cpp" // TODO: not great
+#include "grammar-parser.h"
+
+#include <cassert>
+
+int main()
+{
+    grammar_parser::parse_state parsed_grammar;
+
+    std::vector<std::pair<std::string, uint32_t>> expected = {
+        {"expr", 2},
+        {"expr_6", 6},
+        {"expr_7", 7},
+        {"ident", 8},
+        {"ident_10", 10},
+        {"num", 9},
+        {"num_11", 11},
+        {"root", 0},
+        {"root_1", 1},
+        {"root_5", 5},
+        {"term", 4},
+        {"ws", 3},
+        {"ws_12", 12},
+    };
+
+    std::vector<std::vector<llama_grammar_element>> expected_rules = {
+        {{LLAMA_GRETYPE_RULE_REF, 5}, {LLAMA_GRETYPE_END, 0}},
+        {
+            {LLAMA_GRETYPE_RULE_REF, 2},
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_RULE_REF, 4},
+            {LLAMA_GRETYPE_CHAR, 10},
+            {LLAMA_GRETYPE_END, 0},
+        },
+        {{LLAMA_GRETYPE_RULE_REF, 4}, {LLAMA_GRETYPE_RULE_REF, 7}, {LLAMA_GRETYPE_END, 0}},
+        {{LLAMA_GRETYPE_RULE_REF, 12}, {LLAMA_GRETYPE_END, 0}},
+        {
+            {LLAMA_GRETYPE_RULE_REF, 8},
+            {LLAMA_GRETYPE_ALT, 0},
+            {LLAMA_GRETYPE_RULE_REF, 9},
+            {LLAMA_GRETYPE_ALT, 0},
+            {LLAMA_GRETYPE_CHAR, 40},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_RULE_REF, 2},
+            {LLAMA_GRETYPE_CHAR, 41},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_END, 0},
+        },
+        {{LLAMA_GRETYPE_RULE_REF, 1}, {LLAMA_GRETYPE_RULE_REF, 5}, {LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_RULE_REF, 1}, {LLAMA_GRETYPE_END, 0}},
+        {
+            {LLAMA_GRETYPE_CHAR, 45},
+            {LLAMA_GRETYPE_CHAR_ALT, 43},
+            {LLAMA_GRETYPE_CHAR_ALT, 42},
+            {LLAMA_GRETYPE_CHAR_ALT, 47},
+            {LLAMA_GRETYPE_RULE_REF, 4},
+            {LLAMA_GRETYPE_END, 0},
+        },
+        {{LLAMA_GRETYPE_RULE_REF, 6}, {LLAMA_GRETYPE_RULE_REF, 7}, {LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_END, 0}},
+        {
+            {LLAMA_GRETYPE_CHAR, 97},
+            {LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
+            {LLAMA_GRETYPE_RULE_REF, 10},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_END, 0},
+        },
+        {{LLAMA_GRETYPE_RULE_REF, 11}, {LLAMA_GRETYPE_RULE_REF, 3}, {LLAMA_GRETYPE_END, 0}},
+        {
+            {LLAMA_GRETYPE_CHAR, 97},
+            {LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
+            {LLAMA_GRETYPE_CHAR_ALT, 48},
+            {LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
+            {LLAMA_GRETYPE_CHAR_ALT, 95},
+            {LLAMA_GRETYPE_RULE_REF, 10},
+            {LLAMA_GRETYPE_ALT, 0},
+            {LLAMA_GRETYPE_END, 0},
+        },
+        {
+            {LLAMA_GRETYPE_CHAR, 48},
+            {LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
+            {LLAMA_GRETYPE_RULE_REF, 11},
+            {LLAMA_GRETYPE_ALT, 0},
+            {LLAMA_GRETYPE_CHAR, 48},
+            {LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
+            {LLAMA_GRETYPE_END, 0},
+        },
+        {
+            {LLAMA_GRETYPE_CHAR, 32},
+            {LLAMA_GRETYPE_CHAR_ALT, 9},
+            {LLAMA_GRETYPE_CHAR_ALT, 10},
+            {LLAMA_GRETYPE_RULE_REF, 12},
+            {LLAMA_GRETYPE_ALT, 0},
+            {LLAMA_GRETYPE_END, 0},
+        },
+    };
+
+    for (auto pair : expected)
+    {
+        parsed_grammar.symbol_ids[pair.first] = pair.second;
+    }
+
+    for (auto rule : expected_rules)
+    {
+        parsed_grammar.rules.push_back({});
+        for (auto element : rule)
+        {
+            parsed_grammar.rules.back().push_back(element);
+        }
+    }
+
+    llama_grammar *grammar = NULL;
+    std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
+    grammar = llama_grammar_init(
+        grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
+
+    std::vector<std::vector<llama_grammar_element>> expected_stacks = {
+        {
+            {LLAMA_GRETYPE_RULE_REF, 5},
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_CHAR, 97},
+        },
+        {
+            {LLAMA_GRETYPE_RULE_REF, 5},
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_CHAR, 48},
+        },
+        {
+            {LLAMA_GRETYPE_RULE_REF, 5},
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_CHAR, 48},
+        },
+        {
+            {LLAMA_GRETYPE_RULE_REF, 5},
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_CHAR, 40},
+        },
+        {
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_CHAR, 97},
+        },
+        {
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_CHAR, 48},
+        },
+        {
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_RULE_REF, 3},
+            {LLAMA_GRETYPE_CHAR, 48},
+        },
+        {
+            {LLAMA_GRETYPE_CHAR, 61},
+            {LLAMA_GRETYPE_RULE_REF, 7},
+            {LLAMA_GRETYPE_CHAR, 40},
+        }};
+
+    auto index = 0;
+    for (auto stack : grammar->stacks)
+    {
+        // compare stack to expected_stack
+        for (uint32_t i = 0; i < stack.size(); i++)
+        {
+            auto element = stack[i];
+            auto expected_element = expected_stacks[index][i];
+
+            // pretty print error message before asserting
+            if (expected_element.type != element->type || expected_element.value != element->value)
+            {
+                fprintf(stderr, "index: %d\n", index);
+                fprintf(stderr, "expected_element: %d, %d\n", expected_element.type, expected_element.value);
+                fprintf(stderr, "actual_element: %d, %d\n", element->type, element->value);
+                fprintf(stderr, "expected_element != actual_element\n");
+            }
+
+            assert(expected_element.type == element->type && expected_element.value == element->value);
+        }
+        index++;
+    }
+
+    std::vector<std::vector<const llama_grammar_element *>> next_stacks;
+    std::vector<llama_grammar_candidate> next_candidates;
+    next_candidates.resize(24);
+
+    for (size_t i = 0; i < 24; ++i)
+    {
+        uint32_t *cp = new uint32_t[2]; // dynamically allocate memory for code_point
+        cp[0] = 37 + i;
+        cp[1] = 0;
+        next_candidates[i] = {i, cp, {}};
+    }
+
+    std::vector<std::vector<std::pair<uint32_t, uint16_t>>> expected_reject = {
+        {
+            {0, 37},
+            {1, 38},
+            {2, 39},
+            {3, 40},
+            {4, 41},
+            {5, 42},
+            {6, 43},
+            {7, 44},
+            {8, 45},
+            {9, 46},
+            {10, 47},
+            {11, 48},
+            {12, 49},
+            {13, 50},
+            {14, 51},
+            {15, 52},
+            {16, 53},
+            {17, 54},
+            {18, 55},
+            {19, 56},
+            {20, 57},
+            {21, 58},
+            {22, 59},
+            {23, 60},
+        },
+        {
+            {0, 37},
+            {1, 38},
+            {2, 39},
+            {3, 40},
+            {4, 41},
+            {5, 42},
+            {6, 43},
+            {7, 44},
+            {8, 45},
+            {9, 46},
+            {10, 47},
+            {21, 58},
+            {22, 59},
+            {23, 60},
+        },
+        {
+            {0, 37},
+            {1, 38},
+            {2, 39},
+            {3, 40},
+            {4, 41},
+            {5, 42},
+            {6, 43},
+            {7, 44},
+            {8, 45},
+            {9, 46},
+            {10, 47},
+            {21, 58},
+            {22, 59},
+            {23, 60},
+        },
+        {
+            {0, 37},
+            {1, 38},
+            {2, 39},
+            {4, 41},
+            {5, 42},
+            {6, 43},
+            {7, 44},
+            {8, 45},
+            {9, 46},
+            {10, 47},
+            {11, 48},
+            {12, 49},
+            {13, 50},
+            {14, 51},
+            {15, 52},
+            {16, 53},
+            {17, 54},
+            {18, 55},
+            {19, 56},
+            {20, 57},
+            {21, 58},
+            {22, 59},
+            {23, 60},
+        },
+        {
+            {0, 37},
+            {1, 38},
+            {2, 39},
+            {3, 40},
+            {4, 41},
+            {5, 42},
+            {6, 43},
+            {7, 44},
+            {8, 45},
+            {9, 46},
+            {10, 47},
+            {11, 48},
+            {12, 49},
+            {13, 50},
+            {14, 51},
+            {15, 52},
+            {16, 53},
+            {17, 54},
+            {18, 55},
+            {19, 56},
+            {20, 57},
+            {21, 58},
+            {22, 59},
+            {23, 60},
+        },
+        {
+            {0, 37},
+            {1, 38},
+            {2, 39},
+            {3, 40},
+            {4, 41},
+            {5, 42},
+            {6, 43},
+            {7, 44},
+            {8, 45},
+            {9, 46},
+            {10, 47},
+            {21, 58},
+            {22, 59},
+            {23, 60},
+        },
+        {
+            {0, 37},
+            {1, 38},
+            {2, 39},
+            {3, 40},
+            {4, 41},
+            {5, 42},
+            {6, 43},
+            {7, 44},
+            {8, 45},
+            {9, 46},
+            {10, 47},
+            {21, 58},
+            {22, 59},
+            {23, 60},
+        },
+        {
+            {0, 37},
+            {1, 38},
+            {2, 39},
+            {4, 41},
+            {5, 42},
+            {6, 43},
+            {7, 44},
+            {8, 45},
+            {9, 46},
+            {10, 47},
+            {11, 48},
+            {12, 49},
+            {13, 50},
+            {14, 51},
+            {15, 52},
+            {16, 53},
+            {17, 54},
+            {18, 55},
+            {19, 56},
+            {20, 57},
+            {21, 58},
+            {22, 59},
+            {23, 60},
+        },
+    };
+
+    std::vector<llama_grammar_candidate> rejects = llama_grammar_reject_candidates_for_stack(grammar->rules, grammar->stacks[0], next_candidates);
+
+    std::vector<std::vector<llama_grammar_candidate>> all_rejects;
+
+    for (std::size_t count = 0; count < grammar->stacks.size(); ++count)
+    {
+        rejects = llama_grammar_reject_candidates_for_stack(grammar->rules, grammar->stacks[count], next_candidates);
+        all_rejects.push_back(rejects);
+    }
+
+    index = 0;
+    for (auto rej : all_rejects)
+    {
+        for (uint32_t i = 0; i < rej.size(); i++)
+        {
+            auto element = rej[i];
+            auto expected_element = expected_reject[index][i];
+            assert(element.index == expected_element.first && *element.code_points == expected_element.second);
+        }
+        index++;
+    }
+
+    for (auto &candidate : next_candidates)
+    {
+        delete[] candidate.code_points;
+        candidate.code_points = nullptr;
+    }
+    delete grammar;
+    return 0;
+}
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index f973271a3..81764565b 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -1,5 +1,5 @@
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
+#include "common.h"
 
 #include <cstdio>
 #include <string>
@@ -41,7 +41,7 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
     };
 
     return _k_tests;
-};
+}
 
 int main(int argc, char **argv) {
     if (argc < 2) {
@@ -89,6 +89,8 @@ int main(int argc, char **argv) {
         return 2;
     }
 
+    bool success = true;
+
     for (const auto & test_kv : k_tests()) {
         std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, true);
         fprintf(stderr, "%s : '%s' tokenized to '%s'\n",
@@ -103,7 +105,8 @@ int main(int argc, char **argv) {
         }
 
         if (!correct) {
-            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
+            fprintf(stderr, "%s : failed test:    '%s'\n", __func__, test_kv.first.c_str());
+            fprintf(stderr, "%s : detokenized to: '%s'\n", __func__, unescape_whitespace(ctx, test_kv.second).c_str());
             fprintf(stderr, "%s : expected tokens: ", __func__);
             for (const auto & t : test_kv.second) {
                 fprintf(stderr, "%6d, ", t);
@@ -115,9 +118,7 @@ int main(int argc, char **argv) {
             }
             fprintf(stderr, "\n");
 
-            llama_free_model(model);
-            llama_free(ctx);
-            return 3;
+            success = false;
         }
     }
 
@@ -126,5 +127,5 @@ int main(int argc, char **argv) {
 
     llama_backend_free();
 
-    return 0;
+    return success ? 0 : 3;
 }
diff --git a/tests/test-tokenizer-1.cpp b/tests/test-tokenizer-1.cpp
index 1837a8361..106017fbe 100644
--- a/tests/test-tokenizer-1.cpp
+++ b/tests/test-tokenizer-1.cpp
@@ -1,5 +1,5 @@
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
+#include "common.h"
 
 #include <cassert>
 #include <cstdio>
@@ -120,8 +120,7 @@ int main(int argc, char **argv) {
         std::string str = u32converter.to_bytes(u32str);
         std::vector<llama_token> tokens = llama_tokenize(ctx, escape_whitespace(str).c_str(), false);
         if (tokens.size() == 1) {
-            fprintf(stderr, "%s : info: %s tokenized to %d \n",
-                __func__, str.c_str(), tokens[0]);
+            fprintf(stderr, "%s : info: %s tokenized to %d \n", __func__, str.c_str(), tokens[0]);
         }
     }