Merge branch 'master' into concedo_experimental

# Conflicts: # .github/workflows/build.yml # build.zig
2023-10-08 22:55:44 +08:00 · 2023-10-08 22:55:44 +08:00 · e967717385
commit e967717385
parent 840b244c17 eee42c670e
4 changed files with 33 additions and 9 deletions
--- a/.github/workflows/zig-build.yml
+++ b/.github/workflows/zig-build.yml
@ -0,0 +1,25 @@
 name: Zig CI
 on:
  pull_request:
  push:
    branches:
      - master
 jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        runs-on: [ubuntu-latest, macos-latest, windows-latest]
    runs-on: ${{ matrix.runs-on }}
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
          fetch-depth: 0
      - uses: goto-bus-stop/setup-zig@v2
        with:
          version: 0.11.0
      - name: Build Summary
        run: zig build --summary all -freference-trace
--- a/examples/server/api_like_OAI.py
+++ b/examples/server/api_like_OAI.py
@ -27,10 +27,10 @@ def is_present(json, key):
        buf = json[key]
    except KeyError:
        return False
    if json[key] == None:
        return False
    return True
 #convert chat to prompt
 def convert_chat(messages):
    prompt = "" + args.chat_prompt.replace("\\n", "\n")
--- a/ggml-metal.m
+++ b/ggml-metal.m
@ -301,12 +301,11 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 #if TARGET_OS_OSX
    // print MTL GPU family:
    GGML_METAL_LOG_INFO("%s: GPU name:   %s\n", __func__, [[ctx->device name] UTF8String]);
    GGML_METAL_LOG_INFO("%s: GPU arch:   %s\n", __func__, [[ctx->device architecture].name UTF8String]);
    // determine max supported GPU family
    // https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
    // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
-    for (int i = MTLGPUFamilyApple9 + 10; i >= MTLGPUFamilyApple1; --i) {
+    for (int i = MTLGPUFamilyApple1 + 20; i >= MTLGPUFamilyApple1; --i) {
        if ([ctx->device supportsFamily:i]) {
            GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __func__, i - MTLGPUFamilyApple1 + 1, i);
            break;
--- a/k_quants.h
+++ b/k_quants.h
@ -29,7 +29,7 @@
 // 2-bit quantization
 // weight is represented as x = a * q + b
-// 16 blocks of 16 elemenets each
+// 16 blocks of 16 elements each
 // Effectively 2.5625 bits per weight
 typedef struct {
    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
@ -41,7 +41,7 @@ static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "w
 // 3-bit quantization
 // weight is represented as x = a * q
-// 16 blocks of 16 elemenets each
+// 16 blocks of 16 elements each
 // Effectively 3.4375 bits per weight
 #ifdef GGML_QKK_64
 typedef struct {
@ -62,7 +62,7 @@ static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 +
 #endif
 // 4-bit quantization
-// 16 blocks of 32 elements each
+// 8 blocks of 32 elements each
 // weight is represented as x = a * q + b
 // Effectively 4.5 bits per weight
 #ifdef GGML_QKK_64
@ -83,7 +83,7 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
 #endif
 // 5-bit quantization
-// 16 blocks of 32 elements each
+// 8 blocks of 32 elements each
 // weight is represented as x = a * q + b
 // Effectively 5.5 bits per weight
 #ifdef GGML_QKK_64
@ -107,7 +107,7 @@ static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
 // 6-bit quantization
 // weight is represented as x = a * q
-// 16 blocks of 16 elemenets each
+// 16 blocks of 16 elements each
 // Effectively 6.5625 bits per weight
 typedef struct {
    uint8_t ql[QK_K/2];      // quants, lower 4 bits