diff --git a/.github/workflows/zig-build.yml b/.github/workflows/zig-build.yml
new file mode 100644
index 000000000..68a698ab9
--- /dev/null
+++ b/.github/workflows/zig-build.yml
@@ -0,0 +1,25 @@
+name: Zig CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        runs-on: [ubuntu-latest, macos-latest, windows-latest]
+    runs-on: ${{ matrix.runs-on }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: recursive
+          fetch-depth: 0
+      - uses: goto-bus-stop/setup-zig@v2
+        with:
+          version: 0.11.0
+      - name: Build Summary
+        run: zig build --summary all -freference-trace
diff --git a/examples/server/api_like_OAI.py b/examples/server/api_like_OAI.py
index ed19237b0..14d2dcf65 100755
--- a/examples/server/api_like_OAI.py
+++ b/examples/server/api_like_OAI.py
@@ -27,10 +27,10 @@ def is_present(json, key):
         buf = json[key]
     except KeyError:
         return False
+    if json[key] == None:
+        return False
     return True
 
-
-
 #convert chat to prompt
 def convert_chat(messages):
     prompt = "" + args.chat_prompt.replace("\\n", "\n")
diff --git a/ggml-metal.m b/ggml-metal.m
index 8549fa9ac..7ff131cc4 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -301,12 +301,11 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 #if TARGET_OS_OSX
     // print MTL GPU family:
     GGML_METAL_LOG_INFO("%s: GPU name: %s\n", __func__, [[ctx->device name] UTF8String]);
-    GGML_METAL_LOG_INFO("%s: GPU arch: %s\n", __func__, [[ctx->device architecture].name UTF8String]);
 
     // determine max supported GPU family
    // https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
    // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
-    for (int i = MTLGPUFamilyApple9 + 10; i >= MTLGPUFamilyApple1; --i) {
+    for (int i = MTLGPUFamilyApple1 + 20; i >= MTLGPUFamilyApple1; --i) {
         if ([ctx->device supportsFamily:i]) {
             GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __func__, i - MTLGPUFamilyApple1 + 1, i);
             break;
diff --git a/k_quants.h b/k_quants.h
index adc6a3913..9de089e7a 100644
--- a/k_quants.h
+++ b/k_quants.h
@@ -29,7 +29,7 @@
 
 // 2-bit quantization
 // weight is represented as x = a * q + b
-// 16 blocks of 16 elemenets each
+// 16 blocks of 16 elements each
 // Effectively 2.5625 bits per weight
 typedef struct {
     uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
@@ -41,7 +41,7 @@ static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "w
 
 // 3-bit quantization
 // weight is represented as x = a * q
-// 16 blocks of 16 elemenets each
+// 16 blocks of 16 elements each
 // Effectively 3.4375 bits per weight
 #ifdef GGML_QKK_64
 typedef struct {
@@ -62,7 +62,7 @@ static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 +
 #endif
 
 // 4-bit quantization
-// 16 blocks of 32 elements each
+// 8 blocks of 32 elements each
 // weight is represented as x = a * q + b
 // Effectively 4.5 bits per weight
 #ifdef GGML_QKK_64
 typedef struct {
@@ -83,7 +83,7 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
 #endif
 
 // 5-bit quantization
-// 16 blocks of 32 elements each
+// 8 blocks of 32 elements each
 // weight is represented as x = a * q + b
 // Effectively 5.5 bits per weight
 #ifdef GGML_QKK_64
@@ -107,7 +107,7 @@ static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
 
 // 6-bit quantization
 // weight is represented as x = a * q
-// 16 blocks of 16 elemenets each
+// 16 blocks of 16 elements each
 // Effectively 6.5625 bits per weight
 typedef struct {
     uint8_t ql[QK_K/2];      // quants, lower 4 bits
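
Note on the k_quants.h comment fixes above: with the default super-block size of QK_K = 256 weights (the non-GGML_QKK_64 path), the 4-bit and 5-bit formats split a super-block into 256 / 32 = 8 sub-blocks of 32 elements, while the 2-, 3-, and 6-bit formats use 256 / 16 = 16 sub-blocks of 16 elements, which is what the corrected comments state. A minimal standalone sanity check of that arithmetic, not part of the patch (QK_K hard-coded here only for illustration):

/* Illustrative only: verify the sub-block counts behind the comment fixes,
 * assuming the default super-block size QK_K = 256 from k_quants.h. */
#define QK_K 256

_Static_assert(QK_K / 16 == 16, "Q2_K/Q3_K/Q6_K: 16 sub-blocks of 16 elements");
_Static_assert(QK_K / 32 == 8,  "Q4_K/Q5_K: 8 sub-blocks of 32 elements");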