Merge branch 'master' into concedo_experimental
# Conflicts:
#	.github/workflows/build.yml
#	build.zig
This commit is contained in:
commit
e967717385
4 changed files with 33 additions and 9 deletions
25
.github/workflows/zig-build.yml
vendored
Normal file
25
.github/workflows/zig-build.yml
vendored
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
name: Zig CI
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
runs-on: [ubuntu-latest, macos-latest, windows-latest]
|
||||||
|
runs-on: ${{ matrix.runs-on }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
with:
|
||||||
|
submodules: recursive
|
||||||
|
fetch-depth: 0
|
||||||
|
- uses: goto-bus-stop/setup-zig@v2
|
||||||
|
with:
|
||||||
|
version: 0.11.0
|
||||||
|
- name: Build Summary
|
||||||
|
run: zig build --summary all -freference-trace
|
|
@ -27,10 +27,10 @@ def is_present(json, key):
|
||||||
buf = json[key]
|
buf = json[key]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return False
|
return False
|
||||||
|
if json[key] == None:
|
||||||
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#convert chat to prompt
|
#convert chat to prompt
|
||||||
def convert_chat(messages):
|
def convert_chat(messages):
|
||||||
prompt = "" + args.chat_prompt.replace("\\n", "\n")
|
prompt = "" + args.chat_prompt.replace("\\n", "\n")
|
||||||
|
|
|
@ -301,12 +301,11 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
||||||
#if TARGET_OS_OSX
|
#if TARGET_OS_OSX
|
||||||
// print MTL GPU family:
|
// print MTL GPU family:
|
||||||
GGML_METAL_LOG_INFO("%s: GPU name: %s\n", __func__, [[ctx->device name] UTF8String]);
|
GGML_METAL_LOG_INFO("%s: GPU name: %s\n", __func__, [[ctx->device name] UTF8String]);
|
||||||
GGML_METAL_LOG_INFO("%s: GPU arch: %s\n", __func__, [[ctx->device architecture].name UTF8String]);
|
|
||||||
|
|
||||||
// determine max supported GPU family
|
// determine max supported GPU family
|
||||||
// https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
|
// https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
|
||||||
// https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
// https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
||||||
for (int i = MTLGPUFamilyApple9 + 10; i >= MTLGPUFamilyApple1; --i) {
|
for (int i = MTLGPUFamilyApple1 + 20; i >= MTLGPUFamilyApple1; --i) {
|
||||||
if ([ctx->device supportsFamily:i]) {
|
if ([ctx->device supportsFamily:i]) {
|
||||||
GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __func__, i - MTLGPUFamilyApple1 + 1, i);
|
GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __func__, i - MTLGPUFamilyApple1 + 1, i);
|
||||||
break;
|
break;
|
||||||
|
|
10
k_quants.h
10
k_quants.h
|
@ -29,7 +29,7 @@
|
||||||
|
|
||||||
// 2-bit quantization
|
// 2-bit quantization
|
||||||
// weight is represented as x = a * q + b
|
// weight is represented as x = a * q + b
|
||||||
// 16 blocks of 16 elemenets each
|
// 16 blocks of 16 elements each
|
||||||
// Effectively 2.5625 bits per weight
|
// Effectively 2.5625 bits per weight
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
||||||
|
@ -41,7 +41,7 @@ static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "w
|
||||||
|
|
||||||
// 3-bit quantization
|
// 3-bit quantization
|
||||||
// weight is represented as x = a * q
|
// weight is represented as x = a * q
|
||||||
// 16 blocks of 16 elemenets each
|
// 16 blocks of 16 elements each
|
||||||
// Effectively 3.4375 bits per weight
|
// Effectively 3.4375 bits per weight
|
||||||
#ifdef GGML_QKK_64
|
#ifdef GGML_QKK_64
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -62,7 +62,7 @@ static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 +
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 4-bit quantization
|
// 4-bit quantization
|
||||||
// 16 blocks of 32 elements each
|
// 8 blocks of 32 elements each
|
||||||
// weight is represented as x = a * q + b
|
// weight is represented as x = a * q + b
|
||||||
// Effectively 4.5 bits per weight
|
// Effectively 4.5 bits per weight
|
||||||
#ifdef GGML_QKK_64
|
#ifdef GGML_QKK_64
|
||||||
|
@ -83,7 +83,7 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 5-bit quantization
|
// 5-bit quantization
|
||||||
// 16 blocks of 32 elements each
|
// 8 blocks of 32 elements each
|
||||||
// weight is represented as x = a * q + b
|
// weight is represented as x = a * q + b
|
||||||
// Effectively 5.5 bits per weight
|
// Effectively 5.5 bits per weight
|
||||||
#ifdef GGML_QKK_64
|
#ifdef GGML_QKK_64
|
||||||
|
@ -107,7 +107,7 @@ static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
|
||||||
|
|
||||||
// 6-bit quantization
|
// 6-bit quantization
|
||||||
// weight is represented as x = a * q
|
// weight is represented as x = a * q
|
||||||
// 16 blocks of 16 elemenets each
|
// 16 blocks of 16 elements each
|
||||||
// Effectively 6.5625 bits per weight
|
// Effectively 6.5625 bits per weight
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue