Merge branch 'master' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	build.zig
This commit is contained in:
Concedo 2023-10-08 22:55:44 +08:00
commit e967717385
4 changed files with 33 additions and 9 deletions

25
.github/workflows/zig-build.yml vendored Normal file
View file

@ -0,0 +1,25 @@
# Zig CI: builds the project with `zig build` on Linux, macOS and Windows.
name: Zig CI

# Run for every pull request, and for pushes to the master branch only.
on:
  pull_request:
  push:
    branches:
      - master

jobs:
  build:
    strategy:
      # Let the remaining OS jobs finish even if one of them fails,
      # so a single run reports the status of every platform.
      fail-fast: false
      matrix:
        runs-on: [ubuntu-latest, macos-latest, windows-latest]
    # One job per entry of the matrix above.
    runs-on: ${{ matrix.runs-on }}
    steps:
      - uses: actions/checkout@v3
        with:
          # Check out submodules too, with full history rather than a shallow clone.
          submodules: recursive
          fetch-depth: 0
      - uses: goto-bus-stop/setup-zig@v2
        with:
          # Pinned Zig toolchain version.
          version: 0.11.0
      - name: Build Summary
        # --summary all prints the status of every build step;
        # -freference-trace adds reference traces to compile errors.
        run: zig build --summary all -freference-trace

View file

@ -27,10 +27,10 @@ def is_present(json, key):
buf = json[key] buf = json[key]
except KeyError: except KeyError:
return False return False
if json[key] == None:
return False
return True return True
#convert chat to prompt #convert chat to prompt
def convert_chat(messages): def convert_chat(messages):
prompt = "" + args.chat_prompt.replace("\\n", "\n") prompt = "" + args.chat_prompt.replace("\\n", "\n")

View file

@ -301,12 +301,11 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
#if TARGET_OS_OSX #if TARGET_OS_OSX
// print MTL GPU family: // print MTL GPU family:
GGML_METAL_LOG_INFO("%s: GPU name: %s\n", __func__, [[ctx->device name] UTF8String]); GGML_METAL_LOG_INFO("%s: GPU name: %s\n", __func__, [[ctx->device name] UTF8String]);
GGML_METAL_LOG_INFO("%s: GPU arch: %s\n", __func__, [[ctx->device architecture].name UTF8String]);
// determine max supported GPU family // determine max supported GPU family
// https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf // https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
// https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
for (int i = MTLGPUFamilyApple9 + 10; i >= MTLGPUFamilyApple1; --i) { for (int i = MTLGPUFamilyApple1 + 20; i >= MTLGPUFamilyApple1; --i) {
if ([ctx->device supportsFamily:i]) { if ([ctx->device supportsFamily:i]) {
GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __func__, i - MTLGPUFamilyApple1 + 1, i); GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __func__, i - MTLGPUFamilyApple1 + 1, i);
break; break;

View file

@ -29,7 +29,7 @@
// 2-bit quantization // 2-bit quantization
// weight is represented as x = a * q + b // weight is represented as x = a * q + b
// 16 blocks of 16 elemenets each // 16 blocks of 16 elements each
// Effectively 2.5625 bits per weight // Effectively 2.5625 bits per weight
typedef struct { typedef struct {
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
@ -41,7 +41,7 @@ static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "w
// 3-bit quantization // 3-bit quantization
// weight is represented as x = a * q // weight is represented as x = a * q
// 16 blocks of 16 elemenets each // 16 blocks of 16 elements each
// Effectively 3.4375 bits per weight // Effectively 3.4375 bits per weight
#ifdef GGML_QKK_64 #ifdef GGML_QKK_64
typedef struct { typedef struct {
@ -62,7 +62,7 @@ static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 +
#endif #endif
// 4-bit quantization // 4-bit quantization
// 16 blocks of 32 elements each // 8 blocks of 32 elements each
// weight is represented as x = a * q + b // weight is represented as x = a * q + b
// Effectively 4.5 bits per weight // Effectively 4.5 bits per weight
#ifdef GGML_QKK_64 #ifdef GGML_QKK_64
@ -83,7 +83,7 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
#endif #endif
// 5-bit quantization // 5-bit quantization
// 16 blocks of 32 elements each // 8 blocks of 32 elements each
// weight is represented as x = a * q + b // weight is represented as x = a * q + b
// Effectively 5.5 bits per weight // Effectively 5.5 bits per weight
#ifdef GGML_QKK_64 #ifdef GGML_QKK_64
@ -107,7 +107,7 @@ static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
// 6-bit quantization // 6-bit quantization
// weight is represented as x = a * q // weight is represented as x = a * q
// 16 blocks of 16 elemenets each // 16 blocks of 16 elements each
// Effectively 6.5625 bits per weight // Effectively 6.5625 bits per weight
typedef struct { typedef struct {
uint8_t ql[QK_K/2]; // quants, lower 4 bits uint8_t ql[QK_K/2]; // quants, lower 4 bits