Merge 3cbdbe8947
into d7b31a9d84
This commit is contained in:
commit
930d08b7a0
2 changed files with 35 additions and 6 deletions
28
.github/workflows/build.yml
vendored
28
.github/workflows/build.yml
vendored
|
@ -403,6 +403,34 @@ jobs:
|
||||||
# This is using llvmpipe and runs slower than other backends
|
# This is using llvmpipe and runs slower than other backends
|
||||||
ctest -L main --verbose --timeout 1800
|
ctest -L main --verbose --timeout 1800
|
||||||
|
|
||||||
|
- name: Determine tag name
|
||||||
|
id: tag
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
||||||
|
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
||||||
|
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
||||||
|
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
||||||
|
else
|
||||||
|
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
||||||
|
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Pack artifacts
|
||||||
|
id: pack_artifacts
|
||||||
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
run: |
|
||||||
|
cp LICENSE ./build/bin/
|
||||||
|
cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
|
||||||
|
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
|
||||||
|
|
||||||
|
- name: Upload artifacts
|
||||||
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
|
||||||
|
name: llama-bin-ubuntu-vulkan-x64.zip
|
||||||
|
|
||||||
ubuntu-22-cmake-hip:
|
ubuntu-22-cmake-hip:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
container: rocm/dev-ubuntu-22.04:6.0.2
|
container: rocm/dev-ubuntu-22.04:6.0.2
|
||||||
|
|
|
@ -1430,6 +1430,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
||||||
VK_LOG_DEBUG("ggml_vk_load_shaders(" << device->name << ")");
|
VK_LOG_DEBUG("ggml_vk_load_shaders(" << device->name << ")");
|
||||||
|
|
||||||
// some shaders have a minimum subgroup size
|
// some shaders have a minimum subgroup size
|
||||||
|
const uint32_t subgroup_size_8 = std::max(device->subgroup_size, 8u);
|
||||||
const uint32_t subgroup_size_16 = std::max(device->subgroup_size, 16u);
|
const uint32_t subgroup_size_16 = std::max(device->subgroup_size, 16u);
|
||||||
const uint32_t subgroup_size_32 = std::max(device->subgroup_size, 32u);
|
const uint32_t subgroup_size_32 = std::max(device->subgroup_size, 32u);
|
||||||
|
|
||||||
|
@ -1492,13 +1493,13 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
||||||
const uint32_t tk_m = device->coopmat_support ? device->coopmat_k : 1;
|
const uint32_t tk_m = device->coopmat_support ? device->coopmat_k : 1;
|
||||||
const uint32_t tk_s = device->coopmat_support ? device->coopmat_k : 1;
|
const uint32_t tk_s = device->coopmat_support ? device->coopmat_k : 1;
|
||||||
|
|
||||||
l_warptile = { 128, 128, 128, 16, device->subgroup_size * 2, 64, 2, tm_l, tn_l, tk_l, device->subgroup_size };
|
l_warptile = { 128, 128, 128, 16, subgroup_size_8 * 2, 64, 2, tm_l, tn_l, tk_l, subgroup_size_8 };
|
||||||
m_warptile = { 128, 64, 64, 16, device->subgroup_size, 32, 2, tm_m, tn_m, tk_m, device->subgroup_size };
|
m_warptile = { 128, 64, 64, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
|
||||||
s_warptile = { subgroup_size_16, 32, 32, 16, 32, 32, 2, tm_s, tn_s, tk_s, device->subgroup_size };
|
s_warptile = { subgroup_size_16, 32, 32, 16, 32, 32, 2, tm_s, tn_s, tk_s, subgroup_size_8 };
|
||||||
|
|
||||||
l_warptile_mmq = { 128, 128, 128, 32, device->subgroup_size * 2, 64, 2, tm_l, tn_l, tk_l, device->subgroup_size };
|
l_warptile_mmq = { 128, 128, 128, 32, subgroup_size_8 * 2, 64, 2, tm_l, tn_l, tk_l, subgroup_size_8 };
|
||||||
m_warptile_mmq = { 128, 64, 64, 32, device->subgroup_size, 32, 2, tm_m, tn_m, tk_m, device->subgroup_size };
|
m_warptile_mmq = { 128, 64, 64, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
|
||||||
s_warptile_mmq = { subgroup_size_32, 32, 32, 32, 32, 32, 2, tm_s, tn_s, tk_s, device->subgroup_size };
|
s_warptile_mmq = { subgroup_size_32, 32, 32, 32, 32, 32, 2, tm_s, tn_s, tk_s, subgroup_size_8 };
|
||||||
|
|
||||||
l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 };
|
l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 };
|
||||||
m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 };
|
m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 };
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue