Merge 68ca77a148
into 7736837d62
This commit is contained in:
commit
0bd133ddc4
4 changed files with 4 additions and 4 deletions
|
@ -191,7 +191,7 @@ You can download it from your Linux distro's package manager or from here: [ROCm
|
|||
- Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU):
|
||||
```bash
|
||||
set PATH=%HIP_PATH%\bin;%PATH%
|
||||
cmake -S . -B build -G Ninja -DAMDGPU_TARGETS=gfx1100 -DGGML_HIP=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release
|
||||
cmake -S . -B build -G Ninja -DGGML_OPENMP=OFF -DAMDGPU_TARGETS=gfx1100 -DGGML_HIP=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build build
|
||||
```
|
||||
Make sure that `AMDGPU_TARGETS` is set to the GPU arch you want to compile for. The above example uses `gfx1100` that corresponds to Radeon RX 7900XTX/XT/GRE. You can find a list of targets [here](https://llvm.org/docs/AMDGPUUsage.html#processors)
|
||||
|
|
|
@ -157,7 +157,7 @@ static inline __m256i sum_i16_pairs_int32x8(const __m256i x) {
|
|||
}
|
||||
|
||||
static inline __m256i mul_sum_us8_pairs_int32x8(const __m256i ax, const __m256i sy) {
|
||||
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
|
||||
#if defined(__AVX__) && defined(__AVX512VNNI__) && defined(__AVX512VL__)
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
return _mm256_dpbusd_epi32(zero, ax, sy);
|
||||
#else
|
||||
|
|
|
@ -103,7 +103,7 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
|
|||
}
|
||||
|
||||
static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) {
|
||||
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
|
||||
#if defined(__AVX__) && defined(__AVX512VNNI__) && defined(__AVX512VL__)
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy);
|
||||
return _mm256_cvtepi32_ps(summed_pairs);
|
||||
|
|
|
@ -992,7 +992,7 @@ class tinyBLAS_Q0_AVX {
|
|||
|
||||
inline __m256 updot(__m256i u, __m256i s) {
|
||||
__m256i res;
|
||||
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
|
||||
#if defined(__AVX__) && defined(__AVX512VNNI__) && defined(__AVX512VL__)
|
||||
res = _mm256_dpbusd_epi32(_mm256_setzero_si256(), u, s);
|
||||
#else
|
||||
res = _mm256_madd_epi16(_mm256_set1_epi16(1), _mm256_maddubs_epi16(u, s));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue