From cf5d918073755ffa276d9062f037f74f6016c96d Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon, 28 Aug 2023 04:05:06 -0500 Subject: [PATCH] Koboldcpp-ROCm Port (#399) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * koboldcpp-ROCm Port commit 3416c986d9d9a31c3cdefd7e7bd4d9438d72ba35 Merge: 5eb17f0 4c4e435 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Fri Aug 25 13:46:56 2023 -0500 Merge remote-tracking branch 'upstream/concedo' commit 5eb17f02c8638e003bb91bddf95ccf54d2ad0c12 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Fri Aug 25 13:38:21 2023 -0500 ROCm Port update * use hipblas based on cublas * Update Makefile for the Cuda kernels * Expand arch list and make it overrideable * Fix multi GPU on multiple amd architectures with rocblas_initialize() (#5) * add hipBLAS to README * new build arg LLAMA_CUDA_MMQ_Y * fix half2 decomposition * Add intrinsics polyfills for AMD * AMD assembly optimized __dp4a * Allow overriding CC_TURING * use "ROCm" instead of "CUDA" * ignore all build dirs * Add Dockerfiles * fix llama-bench * fix -nommq help for non CUDA/HIP --------- Co-Authored-By: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Co-Authored-By: ardfork <134447697+ardfork@users.noreply.github.com> Co-Authored-By: funnbot <22226942+funnbot@users.noreply.github.com> Co-Authored-By: Engininja2 <139037756+Engininja2@users.noreply.github.com> Co-Authored-By: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> Co-Authored-By: jammm <2500920+jammm@users.noreply.github.com> Co-Authored-By: jdecourval <7315817+jdecourval@users.noreply.github.com> commit b34f4bd2724733e188ec4f6074042f66a5ed28c9 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Aug 19 17:12:52 2023 -0500 Update README.md commit 7d1196108ad330b32845546fb3472c2172a0b6b8 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Aug 14 23:03:12 2023 -0500 remove force DMMV commit cd61aa0d9e16627935c7978adf488a679ddfa745 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Aug 12 17:24:31 2023 -0500 restore main_gpu parameter commit 4a042f326830271a4c31104051b7b08e08ac234e Author: Henri Vasserman Date: Sat Aug 12 10:51:46 2023 +0300 gfx1100 support --------- Co-authored-by: ardfork <134447697+ardfork@users.noreply.github.com> Co-authored-by: jammm <2500920+jammm@users.noreply.github.com> Co-authored-by: jdecourval <7315817+jdecourval@users.noreply.github.com> commit 8913bc6fea97d3cb860937b0461f455c6abe3ea1 Author: Henri Vasserman Date: Fri Aug 11 10:16:02 2023 +0300 Allow overriding CC_TURING commit e77a4c37a756c002e97173f4122e088fb304e18a Author: Henri Vasserman Date: Fri Aug 11 10:00:07 2023 +0300 Merge 'origin/master' into hipblas commit cc4c4e355cd553b1557d5fba2562e824db93f9b4 Author: Engininja2 <139037756+Engininja2@users.noreply.github.com> Date: Fri Aug 11 09:43:14 2023 +0300 New __dp4a assembly Now compatible with gfx900 and faster as well. commit 1a03b709848ce68d5bf5966237756167e2cac540 Author: Henri Vasserman Date: Fri Aug 11 09:30:28 2023 +0300 Undo mess --------- Co-authored-by: ardfork <134447697+ardfork@users.noreply.github.com> commit 4366ff9ba1b1f12e494118ef9b5198479022fcc5 Author: DannyDaemonic Date: Thu Aug 10 13:11:36 2023 -0700 Handle `ENABLE_VIRTUAL_TERMINAL_PROCESSING` more gracefully on earlier versions of Windows. commit 811ff855a24323cafddc95c1b8aca711fef05f76 Author: Christian Demsar Date: Thu Aug 10 10:28:27 2023 -0400 Add --n-predict -2 for stopping generation on full context (#2565) commit 37c9717aaa6815b6a5be21aaab970212f20fe6bf Author: Martin Krasser Date: Thu Aug 10 12:16:38 2023 +0200 Fix grammar-based sampling issue in server (#2566) commit d18ecd5b9e5dde58ae08a3eef1637406159ddaca Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu Aug 10 13:19:41 2023 -0500 make mmq gen faster for amd commit 243894a952147a4fac5b6aee748861a0df6cc2c6 Author: Henri Vasserman Date: Thu Aug 10 12:14:40 2023 +0300 ws fix commit ac2f14da445ea87d73539adbd29d19ff2c9eba58 Author: Engininja2 <139037756+Engininja2@users.noreply.github.com> Date: Thu Aug 10 12:11:27 2023 +0300 AMD assembly optimized __dp4a Doesn't seem to work for gfx900, so commented out. commit 9dba0c985f140ddded8cbb671f139e81fff82eed Author: Henri Vasserman Date: Thu Aug 10 12:09:28 2023 +0300 Fix merge --------- Co-authored-by: ardfork <134447697+ardfork@users.noreply.github.com> Co-authored-by: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> commit f570b5cb1070591527a82d94bba408927b37778d Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Aug 9 22:11:20 2023 -0500 Revert "revert cuda changes as they are bugggy" This reverts commit 1541bf879772aeeed8ff646bfc52185c2a88b79b. commit 1541bf879772aeeed8ff646bfc52185c2a88b79b Author: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Wed Aug 9 22:36:41 2023 +0800 revert cuda changes as they are bugggy commit bacc20203efb1839aa313858a04d75255bb4b7f4 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Aug 9 20:37:17 2023 -0500 Merge remote-tracking branch 'upstream/concedo' commit b7cb4cfd109986bd66e8fd382d1e2516eaddfebb Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Aug 9 20:00:52 2023 -0500 additional fixes commit fadae727baa3735ad3e0667384d6e05ca056b3ef Merge: 518eb2a 8f8ab6c Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Aug 9 18:45:50 2023 -0500 Merge branch 'hipblas' into develop4Main commit 518eb2af9225f8300a108c4244c7eb0a2217c3bc Merge: bda0215 cae6a84 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Aug 9 18:32:10 2023 -0500 Merge remote-tracking branch 'upstream/concedo' into develop2Main commit bda0215b413bafc49890aa23fc35f96a191fb3e0 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Aug 9 18:17:54 2023 -0500 update makefile to multisystem path commit 8f8ab6c4c049df501e9a5ed8fef3aa0fc0691421 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Aug 9 18:05:03 2023 -0500 hipLDFLAG Path change Unix to multisystem in Makefile changed the hardcoded linux distro hipblas LD path from -L/opt/rocm/lib to use the defined ROCM_PATH variable to be flexible with ROCm on non-Linux OS commit 610ba4cfc460ed65c4adc32d3365a216690384d5 Merge: 4024f91 25d43e0 Author: Henri Vasserman Date: Wed Aug 9 23:54:58 2023 +0300 Merge 'origin/master' into hipblas commit 4024f91a665d83b6de8658d45ec9d004c5d90c79 Author: Henri Vasserman Date: Wed Aug 9 01:56:44 2023 +0300 Add intrinsics polyfills for AMD --------- Co-authored-by: ardfork <134447697+ardfork@users.noreply.github.com> Co-authored-by: funnbot <22226942+funnbot@users.noreply.github.com> Co-authored-by: Engininja2 <139037756+Engininja2@users.noreply.github.com> commit ab6212864ce8e9af200bcedb3e0126ee49aa8d0a Merge: d91456a f5bfea0 Author: Henri Vasserman Date: Wed Aug 9 00:37:01 2023 +0300 Merge 'origin/master' into hipblas commit ee9fa2aca4f2e6645b99702935b34a5f8ec8f05d Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Aug 2 01:53:58 2023 -0500 Update Makefile commit d91456aaf138566fa0aa3d507964049c8a09499b Author: ardfork <134447697+ardfork@users.noreply.github.com> Date: Mon Jul 31 20:35:00 2023 +0300 fix half2 decomposition commit c1cb70d64d307d3fd9b7b9f61bb574e36520499a Author: Henri Vasserman Date: Mon Jul 31 19:56:44 2023 +0300 new build arg LLAMA_CUDA_MMQ_Y commit c1664a00ae98059df863a88cbcb13eeca3025742 Merge: 4336231 0728c5a Author: Henri Vasserman Date: Mon Jul 31 19:32:27 2023 +0300 Merge 'origin/master' into hipblas commit 848558d7d95a5036ac057efdefa9b2a2e6fb61b7 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 30 20:02:52 2023 -0500 import vars logic fix commit b650b849d52aac65364558521f76e75ded7ea590 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 30 00:21:36 2023 -0500 Update easy_KCPP-ROCm_install.sh commit 8573a67a29e813d82e7f032912a8c221cd199505 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 29 21:31:12 2023 -0500 remove duplicate code and fix typo remove duplicate tooltip commit 430986e3f68f599fd7a11ea4b2b8e45ef33da643 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 29 21:07:34 2023 -0500 hide "missing" if all are built move tooltip functions to helper functions section. hides the string "Missing: ..." from showing if all backends are available " if len(runopts)==6 else + " commit dd0db7265dbc0b0699ca861291006808b662b0e4 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 29 20:52:31 2023 -0500 hide "missing" if all are built move tooltip functions to helper functions section. hides the string "Missing: ..." from showing if all backends are available commit 43fffb66d8a30cbd776c3682f8a104c3644206b1 Merge: 0ed65a4 b40550c Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 29 19:13:15 2023 -0500 Merge branch 'concedo' commit 0ed65a44a5fdb529611730f276a4b910cbf70ae0 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 29 18:34:21 2023 -0500 Hide unavailable backends & Add tooltip over backend count Hides unavailable backends from the user and if the program is launched without any backends made, it shows an error message to them stating no backends were found and to make them using the 'make' command Add tooltip when hovering over backend count label hovering over the new label that shows the backend count will explain what the numbers are, and show the users which backends are not available or built commit 2a263983ab35024a95c411995963182ada06ed6f Merge: cee2e9d 31486eb Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 29 15:16:33 2023 -0500 Merge remote-tracking branch 'upstream/concedo' commit 4336231a32a0c6168da5d79801752289622e9e58 Author: Henri Vasserman Date: Sat Jul 29 18:35:56 2023 +0300 add hipBLAS to README --------- Co-authored-by: ardfork <134447697+ardfork@users.noreply.github.com> commit f8e3fc6c746b37d69656fb5ae6af8e411d85dbca Author: Henri Vasserman Date: Sat Jul 29 14:16:46 2023 +0300 rocblas init stuff commit d2ade639f4339e786311effb3eafca8bfc360d56 Merge: cde52d6 8a88e58 Author: Henri Vasserman Date: Sat Jul 29 12:59:48 2023 +0300 Merge 'origin/master' into hipblas commit cee2e9d76740fd8e8f50b612078f3e7658460f29 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jul 26 23:36:55 2023 -0500 Only Show Available Backends in GUI Hides unavailable backends from the user and if the program is launched without any backends made, it shows an error message to them stating no backends were found and to make them using the 'make' command commit 78636109fc2ded79ee3e9a44d2e3c2d63a8de70e Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jul 26 13:27:22 2023 -0500 Update easy_KCPP-ROCm_install.sh commit 731cd6e2ab9bb722e211142bb633e7018ccdb31b Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Tue Jul 25 22:39:50 2023 -0500 Create easy_rocm_install.sh commit f154685bbdc79b5ace752fbc179e32f2f7806bdb Merge: cbdc1f3 94e0a06 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Tue Jul 25 22:25:10 2023 -0500 Merge branch 'concedo_experimentalMAIN' commit cbdc1f3fb91969e79bc8640e0cebfc3247e200df Merge: 5b838d4 9731682 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 24 16:53:21 2023 -0500 Merge remote-tracking branch 'upstream/concedo' commit cde52d6a63f13f46d6403cc2957f4b4c34ddf4e2 Merge: 8e8054a 84e09a7 Author: Henri Vasserman Date: Mon Jul 24 12:22:58 2023 +0300 Merge 'origin/master' into hipblas commit 8e8054ad83e794b261914ad4f337d43e2c76882d Author: Henri Vasserman Date: Mon Jul 24 12:20:49 2023 +0300 Add rocblas to build files commit 1f6294dc4473701b5be791d47e4b3733f95dbc0a Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 24 03:52:01 2023 -0500 Fix multi GPU on multiple amd architectures with rocblas_initialize() (#5) * initialize rocblas commit 5b838d47874536ebffc2f6cb25877e0476a9402d Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 24 03:10:35 2023 -0500 amd multigpu full layer offload w/o vram scratch commit 9bfb2fdd68000670bda85c4e9748d72f5af09764 Merge: b379f9d 66328fc Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 24 03:07:44 2023 -0500 Merge branch 'concedo_experimental' commit b379f9d6fac570c220c928ff5f4ba4ed1ca7c051 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 24 03:07:00 2023 -0500 Revert "amd multigpu full layer offload w/o vram scratch" This reverts commit 9adfc8e33f7116d6ae2e0992920733f783b70d08. commit 9adfc8e33f7116d6ae2e0992920733f783b70d08 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 24 02:56:40 2023 -0500 amd multigpu full layer offload w/o vram scratch commit 05c792e622a1d9838f9343e04f79ddf2bb63ae96 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 24 00:18:48 2023 -0500 initialize rocblas commit ade68d09d7b63d3344e18b6193043b378671eb12 Merge: 521ad6b 56995ca Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 23 20:25:05 2023 -0500 Merge remote-tracking branch 'upstream/concedo' commit 521ad6b5cb2a107ad7b972025aeb0f353e0cac67 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu Jul 20 21:42:33 2023 -0500 lazy import_var error handling for saves commit 9553e52e7e4eabe46312729f6c4effeef6390df7 Merge: cac6650 f036109 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu Jul 20 19:59:41 2023 -0500 Merge remote-tracking branch 'upstream/concedo' commit cac6650754502208abfead61ba169fefc5ae84ac Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 17 23:05:02 2023 -0500 Makefile fix! Allows hip/clblast build together commit 3db70b5f0a1a4a1207041ddc5f2c5e25306bad4d Merge: 2ec4466 7568d1a Author: Henri Vasserman Date: Tue Jul 18 01:54:17 2023 +0300 Merge 'origin/master' into hipblas commit f208670ffb6cdbb1e225adfb2fd80a67a6dc5055 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Fri Jul 14 02:56:03 2023 -0500 improve error handling with gpu names commit 860e73845f61fe0afb6a26cc8054d8be1f9e3669 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Fri Jul 14 00:33:03 2023 -0500 Show GPU names in GUI, Only show GPUs that exist changed the pre-set 1,2,3 and 1,2,3,all settings that the GPU selector had and replaced them with a function that grabs the GPU names and sets the names as the values for the selector boxes. commit 2ec4466db54fd2f42f2ab7713cc1061e0cf59bf3 Author: Henri Vasserman Date: Thu Jul 13 13:44:02 2023 +0300 Update build flags. GGML_CUDA_DMMV_Y is now GGML_CUDA_MMV_Y so update your build instructions. GGML_CUDA_FORCE_DMMV is always enabled. --------- Co-authored-by: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> commit cd36b185ff6de91abbfd1b80366dd79a1303a878 Merge: afcb8fe 1cbf561 Author: Henri Vasserman Date: Thu Jul 13 13:03:01 2023 +0300 Merge 'origin/master' into hipblas commit ac7ebc3ac1deedfbc2940443b26774f1b4c85fae Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jul 12 18:32:18 2023 -0500 add hipBLAS name scheme to GUI and update README commit 7f85cc5ac30f2f300ca817a489ef209c995c634b Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jul 12 17:35:54 2023 -0500 update makefile and ggml.c commit 6ca3499275ba168320424f06ab3301ec329a6a83 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jul 12 15:43:45 2023 -0500 ggml.c fix commit 770e674aa5b2a1a9ffff2888a12e27b04ccfc7ef Merge: 2b289cd 5941514 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jul 12 15:24:36 2023 -0500 Merge remote-tracking branch 'upstream/concedo' commit 2b289cde558310c6c67dfc8d508c04e634595716 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jul 12 14:30:00 2023 -0500 Update c-cpp.yml commit 5dae95a9bb486c7f720789dffde1cfb470bffce0 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jul 12 14:28:51 2023 -0500 Update c-cpp.yml commit b37cd738c84debb53b149f5a9fb73de958f263fd Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jul 12 14:27:04 2023 -0500 Create c-cpp.yml to test Actions commit afcb8fe0c4f5e918422ea41d08824653d58575ed Author: Henri Vasserman Date: Tue Jul 11 18:09:27 2023 +0300 Add new config option commit 8c2c4978a32d671253809d8f0f09d98af2dd18ab Merge: e610466 2347463 Author: Henri Vasserman Date: Tue Jul 11 17:53:54 2023 +0300 Merge 'origin/master' into hipblas commit e610466307abc8f8bae641682ab3f91dbc33930e Author: Henri Vasserman Date: Tue Jul 11 17:53:14 2023 +0300 Expand arch list and make it overrideable commit 80e4e548bfbace2a966a58cb57dd1720ad7216b2 Merge: 7735c5a 1d16309 Author: Henri Vasserman Date: Mon Jul 10 02:09:28 2023 +0300 Merge 'origin/master' into hipblas commit 8432e9d5dc8d080535243467f8d380271e8d9489 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 9 16:55:30 2023 -0500 Update Makefile commit b58c1893fa839c0f35df96f6a8b026a7f2576762 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 9 16:20:00 2023 -0500 Add multi-gpu CuBLAS support to new GUI commit 0c1c71b9927127b45030fe88283dfbdd23853d34 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 8 07:56:57 2023 -0500 Update Makefile commit f864f60cd8e563e2594cee5a7da7e9aebed494f9 Author: Johannes Gäßler Date: Sat Jul 8 00:25:15 2023 +0200 CUDA: add __restrict__ to mul mat vec kernels (#2140) commit 4539bc2761a7a23b588b5420b9d3fd1962ff63e5 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 8 01:36:14 2023 -0500 update makefile for changes commit 912e31ec523eac9ef308f0d28bc2d93aab7c3ecb Merge: 74e2703 ddaa4f2 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Fri Jul 7 23:15:37 2023 -0500 Merge remote-tracking branch 'upstream/concedo' commit 74e2703ac3b1557f107e540657d0919db115f913 Merge: cf65429 f9108ba Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jul 5 15:16:49 2023 -0500 Merge branch 'LostRuins:concedo' into main commit 7735c5a9af58f6713b54fd5a4b6463f3b116d44d Merge: c3e3733 7ee76e4 Author: Henri Vasserman Date: Tue Jul 4 17:09:16 2023 +0300 Merge 'origin/master' into hipblas commit cf65429c3832d32a8c17c7ed5ab47066d7511fbe Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 3 16:56:40 2023 -0500 print cuda or opencl based on what's used commit 72c16d2310b2e4c44018e2084aeb79e68c0b8709 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 3 16:45:39 2023 -0500 Revert "fix my mistake that broke other arches" This reverts commit 777aed5e69e240a54e7d3da962d8520855f072b9. commit 777aed5e69e240a54e7d3da962d8520855f072b9 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jul 3 15:53:32 2023 -0500 fix my mistake that broke other arches commit 27780a987a8dabb18689038c0397e16f2f219c7e Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 2 16:03:27 2023 -0500 rocm fixes commit f52c7d439770c1ea0bebc1f895b74d6aeea5f0a6 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 2 16:02:58 2023 -0500 Revert "rocm fixes" This reverts commit 2fe9927353a1e53353623f850d3d534da88f5154. commit 2fe9927353a1e53353623f850d3d534da88f5154 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 2 15:58:21 2023 -0500 rocm fixes commit efe7560c83a497f5e750bbe27922babd4233bda9 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 2 15:55:43 2023 -0500 Revert "move HIPBLAS definitions into ggml-cuda.h" This reverts commit bf49a93d63f833b7871ba6e60f8fe207562678ee. commit 4fc0181e44685019dcd309d4bb345cac7a5fef87 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 2 15:55:36 2023 -0500 Revert "move hipblas definitions to header files" This reverts commit 2741ffb70464a71fd138484de4b41da05622e027. commit 89eb576f2771bd81a3a6274348b47535dfdd5f63 Merge: 2741ffb 3d2907d Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jul 2 14:44:13 2023 -0500 Merge branch 'LostRuins:concedo' into main commit c3e3733c61f7705ea00fd593ee94527da8c12f1b Author: Henri Vasserman Date: Sun Jul 2 15:51:31 2023 +0300 ROCm fixes commit 15db19ae7b70d2a6350063e633b898a89ad78cbc Merge: 04419f1 46088f7 Author: Henri Vasserman Date: Sun Jul 2 15:39:57 2023 +0300 Merge 'origin/master' into hipblas commit 2741ffb70464a71fd138484de4b41da05622e027 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 1 17:07:42 2023 -0500 move hipblas definitions to header files commit bf49a93d63f833b7871ba6e60f8fe207562678ee Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 1 16:38:50 2023 -0500 move HIPBLAS definitions into ggml-cuda.h commit 540f4e05f4e95378f46a83e2919d3962c0ef9eac Merge: 2c3b46f eda663f Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jul 1 14:58:32 2023 -0500 Merge remote-tracking branch 'upstream/concedo' commit 2c3b46f8a80ca9d94b2d3d06e1af6b6f7b791914 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu Jun 29 18:43:43 2023 -0500 changes to fix build commit c9e1103da0d72fd39a36391ac4b5d941a133598a Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu Jun 29 18:20:07 2023 -0500 Update ggml_v2-cuda-legacy.cu for ROCM commit b858fc5db80ed545a6fbeae3d551bddb47955598 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu Jun 29 17:49:39 2023 -0500 changes to work with upstream commit 69a0c2534bb8825f4009760b12d9bd44d108c6ed Merge: 096f0b0 1347d3a Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu Jun 29 16:59:06 2023 -0500 Merge remote-tracking branch 'upstream/concedo' commit 04419f18947e7b0dc43c07869eac3965f22b34cf Merge: bb16eff d3494bb Author: Henri Vasserman Date: Wed Jun 28 23:30:10 2023 +0300 Merge 'origin/master' into hipblas commit bb16effc750e2706050f5d4ec89cecc42cc13882 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jun 28 15:27:10 2023 -0500 headers fix; add kquants_iter for hipblas and add gfx803 (#1) * kquants_iter for hipblas and add gfx803 * Update CMakeLists.txt with hipblas kquants_iter and DMMV_F16 * remove dmmv_f16 for now commit 096f0b055e11b7d930842f86146d0e5013c5dce6 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jun 28 15:27:02 2023 -0500 revert unnecessary hipblas conditionals commit d81e81adffd6eb59e280ae1885864bb5fbd9bba6 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jun 28 14:48:23 2023 -0500 Update Makefile hipblas nvcc correction commit c8ae94524a8bd7dca891b6b711cb5598a30fcf74 Merge: c1e5c83 0be54f7 Author: Henri Vasserman Date: Tue Jun 27 10:50:37 2023 +0300 Merge 'origin/master' into hipblas commit 2579ecf8db9569d7756161f05ce7b0f5f23174b0 Merge: abed427 d2034ce Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jun 25 17:50:04 2023 -0500 Merge branch 'LostRuins:concedo' into main commit c1e5c8345eca45563d382d9417b84ed5f0ab77ff Merge: 35a6031 447ccbe Author: Henri Vasserman Date: Sun Jun 25 21:40:05 2023 +0300 Merge 'origin/master' into hipblas commit 35a603161a17ddeb6128e9d4718b8fab5e34b558 Merge: df7346c 66a2555 Author: Henri Vasserman Date: Sun Jun 25 10:57:48 2023 +0300 Merge 'origin/master' into hipblas commit abed427b6f370698fe8e8409e7980f238aad03ef Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jun 24 19:16:30 2023 -0500 reorganize If statements to include proper headers commit 06c3bf03b92c2e00fc4bcd27f0c34f32c58b19a9 Merge: ea6d320 8342fe8 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sat Jun 24 16:57:20 2023 -0500 Merge branch 'LostRuins:concedo' into main commit ea6d3208dcdc0b05e2c164dde8ee0bfc6a02ad09 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Fri Jun 23 01:53:28 2023 -0500 Update README.md commit 4d56ad8158595d1e835cb379939dc5526deb39e2 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu Jun 22 16:19:43 2023 -0500 Update README.md commit 21f930872b6e232679fe02eac9e429367365c6af Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu Jun 22 15:42:05 2023 -0500 kquants_iter for hipblas and add gfx803 commit df7346ccd52bc0368eeeb878e31a284e01eac61a Merge: 5dd2fbe 7487137 Author: Henri Vasserman Date: Thu Jun 22 20:51:09 2023 +0300 Merge 'origin/master' into hipblas commit b6ff89066bbf2de23dab90bc8bbf9f63d8d1e070 Merge: eb094f0 e6ddb15 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu Jun 22 12:42:09 2023 -0500 Merge branch 'LostRuins:concedo' into main commit eb094f043f9b0b94e7db028ca36e96ce479b0369 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jun 21 23:59:18 2023 -0500 lowvram parameter description commit 3a5dfeb568d543376910180caa9a99b081fef9d4 Merge: 665cc11 b1f00fa Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jun 21 16:53:03 2023 -0500 Merge branch 'LostRuins:concedo' into koboldcpp-rocm commit 665cc1136b188e7ff5c1aa1359118c999ff6d162 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Wed Jun 21 01:13:19 2023 -0500 add lowvram parameter commit 222cbbb141f7ce79884cafb6bcebd860ae27cc04 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Tue Jun 20 19:03:28 2023 -0500 add additional hipblas conditions for cublas commit e1f958124ec99525cb58d8c534f9d1789377544e Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Tue Jun 20 16:51:59 2023 -0500 Add hip def for cuda v2 commit 3bff5c0f0defd9d49b770c5ce107c71e5cba8003 Merge: a7e74b3 266d47a Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Tue Jun 20 13:38:06 2023 -0500 Merge branch 'LostRuins:concedo' into koboldcpp-rocm commit a7e74b39fe5eedf85d955fe5ea5f4c546322a9b0 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jun 19 22:04:18 2023 -0500 Update README.md commit 5e99b3cb72d83f45b3f7904ffb8f242e743a142c Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jun 19 22:03:42 2023 -0500 Update Makefile commit 9190b17432ebdc489ab05b71df6c3b8d5e7f5895 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Mon Jun 19 21:47:10 2023 -0500 Update README.md commit 5dd2fbe6ea87f78e38d888844a3820302a297048 Merge: 67e229b 20568fe Author: Henri Vasserman Date: Tue Jun 20 01:23:12 2023 +0300 Merge 'origin/master' into hipblas commit 2780ea292b1e9c6ead274de3afb34337716be08f Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jun 18 15:48:00 2023 -0500 Update Makefile commit 04a3e64807a92c2e105af92f16dd6db2ea024d39 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jun 18 14:33:39 2023 -0500 remove extra line commit cccbca9dea3780e797a3b4972ba211e0c762fdc1 Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jun 18 14:31:17 2023 -0500 attempt adding ROCM hipblas commit a44a1d4b90ed11d83d622eb976a945ff26a8974e Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jun 18 14:31:01 2023 -0500 attempt adding ROCM hipblas commit b08818416972f83349bc4d6479bccc55ee31436d Author: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Sun Jun 18 14:30:54 2023 -0500 attempt adding ROCM hipblas commit 67e229b7ca0a51f367c1e1495a15c261d0893d25 Merge: 6f7c156 b241649 Author: Henri Vasserman Date: Sun Jun 18 00:36:54 2023 +0300 Merge 'origin/master' into hipblas commit 6f7c15637a8ed60d5d5dade24aaf63a296bc32a6 Merge: 61df8e9 fc45a81 Author: Henri Vasserman Date: Sat Jun 17 16:53:22 2023 +0300 Merge 'origin/master' into hipblas commit 61df8e92179b84af9041e53f61d0194dfd791de0 Author: Henri Vasserman Date: Wed Jun 14 22:46:10 2023 +0300 add cudaMemset commit a836529996845343dfb96becb4fd48e3f55da55c Merge: 85f902d 254a7a7 Author: Henri Vasserman Date: Wed Jun 14 22:41:55 2023 +0300 Merge 'origin/master' into hipblas commit 85f902d5c44cee18858812212dad850b9409c7f9 Merge: 4362e80 b50b570 Author: Henri Vasserman Date: Thu Jun 8 10:50:28 2023 +0300 Merge 'origin/master' into hipblas commit 4362e805a4b0bd80d0cff0e3d8d0b1162cc8043c Merge: fa5b3d7 17366df Author: Henri Vasserman Date: Tue Jun 6 23:14:40 2023 +0300 Merge 'origin/master' into hipblas commit fa5b3d7365266a9903450c1105551ffec7f51d92 Author: Henri Vasserman Date: Tue Jun 6 18:47:00 2023 +0300 fix makefile. commit 1ba4ce4ad792f9672eecc37bf982386d3a007914 Author: Henri Vasserman Date: Tue Jun 6 18:41:08 2023 +0300 Revert "warp size fixes" It seems like 32 is faster for me, at least and it won't cause so many conflicts. This reverts commit 5d6eb72164e5ae000d07dd725e635faa7a2f723d. commit 5d6eb72164e5ae000d07dd725e635faa7a2f723d Author: Henri Vasserman Date: Tue Jun 6 18:32:41 2023 +0300 warp size fixes commit 33091a9bd3bb3ecf59b0f5535b084f443f6a20b6 Merge: 9fdaa1d 2d43387 Author: Henri Vasserman Date: Tue Jun 6 16:19:23 2023 +0300 Merge 'origin/master' into hipblas commit 9fdaa1d2501a2c4a030af6d34e97b2e4766b27c4 Author: Henri Vasserman Date: Sat May 27 19:17:53 2023 +0300 Add more defs For forward compatibility #1607 commit a4648c1e7c70b4985393ec0851403ef7fb8d1ffc Merge: 4c8b3fb 0ecb1bb Author: Henri Vasserman Date: Sat May 27 18:22:39 2023 +0300 Merge 'origin/master' into hipblas commit 4c8b3fb1071dff0cd0c4b4f96e506294ba6473f4 Author: Henri Vasserman Date: Fri May 26 01:08:53 2023 +0300 add configurable vars commit 30d921af3e0b21f511652c98448ccb631434d0d4 Author: Henri Vasserman Date: Fri May 26 01:03:56 2023 +0300 and makefile commit a593a4f6c24389528a5eed8e6dc86eb06ced38b8 Author: Henri Vasserman Date: Fri May 26 00:55:28 2023 +0300 Add missing parameters commit 174bf6a86d045a30b1253cbe3cc773808b202186 Merge: f80ce7a 1fcdcc2 Author: Henri Vasserman Date: Fri May 26 00:44:23 2023 +0300 Merge 'origin/master' into hipblas commit f80ce7a4e00b33adf6b13d231689dbf3a33ec475 Merge: 600ace3 ac7876a Author: Henri Vasserman Date: Thu May 25 00:02:50 2023 +0300 Merge branch 'origin/master' into hipblas commit 600ace39c8f1d311b8f3c49003f5a6448a44b18e Author: Henri Vasserman Date: Sat May 20 23:42:20 2023 +0300 update warp size commit b19fefef943d974db2eda8a8908e67e1d08e317c Author: Henri Vasserman Date: Sat May 20 23:28:08 2023 +0300 Forwardcompat commit c66115b833178ea3711543ddbbd4eb2b21ab523e Merge: a0b2d5f b8ee340 Author: Henri Vasserman Date: Sat May 20 18:29:31 2023 +0300 Merge 'origin/master' into hipblas commit a0b2d5f291cd04b16f71dd5d05d9f5126ba2376a Merge: 8bab456 2a5ee02 Author: Henri Vasserman Date: Tue May 16 17:08:29 2023 +0300 Merge 'origin/master' into hipblas commit 8bab45611e2ccc306d8b3b1e221772a3ce6b3f82 Merge: 2956630 b5c9295 Author: Henri Vasserman Date: Mon May 15 00:01:12 2023 +0300 Merge 'origin/master' into hipblas commit 2956630a3d48db75473a8e5a13009630e8493024 Merge: 0fe6384 f048af0 Author: Henri Vasserman Date: Sat May 13 13:12:52 2023 +0300 Merge 'origin/master' into hipblas commit 0fe6384755b478bd57c38e626db36f144c617b40 Author: Henri Vasserman Date: Fri May 12 17:22:11 2023 +0300 fix makefile commit 605560d9ec008559c1c4d6a2f0072d20bd12414b Merge: 127f68e 089b1c9 Author: Henri Vasserman Date: Fri May 12 16:12:53 2023 +0300 Merge 'origin/master' into hipblas commit 127f68eb5a578a0376892e4a63c365bf7304a49d Merge: 070cbcc b608b55 Author: Henri Vasserman Date: Thu May 11 20:21:27 2023 +0300 Merge 'origin/master' into hipblas commit 070cbcc1bd7f1b5049feec43507a320d22aac815 Author: Henri Vasserman Date: Sun May 7 18:10:56 2023 +0300 occupanct function commit a3296d50aa319e7f25477ad568691fdff5ca9f83 Merge: 0aefa6a e129551 Author: Henri Vasserman Date: Sun May 7 18:06:04 2023 +0300 Merge 'origin/master' into hipblas commit 0aefa6ab71af4dc21d18ce5ec763a0691992bb1c Merge: baeb482 1b0fd45 Author: Henri Vasserman Date: Sun May 7 12:24:41 2023 +0300 Merge 'origin/master' into hipblas commit baeb482a9429cb7d962da34e9820e62d14ffbe31 Author: Henri Vasserman Date: Sun May 7 12:24:12 2023 +0300 Revert to default copy commit 289073a5328dacb34774e07ddacb31ab8ebe14c5 Merge: 1107194 173d0e6 Author: Henri Vasserman Date: Sat May 6 19:59:41 2023 +0300 Merge 'origin/master' into hipblas commit 1107194e6b36de21fe1210f1383ffb314877fbc8 Merge: 04c0d48 a3b85b2 Author: Henri Vasserman Date: Sat May 6 00:38:20 2023 +0300 Merge 'origin/master' into hipblas commit 04c0d480d780b7e43f9cd5726b1c1d66570b57d8 Author: Henri Vasserman Date: Thu May 4 12:31:16 2023 +0300 Move all HIP stuff to ggml-cuda.cu commit d83cfbad0cd98b31cbf2b68586b2196f1af27a90 Merge: b67cc50 799fdc1 Author: Henri Vasserman Date: Thu May 4 11:31:16 2023 +0300 Merge 'origin/master' into hipblas commit b67cc50dadd60dafd51f13780388327d082251d1 Merge: fcbc262 e216aa0 Author: Henri Vasserman Date: Wed May 3 15:04:51 2023 +0300 Merge 'origin/master' into hipblas commit fcbc262eb975445c63f071d361a79a8afffe8352 Merge: c73def1 f4cef87 Author: Henri Vasserman Date: Mon May 1 22:45:29 2023 +0300 Merge 'origin/master' into hipblas commit c73def129a263ba59710794fc946ecd84b272167 Merge: d8ea75e f0d70f1 Author: Henri Vasserman Date: Sun Apr 30 18:40:42 2023 +0300 Merge 'origin/master' into hipblas commit d8ea75e9528cfa8a6c724844ab51334c392c2e19 Merge: d194586 334637e Author: Henri Vasserman Date: Sat Apr 29 11:25:51 2023 +0300 Merge 'origin/master' into hipblas commit d194586f6570e991a6c36c62bb0252a9bcdf1f02 Merge: 2ab9d11 7f15c5c Author: Henri Vasserman Date: Fri Apr 28 23:03:52 2023 +0300 Merge 'origin/master' into hipblas commit 2ab9d11f376d87944b26934e363cd2a189d09de8 Merge: 3b4a531 04aaae1 Author: Henri Vasserman Date: Fri Apr 28 16:30:05 2023 +0300 Merge 'origin/master' into hipblas commit 3b4a53138f29bf51fe50326daf9cd3a775da8be6 Merge: a1caa48 0b2da20 Author: Henri Vasserman Date: Fri Apr 28 10:08:41 2023 +0300 Merge 'origin/master' into hipblas commit a1caa486113eb3d1192c6d554feaff7419194313 Author: Henri Vasserman Date: Fri Apr 28 10:08:21 2023 +0300 add more cuda defines This is so 'slaren/cuda-f16f32' would merge. commit ecc056519fd08363922875b23956a13a7b6fbdcf Author: Henri Vasserman Date: Fri Apr 28 01:58:27 2023 +0300 only .cu file needs to be complied as device commit ef51e9ecac67154128c506cade79c4fb7ed17cb5 Merge: d571d16 4afcc37 Author: Henri Vasserman Date: Wed Apr 26 12:46:26 2023 +0300 Merge branch 'ggerganov:master' into hipblas commit d571d1629f470f2399d814edfe26ae9d8a59d7e6 Merge: 608aa33 dd0eabc Author: Henri Vasserman Date: Tue Apr 25 21:15:33 2023 +0300 Merge 'origin/master' into hipblas commit 608aa33d9f0ee8a7183ed4f9fb62532a65f5b097 Author: Henri Vasserman Date: Tue Apr 25 21:15:04 2023 +0300 change default GPU arch to match CMake commit 3a004b2a0166e412d8d54052c50bfd093611ad95 Author: Henri Vasserman Date: Mon Apr 24 02:24:54 2023 +0300 add rpath commit db7a01297e691caaf670a3afd197d2802af78d67 Merge: 3677235 284685f Author: Henri Vasserman Date: Sun Apr 23 21:49:28 2023 +0300 Merge 'origin/master' into hipblas commit 367723544c2187a2a6cd5954ca37a8faf5335e5a Author: Henri Vasserman Date: Sat Apr 22 23:28:00 2023 +0300 More build file changes commit d3e1984ce0df5af62ab69c1bdd55a743af4157cc Author: Henri Vasserman Date: Fri Apr 21 03:32:06 2023 +0300 add rpath commit 0e005f779357c9594b942adaf8d985edb071642a Author: Henri Vasserman Date: Fri Apr 21 02:13:00 2023 +0300 Build file changes Now HIP Clang is not required, the CMake scripts will configure the needed compiler, which can be system clang++. Also other code can still use GCC, but CMake will force the clang to link. commit 54a63c10e85bf454eb1ea99cc27d89cce06144b6 Author: Henri Vasserman Date: Thu Apr 20 22:19:22 2023 +0300 Update Makefile for the Cuda kernels commit 0fd8363adc3f248820f3908d5845c61c2fa36f6f Author: Henri Vasserman Date: Thu Apr 20 02:04:00 2023 +0300 use hipblas based on cublas * Merge Fixes * readme merge fix * remove old ggmlv2 changes * bring ggml v2_cuda up to date with AMD changes * Revert ggml v2_cuda changes BC they werent needed This reverts commit 3385dd4240e16ce78337aef8b6090348bf87e1c7. * avoid launching subprocesses to get device names for now, but other than that seems to be working --------- Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com> --- CMakeLists.txt | 40 ++++++++++++++- Makefile | 86 +++++++++++++++++++++++++------- gpttype_adapter.cpp | 4 +- koboldcpp.py | 41 +++++++++------ llama.cpp | 4 ++ otherarch/ggml_v2-cuda-legacy.cu | 55 ++++++++++++++++++++ otherarch/ggml_v2-cuda.cu | 58 ++++++++++++++++++++- otherarch/gpt2_v3.cpp | 12 ++++- otherarch/gptj_v3.cpp | 14 ++++-- otherarch/llama_v2.cpp | 2 +- otherarch/mpt_v3.cpp | 12 ++++- otherarch/neox_v3.cpp | 14 ++++-- 12 files changed, 291 insertions(+), 51 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 466ffd04f..a3cb22e0c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,13 +43,14 @@ if (NOT MSVC) endif() # 3rd party libs -option(LLAMA_CUBLAS "llama: use CUDA" ON) +option(LLAMA_CUBLAS "llama: use CUDA" OFF) set(LLAMA_CUDA_MMQ_Y "64" CACHE STRING "llama: y tile size for mmq CUDA kernels") set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels") set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") option(LLAMA_CUDA_F16 "llama: use 16 bit floats for dmmv CUDA kernels" OFF) set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) option(LLAMA_K_QUANTS "llama: use k-quants" ON) @@ -121,6 +122,43 @@ if (LLAMA_CUBLAS) endif() endif() +if (LLAMA_HIPBLAS) + list(APPEND CMAKE_PREFIX_PATH /opt/rocm) + + if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang") + message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang") + endif() + if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") + message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++") + endif() + + find_package(hip) + find_package(hipblas) + find_package(rocblas) + + if (${hipblas_FOUND} AND ${hip_FOUND}) + message(STATUS "HIP and hipBLAS found") + add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS) + add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h) + if (LLAMA_CUDA_FORCE_DMMV) + target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_FORCE_DMMV) + endif() + target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) + target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y}) + target_compile_definitions(ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) + target_compile_definitions(ggml-rocm PRIVATE CC_TURING=1000000000) + set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX) + target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas) + + if (LLAMA_STATIC) + message(FATAL_ERROR "Static linking not supported for HIP/ROCm") + endif() + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ggml-rocm) + else() + message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm") + endif() +endif() + if (LLAMA_ALL_WARNINGS) if (NOT MSVC) set(c_flags diff --git a/Makefile b/Makefile index f7cf21d5c..1b2c6bc7f 100644 --- a/Makefile +++ b/Makefile @@ -20,8 +20,6 @@ ifneq ($(shell grep -e "Arch Linux" -e "ID_LIKE=arch" /etc/os-release 2>/dev/nul ARCH_ADD = -lcblas endif -CCV := $(shell $(CC) --version | head -n 1) -CXXV := $(shell $(CXX) --version | head -n 1) # Mac OS + Arm can report x86_64 # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789 @@ -195,6 +193,45 @@ ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-l $(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@ endif # LLAMA_CUBLAS +ifdef LLAMA_HIPBLAS + ROCM_PATH ?= /opt/rocm + CC := $(ROCM_PATH)/llvm/bin/clang + CXX := $(ROCM_PATH)/llvm/bin/clang++ + GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 + LLAMA_CUDA_DMMV_X ?= 128 + LLAMA_CUDA_MMV_Y ?= 2 + LLAMA_CUDA_KQUANTS_ITER ?= 1 + HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C) +ifdef LLAMA_CUDA_FORCE_DMMV + HIPFLAGS += -DGGML_CUDA_FORCE_DMMV +endif # LLAMA_CUDA_FORCE_DMMV + HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas + HIP_OBJS += ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o +ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \ + -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \ + -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \ + -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \ + -DCC_TURING=1000000000 +ggml_v2-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \ + -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \ + -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \ + -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \ + -DCC_TURING=1000000000 +ggml_v2-cuda-legacy.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \ + -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \ + -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \ + -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \ + -DCC_TURING=1000000000 # DGGML_CUDA_DMMV_F16 does not currently work with AMD. +ggml-cuda.o: ggml-cuda.cu ggml-cuda.h + $(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< +ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h + $(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< +ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h + $(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< +endif # LLAMA_HIPBLAS + + + ifdef LLAMA_METAL CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG CXXFLAGS += -DGGML_USE_METAL @@ -224,12 +261,16 @@ ifneq ($(filter armv8%,$(UNAME_M)),) CFLAGS += -mfp16-format=ieee -mno-unaligned-access endif +CCV := $(shell $(CC) --version | head -n 1) +CXXV := $(shell $(CXX) --version | head -n 1) + DEFAULT_BUILD = FAILSAFE_BUILD = OPENBLAS_BUILD = NOAVX2_BUILD = CLBLAST_BUILD = CUBLAS_BUILD = +HIPBLAS_BUILD = ifeq ($(OS),Windows_NT) DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) @@ -238,10 +279,12 @@ ifeq ($(OS),Windows_NT) NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS) -ifdef LLAMA_CUBLAS - CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.dll $(CUBLASLD_FLAGS) $(LDFLAGS) -endif - + ifdef LLAMA_CUBLAS + CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.dll $(CUBLASLD_FLAGS) $(LDFLAGS) + endif + ifdef LLAMA_HIPBLAS + HIPBLAS_BUILD = $(CXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.dll $(HIPLDFLAGS) $(LDFLAGS) + endif else DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) @@ -250,24 +293,29 @@ else NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) endif ifdef LLAMA_CLBLAST - ifeq ($(UNAME_S),Darwin) - CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) - else - CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) - endif + ifeq ($(UNAME_S),Darwin) + CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) + else + CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) + endif endif -ifdef LLAMA_CUBLAS - CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS) -endif + ifdef LLAMA_CUBLAS + CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS) + endif + ifdef LLAMA_HIPBLAS + HIPBLAS_BUILD = $(CXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.so $(HIPLDFLAGS) $(LDFLAGS) + endif ifndef LLAMA_OPENBLAS ifndef LLAMA_CLBLAST ifndef LLAMA_CUBLAS + ifndef LLAMA_HIPBLAS OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.' endif endif endif + endif endif @@ -302,7 +350,7 @@ ggml_noavx2.o: ggml.c ggml.h ggml_clblast.o: ggml.c ggml.h $(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ ggml_cublas.o: ggml.c ggml.h - $(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ #quants K k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h @@ -328,7 +376,7 @@ ggml_v2_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h $(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h - $(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ #extreme old version compat ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h @@ -365,7 +413,7 @@ gpttype_adapter.o: $(GPTTYPE_ADAPTER) gpttype_adapter_clblast.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER) - $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) -c $< -o $@ + $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ clean: rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so @@ -390,8 +438,8 @@ koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o com $(NOAVX2_BUILD) koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o ggml-alloc.o $(OBJS) $(CLBLAST_BUILD) -koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o $(CUBLAS_OBJS) $(OBJS) - $(CUBLAS_BUILD) +koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o $(CUBLAS_OBJS) $(HIP_OBJS) $(OBJS) + $(CUBLAS_BUILD) $(HIPBLAS_BUILD) quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o ggml-alloc.o $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index ee8a47a26..37392b8c4 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -440,7 +440,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in //this is used for the mem_per_token eval, openblas needs more RAM bool use_scratch = ggml_cpu_has_gpublas(); - int cu_parseinfo_maindevice = inputs.cublas_info<0?0:inputs.cublas_info; + int cu_parseinfo_maindevice = inputs.cublas_info<=0?0:inputs.cublas_info; printf("System Info: %s\n", llama_print_system_info()); #if defined(GGML_USE_CUBLAS) @@ -530,7 +530,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in if(!ts_all_zero) { llama_ctx_params.tensor_split = inputs.tensor_split; - printf("CUBLAS: Applying Custom Tensor Split!\n"); } #endif @@ -600,7 +599,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in if(!ts_all_zero) { llama_ctx_params.tensor_split = inputs.tensor_split; - printf("CUBLAS: Applying Custom Tensor Split!\n"); } #endif diff --git a/koboldcpp.py b/koboldcpp.py index 96d422685..6289ffd1b 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -215,13 +215,6 @@ def load_model(model_filename): if args.useclblast: clblastids = 100 + int(args.useclblast[0])*10 + int(args.useclblast[1]) inputs.clblast_info = clblastids - inputs.cublas_info = 0 - if (args.usecublas and "0" in args.usecublas): - os.environ["CUDA_VISIBLE_DEVICES"] = "0" - elif (args.usecublas and "1" in args.usecublas): - os.environ["CUDA_VISIBLE_DEVICES"] = "1" - elif (args.usecublas and "2" in args.usecublas): - os.environ["CUDA_VISIBLE_DEVICES"] = "2" for n in range(tensor_split_max): if args.tensor_split and n < len(args.tensor_split): @@ -229,6 +222,22 @@ def load_model(model_filename): else: inputs.tensor_split[n] = 0 + # we must force an explicit tensor split + # otherwise the default will divide equally and multigpu crap will slow it down badly + inputs.cublas_info = 0 + if (args.usecublas and "0" in args.usecublas): + inputs.cublas_info = 0 + if not args.tensor_split: + inputs.tensor_split[inputs.cublas_info] = 100 + elif (args.usecublas and "1" in args.usecublas): + inputs.cublas_info = 1 + if not args.tensor_split: + inputs.tensor_split[inputs.cublas_info] = 100 + elif (args.usecublas and "2" in args.usecublas): + inputs.cublas_info = 2 + if not args.tensor_split: + inputs.tensor_split[inputs.cublas_info] = 100 + inputs.executable_path = (getdirpath()+"/").encode("UTF-8") inputs.debugmode = args.debugmode banned_tokens = args.bantokens @@ -730,7 +739,7 @@ def show_new_gui(): lib_option_pairs = [ (lib_openblas, "Use OpenBLAS"), (lib_clblast, "Use CLBlast"), - (lib_cublas, "Use CuBLAS"), + (lib_cublas, "Use CuBLAS/hipBLAS"), (lib_default, "Use No BLAS"), (lib_noavx2, "NoAVX2 Mode (Old CPU)"), (lib_failsafe, "Failsafe Mode (Old CPU)")] @@ -895,7 +904,7 @@ def show_new_gui(): def changerunmode(a,b,c): index = runopts_var.get() - if index == "Use CLBlast" or index == "Use CuBLAS": + if index == "Use CLBlast" or index == "Use CuBLAS/hipBLAS": gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") if index == "Use CLBlast": @@ -903,7 +912,7 @@ def show_new_gui(): quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") if gpu_choice_var.get()=="All": gpu_choice_var.set("1") - elif index == "Use CuBLAS": + elif index == "Use CuBLAS/hipBLAS": CUDA_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") CUDA_quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") else: @@ -914,7 +923,7 @@ def show_new_gui(): quick_gpu_selector_box.grid_forget() CUDA_quick_gpu_selector_box.grid_forget() - if index == "Use CuBLAS": + if index == "Use CuBLAS/hipBLAS": lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw") quick_lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw") mmq_box.grid(row=4, column=1, padx=8, pady=1, stick="nw") @@ -925,7 +934,7 @@ def show_new_gui(): mmq_box.grid_forget() quick_mmq_box.grid_forget() - if index == "Use CLBlast" or index == "Use CuBLAS": + if index == "Use CLBlast" or index == "Use CuBLAS/hipBLAS": gpu_layers_label.grid(row=5, column=0, padx = 8, pady=1, stick="nw") gpu_layers_entry.grid(row=5, column=1, padx=8, pady=1, stick="nw") quick_gpu_layers_label.grid(row=5, column=0, padx = 8, pady=1, stick="nw") @@ -1111,7 +1120,7 @@ def show_new_gui(): gpuchoiceidx = int(gpu_choice_var.get())-1 if runopts_var.get() == "Use CLBlast": args.useclblast = [[0,0], [1,0], [0,1]][gpuchoiceidx] - if runopts_var.get() == "Use CuBLAS": + if runopts_var.get() == "Use CuBLAS/hipBLAS": if gpu_choice_var.get()=="All": args.usecublas = ["lowvram"] if lowvram_var.get() == 1 else ["normal"] else: @@ -1337,7 +1346,7 @@ def show_old_gui(): blaschoice = tk.StringVar() blaschoice.set("BLAS = 512") - runopts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use CuBLAS GPU","Use No BLAS","NoAVX2 Mode (Old CPU)","Failsafe Mode (Old CPU)"] + runopts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use CuBLAS/hipBLAS GPU","Use No BLAS","NoAVX2 Mode (Old CPU)","Failsafe Mode (Old CPU)"] runchoice = tk.StringVar() runchoice.set("Use OpenBLAS") @@ -1779,8 +1788,8 @@ if __name__ == '__main__': compatgroup = parser.add_mutually_exclusive_group() compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true') compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2) - compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq]'), choices=['normal', 'lowvram', '0', '1', '2', 'mmq']) + compatgroup.add_argument("--usecublas", help="Use CuBLAS/hipBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq]'), choices=['normal', 'lowvram', '0', '1', '2', 'mmq']) parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0) parser.add_argument("--tensor_split", help="For CUDA with ALL GPU set only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+') - main(parser.parse_args(),start_server=True) + main(parser.parse_args(),start_server=True) \ No newline at end of file diff --git a/llama.cpp b/llama.cpp index dae8c1484..9ee84345e 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2055,7 +2055,11 @@ static void llm_load_tensors( #ifdef GGML_USE_CUBLAS const int max_backend_supported_layers = hparams.n_layer + 3; +#if defined(GGML_USE_HIPBLAS) + const int max_offloadable_layers = low_vram ? hparams.n_layer + 3 : hparams.n_layer + 3; +#else const int max_offloadable_layers = low_vram ? hparams.n_layer + 1 : hparams.n_layer + 3; +#endif if (n_gpu_layers > (int) hparams.n_layer + 1) { if (low_vram) { LLAMA_LOG_INFO("%s: cannot offload v cache to GPU due to low VRAM option\n", __func__); diff --git a/otherarch/ggml_v2-cuda-legacy.cu b/otherarch/ggml_v2-cuda-legacy.cu index d3220a786..e7053b764 100644 --- a/otherarch/ggml_v2-cuda-legacy.cu +++ b/otherarch/ggml_v2-cuda-legacy.cu @@ -4,9 +4,64 @@ #include #include +#if defined(GGML_USE_HIPBLAS) +#include +#include +#include +#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F +#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F +#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT +#define CUBLAS_OP_N HIPBLAS_OP_N +#define CUBLAS_OP_T HIPBLAS_OP_T +#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS +#define CUBLAS_TF32_TENSOR_OP_MATH 0 +#define CUDA_R_16F HIPBLAS_R_16F +#define CUDA_R_32F HIPBLAS_R_32F +#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) +#define cublasCreate hipblasCreate +#define cublasGemmEx hipblasGemmEx +#define cublasHandle_t hipblasHandle_t +#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS +#define cublasSetStream hipblasSetStream +#define cublasSgemm hipblasSgemm +#define cublasStatus_t hipblasStatus_t +#define cudaDeviceProp hipDeviceProp_t +#define cudaDeviceSynchronize hipDeviceSynchronize +#define cudaError_t hipError_t +#define cudaEventCreateWithFlags hipEventCreateWithFlags +#define cudaEventDisableTiming hipEventDisableTiming +#define cudaEventRecord hipEventRecord +#define cudaEvent_t hipEvent_t +#define cudaFree hipFree +#define cudaFreeHost hipHostFree +#define cudaGetDevice hipGetDevice +#define cudaGetDeviceCount hipGetDeviceCount +#define cudaGetDeviceProperties hipGetDeviceProperties +#define cudaGetErrorString hipGetErrorString +#define cudaGetLastError hipGetLastError +#define cudaMalloc hipMalloc +#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault) +#define cudaMemcpy hipMemcpy +#define cudaMemcpy2DAsync hipMemcpy2DAsync +#define cudaMemcpyAsync hipMemcpyAsync +#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice +#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost +#define cudaMemcpyHostToDevice hipMemcpyHostToDevice +#define cudaMemcpyKind hipMemcpyKind +#define cudaMemset hipMemset +#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize +#define cudaSetDevice hipSetDevice +#define cudaStreamCreateWithFlags hipStreamCreateWithFlags +#define cudaStreamNonBlocking hipStreamNonBlocking +#define cudaStreamSynchronize hipStreamSynchronize +#define cudaStreamWaitEvent hipStreamWaitEvent +#define cudaStream_t hipStream_t +#define cudaSuccess hipSuccess +#else #include #include #include +#endif #include "ggml_v2-cuda-legacy.h" #include "ggml_v2-cuda.h" diff --git a/otherarch/ggml_v2-cuda.cu b/otherarch/ggml_v2-cuda.cu index 8314adb25..b4502df00 100644 --- a/otherarch/ggml_v2-cuda.cu +++ b/otherarch/ggml_v2-cuda.cu @@ -4,10 +4,66 @@ #include #include +#if defined(GGML_USE_HIPBLAS) +#include +#include +#include +#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F +#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F +#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT +#define CUBLAS_OP_N HIPBLAS_OP_N +#define CUBLAS_OP_T HIPBLAS_OP_T +#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS +#define CUBLAS_TF32_TENSOR_OP_MATH 0 +#define CUDA_R_16F HIPBLAS_R_16F +#define CUDA_R_32F HIPBLAS_R_32F +#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) +#define cublasCreate hipblasCreate +#define cublasGemmEx hipblasGemmEx +#define cublasHandle_t hipblasHandle_t +#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS +#define cublasSetStream hipblasSetStream +#define cublasSgemm hipblasSgemm +#define cublasStatus_t hipblasStatus_t +#define cudaDeviceProp hipDeviceProp_t +#define cudaDeviceSynchronize hipDeviceSynchronize +#define cudaError_t hipError_t +#define cudaEventCreateWithFlags hipEventCreateWithFlags +#define cudaEventDisableTiming hipEventDisableTiming +#define cudaEventRecord hipEventRecord +#define cudaEvent_t hipEvent_t +#define cudaFree hipFree +#define cudaFreeHost hipHostFree +#define cudaGetDevice hipGetDevice +#define cudaGetDeviceCount hipGetDeviceCount +#define cudaGetDeviceProperties hipGetDeviceProperties +#define cudaGetErrorString hipGetErrorString +#define cudaGetLastError hipGetLastError +#define cudaMalloc hipMalloc +#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault) +#define cudaMemcpy hipMemcpy +#define cudaMemcpy2DAsync hipMemcpy2DAsync +#define cudaMemcpyAsync hipMemcpyAsync +#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice +#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost +#define cudaMemcpyHostToDevice hipMemcpyHostToDevice +#define cudaMemcpyKind hipMemcpyKind +#define cudaMemset hipMemset +#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize +#define cudaSetDevice hipSetDevice +#define cudaStreamCreateWithFlags hipStreamCreateWithFlags +#define cudaStreamNonBlocking hipStreamNonBlocking +#define cudaStreamSynchronize hipStreamSynchronize +#define cudaStreamWaitEvent hipStreamWaitEvent +#define cudaStream_t hipStream_t +#define cudaSuccess hipSuccess +#else #include #include #include +#endif + #include "ggml_v2-cuda.h" #include "ggml_v2.h" @@ -807,4 +863,4 @@ void ggml_v2_cuda_transform_tensor(ggml_v2_tensor * tensor) { tensor->data = d_Q; tensor->backend = GGML_V2_BACKEND_CUDA; -} \ No newline at end of file +} diff --git a/otherarch/gpt2_v3.cpp b/otherarch/gpt2_v3.cpp index 981e01c7c..97b23265f 100644 --- a/otherarch/gpt2_v3.cpp +++ b/otherarch/gpt2_v3.cpp @@ -359,7 +359,11 @@ ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, g const auto & hparams = model.hparams; size_t vram_total = 0; const int n_gpu = std::min(gpulayers, int(hparams.n_layer)); - fprintf(stderr, "%s: [GPU] offloading %d layers to GPU\n", __func__, n_gpu); + #if defined(GGML_USE_CLBLAST) + fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu); + #else + fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu); + #endif for (int i = 0; i < n_gpu; ++i) { const auto & layer = model.layers[i]; layer.c_attn_attn_w->backend = GGML_BACKEND_GPU; @@ -378,7 +382,11 @@ ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, g ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data,layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w); #endif } - fprintf(stderr, "%s: [GPU] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #if defined(GGML_USE_CLBLAST) + fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #else + fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #endif } #endif diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp index 8f7cc47f1..42512e190 100644 --- a/otherarch/gptj_v3.cpp +++ b/otherarch/gptj_v3.cpp @@ -348,7 +348,11 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g const auto & hparams = model.hparams; size_t vram_total = 0; const int n_gpu = std::min(gpulayers, int(hparams.n_layer)); - fprintf(stderr, "%s: [GPU] offloading %d layers to GPU\n", __func__, n_gpu); + #if defined(GGML_USE_CLBLAST) + fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu); + #else + fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu); + #endif for (int i = 0; i < n_gpu; ++i) { const auto & layer = model.layers[i]; layer.c_attn_q_proj_w->backend = GGML_BACKEND_GPU; @@ -373,7 +377,11 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data,layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w); #endif } - fprintf(stderr, "%s: [GPU] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #if defined(GGML_USE_CLBLAST) + fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #else + fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #endif } #endif @@ -644,4 +652,4 @@ bool gptj_eval( ggml_free(ctx0); return true; -} \ No newline at end of file +} diff --git a/otherarch/llama_v2.cpp b/otherarch/llama_v2.cpp index ab9d82f93..01b47697c 100644 --- a/otherarch/llama_v2.cpp +++ b/otherarch/llama_v2.cpp @@ -3101,4 +3101,4 @@ std::vector llama_v2_tokenize(struct llama_v2_context * ctx, const res.resize(n); return res; -} \ No newline at end of file +} diff --git a/otherarch/mpt_v3.cpp b/otherarch/mpt_v3.cpp index 2bf23055c..57ed90888 100644 --- a/otherarch/mpt_v3.cpp +++ b/otherarch/mpt_v3.cpp @@ -301,7 +301,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo const auto & hparams = model.hparams; size_t vram_total = 0; const int n_gpu = std::min(gpulayers, int(hparams.n_layers)); - fprintf(stderr, "%s: [GPU] offloading %d layers to GPU\n", __func__, n_gpu); + #if defined(GGML_USE_CLBLAST) + fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu); + #else + fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu); + #endif for (int i = 0; i < n_gpu; ++i) { const auto & layer = model.layers[i]; layer.ffn_up_proj->backend = GGML_BACKEND_GPU; @@ -320,7 +324,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo ggml_cuda_transform_tensor(layer.c_attn_out_proj_weight->data,layer.c_attn_out_proj_weight); vram_total += ggml_nbytes(layer.c_attn_out_proj_weight); #endif } - fprintf(stderr, "%s: [GPU] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #if defined(GGML_USE_CLBLAST) + fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #else + fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #endif } #endif diff --git a/otherarch/neox_v3.cpp b/otherarch/neox_v3.cpp index 7802cab86..d9fb93b28 100644 --- a/otherarch/neox_v3.cpp +++ b/otherarch/neox_v3.cpp @@ -335,7 +335,11 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model & const auto & hparams = model.hparams; size_t vram_total = 0; const int n_gpu = std::min(gpulayers, int(hparams.n_layer)); - fprintf(stderr, "%s: [GPU] offloading %d layers to GPU\n", __func__, n_gpu); + #if defined(GGML_USE_CLBLAST) + fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu); + #else + fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu); + #endif for (int i = 0; i < n_gpu; ++i) { const auto & layer = model.layers[i]; layer.c_attn_attn_w->backend = GGML_BACKEND_GPU; @@ -354,7 +358,11 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model & ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data,layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w); #endif } - fprintf(stderr, "%s: [GPU] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #if defined(GGML_USE_CLBLAST) + fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #else + fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024); + #endif } #endif @@ -663,4 +671,4 @@ bool gpt_neox_eval( ggml_free(ctx0); return true; -} \ No newline at end of file +}