diff --git a/CLINFO_LICENSE b/CLINFO_LICENSE deleted file mode 100644 index b238ac5a5..000000000 --- a/CLINFO_LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Windows binaries obtained from the clinfo repo fork here: - -https://github.com/ahoylabs/clinfo/releases/tag/master-d2baa06 - -Source available here: -https://github.com/Oblomov/clinfo - -see below LICENSE file for details on clinfo license - -======= - -clinfo by Giuseppe Bilotta - -To the extent possible under law, the person who associated CC0 with -clinfo has waived all copyright and related or neighboring rights -to clinfo. - -You should have received a copy of the CC0 legalcode along with this -work. If not, see diff --git a/Makefile b/Makefile index 02885242b..958ed9e4e 100644 --- a/Makefile +++ b/Makefile @@ -491,6 +491,10 @@ quantize_neox: ggml.o llama.o ggml-quants.o ggml-alloc.o ggml-backend.o otherarc quantize_mpt: ggml.o llama.o ggml-quants.o ggml-alloc.o ggml-backend.o otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) +#window simple clinfo +simpleclinfo: simpleclinfo.cpp + $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -o $@ $(LDFLAGS) + build-info.h: $(DONOTHING) diff --git a/clinfo_win.exe b/clinfo_win.exe deleted file mode 100644 index 45fe13d5c..000000000 Binary files a/clinfo_win.exe and /dev/null differ diff --git a/koboldcpp.py b/koboldcpp.py index 654894fa9..a76b44788 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -895,7 +895,7 @@ def show_new_gui(): import customtkinter as ctk nextstate = 0 #0=exit, 1=launch - windowwidth = 530 + windowwidth = 540 windowheight = 500 ctk.set_appearance_mode("dark") root = ctk.CTk() @@ -915,6 +915,11 @@ def show_new_gui(): tabcontentframe.grid(row=0, column=1, sticky="nsew", padx=2, pady=2) tabcontentframe.grid_propagate(False) + CLDevices = ["1","2","3","4"] + CUDevices = ["1","2","3","4","All"] + CLDevicesNames = ["","","",""] + CUDevicesNames = ["","","","",""] + tabcontent = {} lib_option_pairs = [ (lib_openblas, "Use OpenBLAS"), @@ -1003,6 +1008,53 @@ def show_new_gui(): button.grid(row=row+1, column=1, stick="nw") return + # decided to follow yellowrose's and kalomaze's suggestions, this function will automatically try to determine GPU identifiers + # todo: autopick the right number of layers when a model is selected. + # run in new thread so it doesnt block. does not return anything, instead overwrites specific values and redraws GUI + def auto_gpu_heuristics(): + from subprocess import run, CalledProcessError + FetchedCUdevices = [] + try: # Get OpenCL GPU names on windows using a special binary. overwrite at known index if found. + if os.name == 'nt': + basepath = os.path.abspath(os.path.dirname(__file__)) + output = run([os.path.join(basepath, "simpleclinfo.exe")], capture_output=True, text=True, check=True, encoding='utf-8').stdout + for line in output.splitlines(): + pd = line.split("=")[0].strip() + name = line.split("=")[1].strip() + plat = int(pd.split(" ")[0].strip()) + dev = int(pd.split(" ")[1].strip()) + idx = plat+dev*2 + if idxidx): + CUDevicesNames[idx] = FetchedCUdevices[idx] + pass + + changed_gpu_choice_var() + return + def show_tooltip(event, tooltip_text=None): if hasattr(show_tooltip, "_tooltip"): tooltip = show_tooltip._tooltip @@ -1025,17 +1077,34 @@ def show_new_gui(): def setup_backend_tooltip(parent): num_backends_built = makelabel(parent, str(len(runopts)) + f"/{6 if os.name == 'nt' else 4}", 5, 2) - num_backends_built.grid(row=1, column=2, padx=0, pady=0) + num_backends_built.grid(row=1, column=1, padx=195, pady=0) num_backends_built.configure(text_color="#00ff00") # Bind the backend count label with the tooltip function num_backends_built.bind("", lambda event: show_tooltip(event, f"This is the number of backends you have built and available." + (f"\nMissing: {', '.join(antirunopts)}" if len(runopts) != 6 else ""))) num_backends_built.bind("", hide_tooltip) + def changed_gpu_choice_var(*args): + if gpu_choice_var.get()!="All": + try: + s = int(gpu_choice_var.get())-1 + if runopts_var.get() == "Use CLBlast": + quick_gpuname_label.configure(text=CLDevicesNames[s]) + gpuname_label.configure(text=CLDevicesNames[s]) + else: + quick_gpuname_label.configure(text=CUDevicesNames[s]) + gpuname_label.configure(text=CUDevicesNames[s]) + except Exception as ex: + pass + else: + quick_gpuname_label.configure(text="") + gpuname_label.configure(text="") + # Vars - should be in scope to be used by multiple widgets gpulayers_var = ctk.StringVar(value="0") threads_var = ctk.StringVar(value=str(default_threads)) runopts_var = ctk.StringVar() gpu_choice_var = ctk.StringVar(value="1") + gpu_choice_var.trace("w", changed_gpu_choice_var) launchbrowser = ctk.IntVar(value=1) highpriority = ctk.IntVar() @@ -1079,6 +1148,8 @@ def show_new_gui(): def changerunmode(a,b,c): index = runopts_var.get() if index == "Use CLBlast" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": + quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W") + gpuname_label.grid(row=3, column=1, padx=75, sticky="W") gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") if index == "Use CLBlast": @@ -1090,6 +1161,8 @@ def show_new_gui(): CUDA_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") CUDA_quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") else: + quick_gpuname_label.grid_forget() + gpuname_label.grid_forget() gpu_selector_label.grid_forget() gpu_selector_box.grid_forget() CUDA_gpu_selector_box.grid_forget() @@ -1122,6 +1195,7 @@ def show_new_gui(): gpu_layers_entry.grid_forget() quick_gpu_layers_label.grid_forget() quick_gpu_layers_entry.grid_forget() + changed_gpu_choice_var() # presets selector @@ -1136,8 +1210,11 @@ def show_new_gui(): # gpu options quick_gpu_selector_label = makelabel(quick_tab, "GPU ID:", 3) - quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3","4"], width=60, variable=gpu_choice_var, state="readonly") - CUDA_quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=["1","2","3","4","All"], width=60, variable=gpu_choice_var, state="readonly") + quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=CLDevices, width=60, variable=gpu_choice_var, state="readonly") + CUDA_quick_gpu_selector_box = ctk.CTkComboBox(quick_tab, values=CUDevices, width=60, variable=gpu_choice_var, state="readonly") + quick_gpuname_label = ctk.CTkLabel(quick_tab, text="") + quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W") + quick_gpuname_label.configure(text_color="#ffff00") quick_gpu_layers_entry,quick_gpu_layers_label = makelabelentry(quick_tab,"GPU Layers:", gpulayers_var, 5, 50) quick_lowvram_box = makecheckbox(quick_tab, "Low VRAM", lowvram_var, 4,0) quick_mmq_box = makecheckbox(quick_tab, "Use QuantMatMul (mmq)", mmq_var, 4,1) @@ -1172,8 +1249,11 @@ def show_new_gui(): # gpu options gpu_selector_label = makelabel(hardware_tab, "GPU ID:", 3) - gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3","4"], width=60, variable=gpu_choice_var, state="readonly") - CUDA_gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=["1","2","3","4", "All"], width=60, variable=gpu_choice_var, state="readonly") + gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=CLDevices, width=60, variable=gpu_choice_var, state="readonly") + CUDA_gpu_selector_box = ctk.CTkComboBox(hardware_tab, values=CUDevices, width=60, variable=gpu_choice_var, state="readonly") + gpuname_label = ctk.CTkLabel(hardware_tab, text="") + gpuname_label.grid(row=3, column=1, padx=75, sticky="W") + gpuname_label.configure(text_color="#ffff00") gpu_layers_entry,gpu_layers_label = makelabelentry(hardware_tab,"GPU Layers:", gpulayers_var, 5, 50) tensor_split_entry,tensor_split_label = makelabelentry(hardware_tab, "Tensor Split:", tensor_split_str_vars, 6, 80) lowvram_box = makecheckbox(hardware_tab, "Low VRAM", lowvram_var, 4,0) @@ -1465,6 +1545,10 @@ def show_new_gui(): ctk.CTkButton(tabs , text = "Load", fg_color="#084a66", hover_color="#085a88", command = load_config, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 70, pady=5) ctk.CTkButton(tabs , text = "Help", fg_color="#992222", hover_color="#bb3333", command = display_help, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 135, pady=5) + # start a thread that tries to get actual gpu names and layer counts + gpuinfo_thread = threading.Thread(target=auto_gpu_heuristics) + gpuinfo_thread.start() #submit job in new thread so nothing is waiting + # runs main loop until closed or launch clicked root.mainloop() diff --git a/simpleclinfo.cpp b/simpleclinfo.cpp new file mode 100644 index 000000000..286eee528 --- /dev/null +++ b/simpleclinfo.cpp @@ -0,0 +1,111 @@ +//a simple program that obtains the CL platform and devices, prints them out and exits + +#include +#include +#include +#include +#include + +#define CL_TARGET_OPENCL_VERSION 110 +#include +#include + +#include +#include +#include + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +#define CL_CHECK(err) \ + do { \ + cl_int err_ = (err); \ + if (err_ != CL_SUCCESS) { \ + fprintf(stderr, "ggml_opencl: %s error %d at %s:%d\n", \ + #err, err_, __FILE__, __LINE__); \ + fprintf(stderr, "You may be out of VRAM. Please check if you have enough.\n");\ + exit(1); \ + } \ + } while (0) + +static cl_platform_id platform; +static cl_device_id device; +static cl_context context; +static cl_command_queue queue; +static cl_program program; +static cl_kernel convert_row_f16_cl; +static cl_kernel dequantize_row_q4_0_cl, dequantize_row_q4_1_cl, dequantize_row_q5_0_cl, dequantize_row_q5_1_cl, dequantize_row_q8_0_cl; +static cl_kernel dequantize_mul_mat_vec_q4_0_cl, dequantize_mul_mat_vec_q4_1_cl, dequantize_mul_mat_vec_q5_0_cl, dequantize_mul_mat_vec_q5_1_cl, dequantize_mul_mat_vec_q8_0_cl, convert_mul_mat_vec_f16_cl; +static cl_kernel dequantize_block_q2_k_cl, dequantize_block_q3_k_cl, dequantize_block_q4_k_cl, dequantize_block_q5_k_cl, dequantize_block_q6_k_cl; +static cl_kernel dequantize_mul_mat_vec_q2_K_cl, dequantize_mul_mat_vec_q3_K_cl, dequantize_mul_mat_vec_q4_K_cl, dequantize_mul_mat_vec_q5_K_cl, dequantize_mul_mat_vec_q6_K_cl; +static cl_kernel mul_f32_cl; +static bool fp16_support; + + +int main(void) { + + cl_int err; + + struct cl_device; + struct cl_platform { + cl_platform_id id; + unsigned number; + char name[128]; + char vendor[128]; + struct cl_device * devices; + unsigned n_devices; + struct cl_device * default_device; + }; + + struct cl_device { + struct cl_platform * platform; + cl_device_id id; + unsigned number; + cl_device_type type; + char name[128]; + }; + + enum { NPLAT = 16, NDEV = 16 }; + + struct cl_platform platforms[NPLAT]; + unsigned n_platforms = 0; + struct cl_device devices[NDEV]; + unsigned n_devices = 0; + struct cl_device * default_device = NULL; + + platform = NULL; + device = NULL; + + cl_platform_id platform_ids[NPLAT]; + CL_CHECK(clGetPlatformIDs(NPLAT, platform_ids, &n_platforms)); + + for (unsigned i = 0; i < n_platforms; i++) { + struct cl_platform * p = &platforms[i]; + p->number = i; + p->id = platform_ids[i]; + CL_CHECK(clGetPlatformInfo(p->id, CL_PLATFORM_NAME, sizeof(p->name), &p->name, NULL)); + CL_CHECK(clGetPlatformInfo(p->id, CL_PLATFORM_VENDOR, sizeof(p->vendor), &p->vendor, NULL)); + + cl_device_id device_ids[NDEV]; + cl_int clGetDeviceIDsError = clGetDeviceIDs(p->id, CL_DEVICE_TYPE_ALL, NDEV, device_ids, &p->n_devices); + if (clGetDeviceIDsError == CL_DEVICE_NOT_FOUND) { + p->n_devices = 0; + } else { + CL_CHECK(clGetDeviceIDsError); + } + p->devices = p->n_devices > 0 ? &devices[n_devices] : NULL; + p->default_device = NULL; + + for (unsigned j = 0; j < p->n_devices; j++) { + struct cl_device * d = &devices[n_devices]; + d->number = n_devices++; + d->id = device_ids[j]; + d->platform = p; + CL_CHECK(clGetDeviceInfo(d->id, CL_DEVICE_NAME, sizeof(d->name), &d->name, NULL)); + CL_CHECK(clGetDeviceInfo(d->id, CL_DEVICE_TYPE, sizeof(d->type), &d->type, NULL)); + printf("%d %d = %s\n",i,j,d->name); + } + } + return 0; +} diff --git a/simpleclinfo.exe b/simpleclinfo.exe new file mode 100644 index 000000000..00aa5e9d5 Binary files /dev/null and b/simpleclinfo.exe differ