metal : handle buffers larger than device's maxBufferLength (#1826)

* metal : handle buffers larger than device's maxBufferLength

* metal : print more verbose device info + handle errors

* metal : fix prints for overlapping views

* metal : minimize view overlap to try to utilize device memory better
This commit is contained in:
Georgi Gerganov 2023-06-18 09:09:47 +03:00 committed by GitHub
parent 57cd69460f
commit ce2c7d72e2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 125 additions and 35 deletions

View file

@ -252,7 +252,7 @@ $(info )
ggml.o: ggml.c ggml.h ggml-cuda.h
$(CC) $(CFLAGS) -c $< -o $@
llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h
llama.o: llama.cpp ggml.h ggml-cuda.h ggml-metal.h llama.h llama-util.h
$(CXX) $(CXXFLAGS) -c $< -o $@
common.o: examples/common.cpp examples/common.h