build : on Mac OS enable Metal by default (#2901)

* build : on Mac OS enable Metal by default

* make : try to fix build on Linux

* make : move targets back to the top

* make : fix target clean

* llama : enable GPU inference by default with Metal

* llama : fix vocab_only logic when GPU is enabled

* common : better `n_gpu_layers` assignment

* readme : update Metal instructions

* make : fix merge conflict remnants

* gitignore : metal
This commit is contained in:
Georgi Gerganov 2023-09-04 22:26:24 +03:00 committed by GitHub
parent bd33e5ab92
commit e36ecdccc8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 143 additions and 133 deletions

View file

@@ -368,7 +368,7 @@ results_perplexity perplexity(llama_context * ctx, const gpt_params & params) {
// Example, we have a context window of 512, we will compute perplexity for each of the
// last 256 tokens. Then, we split the input up into context window size chunks to
// process the entire prompt.
const int first = std::min(512, params.n_ctx/2);
const int first = params.n_ctx/2;
process_logits(n_vocab, logits.data() + first*n_vocab, tokens.data() + start + first, params.n_ctx - 1 - first,
workers, nll, nll2, logit_history.data() + start + first, prob_history.data() + start + first);
count += params.n_ctx - first - 1;
@@ -668,11 +668,6 @@ int main(int argc, char ** argv) {
params.n_ctx += params.ppl_stride/2;
}
if (params.n_ctx > 2048) {
fprintf(stderr, "%s: warning: model might not support context sizes greater than 2048 tokens (%d specified);"
"expect poor results\n", __func__, params.n_ctx);
}
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
if (params.seed == LLAMA_DEFAULT_SEED) {
@@ -698,6 +693,11 @@ int main(int argc, char ** argv) {
return 1;
}
if (params.n_ctx > llama_n_ctx(ctx)) {
fprintf(stderr, "%s: warning: model might not support context sizes greater than %d tokens (%d specified);"
"expect poor results\n", __func__, llama_n_ctx(ctx), params.n_ctx);
}
// print system information
{
fprintf(stderr, "\n");