diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 7b2203cc1..12db20b20 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -1532,7 +1532,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     }
     if (kcpp_params->seed <= 0 || kcpp_params->seed==0xFFFFFFFF)
     {
-        kcpp_params->seed = (((uint32_t)time(NULL)) % 1000000);
+        kcpp_params->seed = (((uint32_t)time(NULL)) % 1000000u);
     }
 
     // tokenize the prompt
diff --git a/llama.h b/llama.h
index d3a5ff4c4..19518b8b4 100644
--- a/llama.h
+++ b/llama.h
@@ -4,9 +4,10 @@
 #include "ggml.h"
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
+#define LLAMA_MAX_DEVICES 16
 #else
-#define LLAMA_MAX_DEVICES 1
+//just max it out, same as GGML_CUDA_MAX_DEVICES
+#define LLAMA_MAX_DEVICES 16
 #endif // GGML_USE_CUBLAS
 #include <stddef.h>
 #include <stdint.h>