diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 7b2203cc1..12db20b20 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -1532,7 +1532,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     }
     if (kcpp_params->seed <= 0 || kcpp_params->seed==0xFFFFFFFF)
     {
-        kcpp_params->seed = (((uint32_t)time(NULL)) % 1000000);
+        kcpp_params->seed = (((uint32_t)time(NULL)) % 1000000u);
     }
 
     // tokenize the prompt
diff --git a/llama.h b/llama.h
index d3a5ff4c4..19518b8b4 100644
--- a/llama.h
+++ b/llama.h
@@ -4,9 +4,10 @@
 #include "ggml.h"
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
+#define LLAMA_MAX_DEVICES 16
 #else
-#define LLAMA_MAX_DEVICES 1
+//just max it out, same as GGML_CUDA_MAX_DEVICES
+#define LLAMA_MAX_DEVICES 16
 #endif // GGML_USE_CUBLAS
 #include <stddef.h>
 #include <stdint.h>