Adjusted down the buf memory allocation (from 1600 MiB to 512 MiB) now that realloc seems to work
This commit is contained in:
parent
4605074245
commit
49697d86d8
3 changed files with 3 additions and 3 deletions
|
@ -486,7 +486,7 @@ if __name__ == '__main__':
|
||||||
default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
|
default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
|
||||||
parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
|
parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
|
||||||
parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
|
parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
|
||||||
parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512)", type=int,choices=[64,128,256,512,1024], default=512)
|
parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512)", type=int,choices=[32,64,128,256,512,1024], default=512)
|
||||||
parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
|
parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
|
||||||
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
|
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
|
||||||
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
|
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
|
||||||
|
|
|
@ -371,7 +371,7 @@ bool gpt2_eval(
|
||||||
const int n_vocab = hparams.n_vocab;
|
const int n_vocab = hparams.n_vocab;
|
||||||
|
|
||||||
//todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now
|
//todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now
|
||||||
static size_t buf_size = 1600u*1024*1024;
|
static size_t buf_size = 512u*1024*1024;
|
||||||
static void * buf = malloc(buf_size);
|
static void * buf = malloc(buf_size);
|
||||||
|
|
||||||
if (mem_per_token > 0 && mem_per_token*N*1.6 > buf_size) {
|
if (mem_per_token > 0 && mem_per_token*N*1.6 > buf_size) {
|
||||||
|
|
|
@ -382,7 +382,7 @@ bool gptj_eval(
|
||||||
const int d_key = n_embd/n_head;
|
const int d_key = n_embd/n_head;
|
||||||
|
|
||||||
//todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now
|
//todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now
|
||||||
static size_t buf_size = 1600u*1024*1024;
|
static size_t buf_size = 512u*1024*1024;
|
||||||
static void * buf = malloc(buf_size);
|
static void * buf = malloc(buf_size);
|
||||||
|
|
||||||
if (mem_per_token > 0 && mem_per_token*N*1.4 > buf_size) {
|
if (mem_per_token > 0 && mem_per_token*N*1.4 > buf_size) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue