Minor fixes

parent 130adf8415
commit a0aae528bb

5 changed files with 18 additions and 6 deletions
@@ -50,6 +50,6 @@ else()
     endif()
     add_subdirectory(save-load-state)
     add_subdirectory(simple)
-    #add_subdirectory(speculative)
+    add_subdirectory(speculative)
     add_subdirectory(tokenize)
 endif()
@@ -73,10 +73,11 @@ int main(int argc, char ** argv) {
     // load the draft model
     params.model = params.model_draft;
     params.n_gpu_layers = params.n_gpu_layers_draft;
-    if (params.n_threads_draft > 0) {
-        params.n_threads = params.n_threads_draft;
+    if (params.draft_cpuparams.n_threads > 0) {
+        params.cpuparams.n_threads = params.draft_cpuparams.n_threads;
     }
-    params.n_threads_batch = params.n_threads_batch_draft;
+
+    params.cpuparams_batch.n_threads = params.draft_cpuparams_batch.n_threads;
     llama_init_result llama_init_dft = llama_init_from_gpt_params(params);
     model_dft = llama_init_dft.model;
     ctx_dft = llama_init_dft.context;
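The speculative example now reads its draft-model thread counts from the new per-model cpu_params fields instead of the old flat n_threads_draft/n_threads_batch_draft fields. A minimal sketch of setting those fields from calling code, assuming the gpt_params layout this series introduces (the field names come from the diff itself; the include path and surrounding layout are assumptions):

// Hedged sketch: configuring target- and draft-model thread counts via the
// per-model cpu_params fields shown in the hunk above.
#include "common.h" // assumed location of gpt_params in this series

void configure_threads(gpt_params & params) {
    params.cpuparams.n_threads             = 8; // target model, generation
    params.cpuparams_batch.n_threads       = 8; // target model, batch/prompt processing
    params.draft_cpuparams.n_threads       = 4; // draft model, generation
    params.draft_cpuparams_batch.n_threads = 4; // draft model, batch/prompt processing
}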
@@ -18737,7 +18737,7 @@ static bool __thread_affinity(const bool * mask) {

     for (uint32_t i = 0; i < GGML_MAX_N_THREADS; i++) {
         if (mask[i]) {
-            printf("Thread %lx: adding %d to cpuset\n", pthread_self(), i);
+            GGML_PRINT_DEBUG("Thread %lx: adding %d to cpuset\n", pthread_self(), i);
             CPU_SET(i, &cpuset);
         }
     }
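The printf here fired once per masked CPU on every affinity setup, even in release builds; GGML_PRINT_DEBUG compiles away unless debug logging is enabled. For orientation, the macro is conventionally defined along these lines (a sketch assuming the usual GGML_DEBUG gate; the actual definition in ggml.c may differ):

// Sketch of a GGML_DEBUG-gated logging macro (assumption, not from this diff).
#include <stdio.h>

#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__) // visible only in debug builds
#else
#define GGML_PRINT_DEBUG(...)                     // compiled out otherwise
#endif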
@@ -19130,7 +19130,7 @@ static bool ggml_graph_compute_check_for_work(struct ggml_compute_state * state)
     if (threadpool->poll) {
         while (!threadpool->new_work && !threadpool->stop && !threadpool->pause) {
             // No new work. Yield and keep polling.
-            //__cpu_relax();
+            __cpu_relax();
         }
     } else {
         ggml_mutex_lock_shared(&threadpool->mutex);
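Un-commenting __cpu_relax() makes the polling loop emit the CPU's spin-wait hint instead of busy-spinning at full speed, which eases pressure on the core and, on SMT parts, on the sibling thread. A helper like this is typically defined per architecture, roughly as below (a hedged sketch; the actual definition in ggml.c may differ):

// Hedged sketch of a per-architecture spin-wait hint (assumption, not from this diff).
#if defined(__x86_64__) || defined(_M_X64)
#include <immintrin.h>
static inline void __cpu_relax(void) { _mm_pause(); }               // x86 PAUSE
#elif defined(__aarch64__)
static inline void __cpu_relax(void) { __asm__ volatile("yield"); } // ARM YIELD
#else
static inline void __cpu_relax(void) { /* fallback: no-op */ }
#endif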
@@ -439,6 +439,8 @@ extern "C" {
     LLAMA_API void llama_detach_batch_threadpool(struct llama_context * ctx);
     LLAMA_API void llama_detach_threadpools(struct llama_context * ctx);

+    // Pauses all attached threadpools
+    LLAMA_API void llama_pause_threadpools(struct llama_context * ctx);

     // Call once at the end of the program - currently only used for MPI
     LLAMA_API void llama_backend_free(void);
@@ -17526,6 +17526,15 @@ void llama_detach_threadpools(struct llama_context * ctx) {
     llama_detach_batch_threadpool(ctx);
 }

+void llama_pause_threadpools(struct llama_context * ctx) {
+    if (ctx->threadpool) {
+        ggml_pause_threadpool(ctx->threadpool);
+    }
+    if (ctx->threadpool_batch) {
+        ggml_pause_threadpool(ctx->threadpool_batch);
+    }
+}
+
 void llama_backend_free(void) {
     ggml_quantize_free();
 }
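For orientation, a minimal caller of the new API: pausing both attached threadpools while the context sits idle, so their workers stop polling or spinning. This assumes threadpools were created and attached to the context elsewhere in this patch set; resuming them is handled on the threadpool side and is outside this diff:

// Hedged usage sketch: only llama_pause_threadpools comes from this diff;
// the attach/resume flow is assumed from the rest of the patch set.
#include "llama.h"

void on_idle(struct llama_context * ctx) {
    // Stops attached generation and batch threadpools from consuming CPU
    // while there is no work queued for this context.
    llama_pause_threadpools(ctx);
}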