Minor fixes
parent 130adf8415
commit a0aae528bb

5 changed files with 18 additions and 6 deletions
@@ -50,6 +50,6 @@ else()
     endif()
     add_subdirectory(save-load-state)
     add_subdirectory(simple)
-    #add_subdirectory(speculative)
+    add_subdirectory(speculative)
     add_subdirectory(tokenize)
 endif()
@@ -73,10 +73,11 @@ int main(int argc, char ** argv) {
     // load the draft model
     params.model = params.model_draft;
     params.n_gpu_layers = params.n_gpu_layers_draft;
-    if (params.n_threads_draft > 0) {
-        params.n_threads = params.n_threads_draft;
+    if (params.draft_cpuparams.n_threads > 0) {
+        params.cpuparams.n_threads = params.draft_cpuparams.n_threads;
     }
-    params.n_threads_batch = params.n_threads_batch_draft;
+
+    params.cpuparams_batch.n_threads = params.draft_cpuparams_batch.n_threads;
     llama_init_result llama_init_dft = llama_init_from_gpt_params(params);
     model_dft = llama_init_dft.model;
     ctx_dft = llama_init_dft.context;
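This hunk switches the speculative example from the old n_threads_draft fields to the new draft_cpuparams structs. A minimal sketch of the override pattern as a standalone helper (the helper name is hypothetical; the gpt_params fields are the ones used above):

// Hypothetical helper capturing the override pattern above: copy the
// draft-model settings over the main ones before the second
// llama_init_from_gpt_params call.
static void apply_draft_params(gpt_params & params) {
    params.model        = params.model_draft;
    params.n_gpu_layers = params.n_gpu_layers_draft;
    // only override the generation thread count when one was given
    if (params.draft_cpuparams.n_threads > 0) {
        params.cpuparams.n_threads = params.draft_cpuparams.n_threads;
    }
    // the batch thread count is copied unconditionally, as in the diff
    params.cpuparams_batch.n_threads = params.draft_cpuparams_batch.n_threads;
}

Note the asymmetry preserved from the original code: the batch count is copied unconditionally, while the generation count is guarded by a > 0 check.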
@@ -18737,7 +18737,7 @@ static bool __thread_affinity(const bool * mask) {
 
     for (uint32_t i = 0; i < GGML_MAX_N_THREADS; i++) {
         if (mask[i]) {
-            printf("Thread %lx: adding %d to cpuset\n", pthread_self(), i);
+            GGML_PRINT_DEBUG("Thread %lx: adding %d to cpuset\n", pthread_self(), i);
             CPU_SET(i, &cpuset);
         }
     }
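The printf above fired on every call that set thread affinity; GGML_PRINT_DEBUG is silent in normal builds. A sketch of the usual gating (assumption: ggml defines the macro along these lines, controlled by a compile-time GGML_DEBUG level):

// Sketch of the assumed GGML_PRINT_DEBUG gating: expands to printf only
// when a debug level is enabled at compile time, otherwise to nothing.
#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif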
@@ -19130,7 +19130,7 @@ static bool ggml_graph_compute_check_for_work(struct ggml_compute_state * state)
     if (threadpool->poll) {
         while (!threadpool->new_work && !threadpool->stop && !threadpool->pause) {
             // No new work. Yield and keep polling.
-            //__cpu_relax();
+            __cpu_relax();
         }
     } else {
         ggml_mutex_lock_shared(&threadpool->mutex);
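Re-enabling __cpu_relax() makes each polling iteration issue a spin-wait hint instead of hammering the core at full speed, easing hyper-thread contention and power draw without taking the mutex. A sketch of what such a helper typically expands to (assumption: the actual ggml definition may differ per platform):

// Typical shape of a spin-wait relax helper (assumed, not copied from ggml):
#if defined(__x86_64__) || defined(_M_X64)
#include <immintrin.h>
static inline void __cpu_relax(void) { _mm_pause(); }               // x86: PAUSE hint
#elif defined(__aarch64__)
static inline void __cpu_relax(void) { __asm__ volatile("yield"); } // AArch64: YIELD hint
#else
static inline void __cpu_relax(void) { /* no-op fallback */ }
#endif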
@@ -439,6 +439,8 @@ extern "C" {
     LLAMA_API void llama_detach_batch_threadpool(struct llama_context * ctx);
     LLAMA_API void llama_detach_threadpools(struct llama_context * ctx);
 
+    // Pauses all attached threadpools
+    LLAMA_API void llama_pause_threadpools(struct llama_context * ctx);
 
     // Call once at the end of the program - currently only used for MPI
     LLAMA_API void llama_backend_free(void);
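With the polling loop above, workers spin whenever the pool is live; pausing stops that between decode calls. A minimal usage sketch (assumes ctx already has threadpools attached; a resume counterpart is not part of this diff):

// Sketch: stop both attached threadpools from spinning while the
// context sits idle between requests.
llama_pause_threadpools(ctx);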
@@ -17526,6 +17526,15 @@ void llama_detach_threadpools(struct llama_context * ctx) {
     llama_detach_batch_threadpool(ctx);
 }
 
+void llama_pause_threadpools(struct llama_context * ctx) {
+    if (ctx->threadpool) {
+        ggml_pause_threadpool(ctx->threadpool);
+    }
+    if (ctx->threadpool_batch) {
+        ggml_pause_threadpool(ctx->threadpool_batch);
+    }
+}
+
 void llama_backend_free(void) {
     ggml_quantize_free();
 }
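The implementation just forwards to ggml_pause_threadpool for whichever pools are attached, so the call is a safe no-op on a context without threadpools. A hypothetical sketch of the ggml side, inferred only from the pause flag tested in the poll loop above (the field and locking details are assumptions):

// Hypothetical sketch: raise the pause flag that the polling loop tests,
// so workers fall out of their spin loop. Not the actual ggml code.
void ggml_pause_threadpool(struct ggml_threadpool * threadpool) {
    ggml_mutex_lock(&threadpool->mutex);
    threadpool->pause = true;   // observed as "!threadpool->pause" by pollers
    ggml_mutex_unlock(&threadpool->mutex);
}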