mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 11:37:35 +00:00
145 lines
4.1 KiB
C
145 lines
4.1 KiB
C
|
/*
|
||
|
* kmp_barrier.h
|
||
|
*/
|
||
|
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#ifndef KMP_BARRIER_H
|
||
|
#define KMP_BARRIER_H
|
||
|
|
||
|
#include "kmp.h"
|
||
|
#include "kmp_i18n.h"
|
||
|
|
||
|
#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
|
||
|
#include <xmmintrin.h>
|
||
|
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
|
||
|
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
|
||
|
#elif KMP_HAVE_ALIGNED_ALLOC
|
||
|
#define KMP_ALGIN_UP(val, alignment) \
|
||
|
(((val) + (alignment)-1) / (alignment) * (alignment))
|
||
|
#define KMP_ALIGNED_ALLOCATE(size, alignment) \
|
||
|
aligned_alloc(alignment, KMP_ALGIN_UP(size, alignment))
|
||
|
#define KMP_ALIGNED_FREE(ptr) free(ptr)
|
||
|
#elif KMP_HAVE_POSIX_MEMALIGN
|
||
|
// Allocate `size` bytes whose address is a multiple of `alignment`, using
// posix_memalign().
//
// size:      number of bytes requested.
// alignment: required alignment; posix_memalign() rejects values that are not
//            a power of two multiple of sizeof(void *).
// Returns the allocated block (release it with KMP_ALIGNED_FREE), or NULL when
// posix_memalign() reports an error.
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  // Start from a known value: posix_memalign() is not required to store
  // anything through its first argument when it fails.
  void *ptr = NULL;
  // A non-zero return means no memory was handed out, so there is nothing to
  // free on this path; inspecting or freeing `ptr` here would act on a value
  // that never came from the allocator.
  if (posix_memalign(&ptr, alignment, size) != 0)
    return NULL;
  return ptr;
}
|
||
|
#define KMP_ALIGNED_FREE(ptr) free(ptr)
|
||
|
#elif KMP_HAVE__ALIGNED_MALLOC
|
||
|
#include <malloc.h>
|
||
|
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
|
||
|
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
|
||
|
#else
|
||
|
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
|
||
|
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
|
||
|
#endif
|
||
|
|
||
|
// Use four cache lines: the MLC tends to prefetch the next or previous cache
// line, creating a possible false conflict between cores, so this is the only
// way to guarantee that no such prefetch can happen.
|
||
|
#ifndef KMP_FOURLINE_ALIGN_CACHE
|
||
|
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
|
||
|
#endif
|
||
|
|
||
|
#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
|
||
|
|
||
|
class distributedBarrier {
|
||
|
struct flags_s {
|
||
|
kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
|
||
|
};
|
||
|
|
||
|
struct go_s {
|
||
|
std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
|
||
|
};
|
||
|
|
||
|
struct iter_s {
|
||
|
kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
|
||
|
};
|
||
|
|
||
|
struct sleep_s {
|
||
|
std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
|
||
|
};
|
||
|
|
||
|
void init(size_t nthr);
|
||
|
void resize(size_t nthr);
|
||
|
void computeGo(size_t n);
|
||
|
void computeVarsForN(size_t n);
|
||
|
|
||
|
public:
|
||
|
enum {
|
||
|
MAX_ITERS = 3,
|
||
|
MAX_GOS = 8,
|
||
|
IDEAL_GOS = 4,
|
||
|
IDEAL_CONTENTION = 16,
|
||
|
};
|
||
|
|
||
|
flags_s *flags[MAX_ITERS];
|
||
|
go_s *go;
|
||
|
iter_s *iter;
|
||
|
sleep_s *sleep;
|
||
|
|
||
|
size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
|
||
|
size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
|
||
|
// number of go signals each requiring one write per iteration
|
||
|
size_t KMP_ALIGN_CACHE num_gos;
|
||
|
// number of groups of gos
|
||
|
size_t KMP_ALIGN_CACHE num_groups;
|
||
|
// threads per go signal
|
||
|
size_t KMP_ALIGN_CACHE threads_per_go;
|
||
|
bool KMP_ALIGN_CACHE fix_threads_per_go;
|
||
|
// threads per group
|
||
|
size_t KMP_ALIGN_CACHE threads_per_group;
|
||
|
// number of go signals in a group
|
||
|
size_t KMP_ALIGN_CACHE gos_per_group;
|
||
|
void *team_icvs;
|
||
|
|
||
|
distributedBarrier() = delete;
|
||
|
~distributedBarrier() = delete;
|
||
|
|
||
|
// Used instead of constructor to create aligned data
|
||
|
static distributedBarrier *allocate(int nThreads) {
|
||
|
distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
|
||
|
sizeof(distributedBarrier), 4 * CACHE_LINE);
|
||
|
if (!d) {
|
||
|
KMP_FATAL(MemoryAllocFailed);
|
||
|
}
|
||
|
d->num_threads = 0;
|
||
|
d->max_threads = 0;
|
||
|
for (int i = 0; i < MAX_ITERS; ++i)
|
||
|
d->flags[i] = NULL;
|
||
|
d->go = NULL;
|
||
|
d->iter = NULL;
|
||
|
d->sleep = NULL;
|
||
|
d->team_icvs = NULL;
|
||
|
d->fix_threads_per_go = false;
|
||
|
// calculate gos and groups ONCE on base size
|
||
|
d->computeGo(nThreads);
|
||
|
d->init(nThreads);
|
||
|
return d;
|
||
|
}
|
||
|
|
||
|
static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
|
||
|
|
||
|
void update_num_threads(size_t nthr) { init(nthr); }
|
||
|
|
||
|
bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
|
||
|
size_t get_num_threads() { return num_threads; }
|
||
|
kmp_uint64 go_release();
|
||
|
void go_reset();
|
||
|
};
|
||
|
|
||
|
#endif // KMP_BARRIER_H
|