mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-06-28 15:28:30 +00:00
Add OpenMP support
This commit is contained in:
parent
c1e18e7903
commit
5f8e9f14c1
742 changed files with 94643 additions and 1279 deletions
144
third_party/openmp/kmp_barrier.h
vendored
Normal file
144
third_party/openmp/kmp_barrier.h
vendored
Normal file
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* kmp_barrier.h
|
||||
*/
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef KMP_BARRIER_H
|
||||
#define KMP_BARRIER_H
|
||||
|
||||
#include "kmp.h"
|
||||
#include "kmp_i18n.h"
|
||||
|
||||
#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
|
||||
#include <xmmintrin.h>
|
||||
// Aligned allocation through the _mm_malloc()/_mm_free() pair.
// Memory obtained from KMP_ALIGNED_ALLOCATE must go back via KMP_ALIGNED_FREE.
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc((size), (alignment))
#define KMP_ALIGNED_FREE(ptr) _mm_free((ptr))
|
||||
#elif KMP_HAVE_ALIGNED_ALLOC
|
||||
// Rounds `val` up to the nearest multiple of `alignment`.
// NOTE: the name is spelled "ALGIN" (sic); it is left unchanged so that any
// existing user of the macro keeps compiling.
#define KMP_ALGIN_UP(val, alignment)                                           \
  ((((val) + ((alignment) - 1)) / (alignment)) * (alignment))
// Aligned allocation through aligned_alloc(). The requested size is rounded up
// to a multiple of the alignment first, since C11 aligned_alloc() expects the
// size to be an integral multiple of the alignment.
#define KMP_ALIGNED_ALLOCATE(size, alignment)                                  \
  aligned_alloc((alignment), KMP_ALGIN_UP((size), (alignment)))
#define KMP_ALIGNED_FREE(ptr) free((ptr))
|
||||
#elif KMP_HAVE_POSIX_MEMALIGN
|
||||
// Aligned allocation through posix_memalign().
//
// size      - number of bytes to allocate
// alignment - required alignment, forwarded unchanged to posix_memalign()
//
// Returns the allocated pointer, or nullptr when posix_memalign() reports an
// error (which includes an alignment it rejects). Release the result with
// KMP_ALIGNED_FREE().
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  // posix_memalign() is allowed to leave the output pointer untouched when it
  // fails, so the pointer must start from a known value and must never be
  // inspected or freed on the error path: nothing was allocated.
  void *ptr = nullptr;
  if (posix_memalign(&ptr, alignment, size) != 0)
    return nullptr;
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
|
||||
#elif KMP_HAVE__ALIGNED_MALLOC
|
||||
#include <malloc.h>
|
||||
// Aligned allocation through the _aligned_malloc()/_aligned_free() pair.
// Memory obtained from KMP_ALIGNED_ALLOCATE must go back via KMP_ALIGNED_FREE.
#define KMP_ALIGNED_ALLOCATE(size, alignment)                                  \
  _aligned_malloc((size), (alignment))
#define KMP_ALIGNED_FREE(ptr) _aligned_free((ptr))
|
||||
#else
|
||||
// Last-resort fallback when no aligned allocator was selected above.
// NOTE(review): `alignment` is accepted but not used here, so the result has
// only whatever alignment KMP_INTERNAL_MALLOC provides.
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC((size))
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE((ptr))
|
||||
#endif
|
||||
|
||||
// Align to four cache lines. MLC tends to prefetch the cache line next to (or
// before) the one being accessed, which can create a spurious conflict between
// cores; spacing data four lines apart is the only way to guarantee that no
// such prefetch can happen.
#if !defined(KMP_FOURLINE_ALIGN_CACHE)
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

// Defined as 0 in this header.
// NOTE(review): not read anywhere in this header; presumably consumed by the
// barrier implementation -- confirm before changing.
#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
|
||||
|
||||
class distributedBarrier {
|
||||
struct flags_s {
|
||||
kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
|
||||
};
|
||||
|
||||
struct go_s {
|
||||
std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
|
||||
};
|
||||
|
||||
struct iter_s {
|
||||
kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
|
||||
};
|
||||
|
||||
struct sleep_s {
|
||||
std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
|
||||
};
|
||||
|
||||
void init(size_t nthr);
|
||||
void resize(size_t nthr);
|
||||
void computeGo(size_t n);
|
||||
void computeVarsForN(size_t n);
|
||||
|
||||
public:
|
||||
enum {
|
||||
MAX_ITERS = 3,
|
||||
MAX_GOS = 8,
|
||||
IDEAL_GOS = 4,
|
||||
IDEAL_CONTENTION = 16,
|
||||
};
|
||||
|
||||
flags_s *flags[MAX_ITERS];
|
||||
go_s *go;
|
||||
iter_s *iter;
|
||||
sleep_s *sleep;
|
||||
|
||||
size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
|
||||
size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
|
||||
// number of go signals each requiring one write per iteration
|
||||
size_t KMP_ALIGN_CACHE num_gos;
|
||||
// number of groups of gos
|
||||
size_t KMP_ALIGN_CACHE num_groups;
|
||||
// threads per go signal
|
||||
size_t KMP_ALIGN_CACHE threads_per_go;
|
||||
bool KMP_ALIGN_CACHE fix_threads_per_go;
|
||||
// threads per group
|
||||
size_t KMP_ALIGN_CACHE threads_per_group;
|
||||
// number of go signals in a group
|
||||
size_t KMP_ALIGN_CACHE gos_per_group;
|
||||
void *team_icvs;
|
||||
|
||||
distributedBarrier() = delete;
|
||||
~distributedBarrier() = delete;
|
||||
|
||||
// Used instead of constructor to create aligned data
|
||||
static distributedBarrier *allocate(int nThreads) {
|
||||
distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
|
||||
sizeof(distributedBarrier), 4 * CACHE_LINE);
|
||||
if (!d) {
|
||||
KMP_FATAL(MemoryAllocFailed);
|
||||
}
|
||||
d->num_threads = 0;
|
||||
d->max_threads = 0;
|
||||
for (int i = 0; i < MAX_ITERS; ++i)
|
||||
d->flags[i] = NULL;
|
||||
d->go = NULL;
|
||||
d->iter = NULL;
|
||||
d->sleep = NULL;
|
||||
d->team_icvs = NULL;
|
||||
d->fix_threads_per_go = false;
|
||||
// calculate gos and groups ONCE on base size
|
||||
d->computeGo(nThreads);
|
||||
d->init(nThreads);
|
||||
return d;
|
||||
}
|
||||
|
||||
static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
|
||||
|
||||
void update_num_threads(size_t nthr) { init(nthr); }
|
||||
|
||||
bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
|
||||
size_t get_num_threads() { return num_threads; }
|
||||
kmp_uint64 go_release();
|
||||
void go_reset();
|
||||
};
|
||||
|
||||
#endif // KMP_BARRIER_H
|
Loading…
Add table
Add a link
Reference in a new issue