// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _PSTL_INTERNAL_OMP_UTIL_H
#define _PSTL_INTERNAL_OMP_UTIL_H

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdio>
#include <iterator>
#include <memory>
#include <type_traits>
#include <vector>
#include <omp.h>

#include "third_party/libcxx/__pstl/internal/parallel_backend_utils.h"
#include "third_party/libcxx/__pstl/internal/unseq_backend_simd.h"
#include "third_party/libcxx/__pstl/internal/utils.h"

// Portability "#pragma" definition
#ifdef _MSC_VER
#    define _PSTL_PRAGMA(x) __pragma(x)
#else
#    define _PSTL_PRAGMA(x) _Pragma(#x)
#endif

namespace __pstl
{
namespace __omp_backend
{

//------------------------------------------------------------------------
// use to cancel execution
//------------------------------------------------------------------------
inline void
__cancel_execution()
{
    // TODO: Figure out how to make cancellation work.
}

//------------------------------------------------------------------------
// raw buffer
//------------------------------------------------------------------------

template <typename _Tp>
class __buffer
{
    std::allocator<_Tp> __allocator_;
    _Tp* __ptr_;
    const std::size_t __buf_size_;

    __buffer(const __buffer&) = delete;
    void operator=(const __buffer&) = delete;

  public:
    __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {}

    operator bool() const { return __ptr_ != nullptr; }

    _Tp* get() const { return __ptr_; }

    ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); }
};

// Preliminary size of each chunk: requires further discussion
inline constexpr std::size_t __default_chunk_size = 2048;

// Convenience function to determine when we should run serial.
template <typename _Iterator, std::enable_if_t<!std::is_integral<_Iterator>::value, bool> = true>
constexpr auto
__should_run_serial(_Iterator __first, _Iterator __last) -> bool
{
    using _difference_type = typename std::iterator_traits<_Iterator>::difference_type;
    auto __size = std::distance(__first, __last);
    return __size <= static_cast<_difference_type>(__default_chunk_size);
}

template <typename _Index, std::enable_if_t<std::is_integral<_Index>::value, bool> = true>
constexpr auto
__should_run_serial(_Index __first, _Index __last) -> bool
{
    using _difference_type = _Index;
    auto __size = __last - __first;
    return __size <= static_cast<_difference_type>(__default_chunk_size);
}

struct __chunk_metrics
{
    std::size_t __n_chunks;
    std::size_t __chunk_size;
    std::size_t __first_chunk_size;
};

// The iteration space partitioner according to __requested_chunk_size
template <class _RandomAccessIterator, class _Size = std::size_t>
auto
__chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last,
                    _Size __requested_chunk_size = __default_chunk_size) -> __chunk_metrics
{
    /*
     * This algorithm improves the distribution of elements in chunks by
     * avoiding small tail chunks. The leftover elements that do not fit
     * neatly into the chunk size are redistributed to earlier chunks. This
     * improves utilization of the processor's prefetcher and reduces the
     * number of tasks needed by one.
     */
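    /*
     * Worked example (illustrative numbers, not from the original source):
     * with __n = 8303 and __requested_chunk_size = 2048, a naive split
     * would yield four chunks of 2048 plus a tail chunk of only 111.
     * Instead, this partitioner computes:
     *   __n_chunks         = 8303 / 2048 + 1 = 5
     *   __chunk_size       = 8303 / 5        = 1660
     *   leftover           = 8303 - 5 * 1660 = 3
     *   extra per chunk    = 3 / 5           = 0
     *   __first_chunk_size = 1660 + 3        = 1663
     * i.e. one chunk of 1663 elements followed by four chunks of 1660
     * (1663 + 4 * 1660 == 8303), with no undersized tail.
     */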
    const _Size __n = __last - __first;
    _Size __n_chunks = 0;
    _Size __chunk_size = 0;
    _Size __first_chunk_size = 0;

    // Small inputs fit in a single chunk.
    if (__n < __requested_chunk_size)
    {
        __chunk_size = __n;
        __first_chunk_size = __n;
        __n_chunks = 1;
        return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
    }

    __n_chunks = (__n / __requested_chunk_size) + 1;
    __chunk_size = __n / __n_chunks;
    __first_chunk_size = __chunk_size;
    const _Size __n_leftover_items = __n - (__n_chunks * __chunk_size);

    // The leftover forms exactly one more full chunk.
    if (__n_leftover_items == __chunk_size)
    {
        __n_chunks += 1;
        return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
    }
    // The items divide evenly; no redistribution is needed.
    else if (__n_leftover_items == 0)
    {
        __first_chunk_size = __chunk_size;
        return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
    }

    // Spread the leftover evenly across all chunks; whatever still remains
    // goes into the first chunk.
    const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks;
    const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks);

    __chunk_size += __n_extra_items_per_chunk;
    __first_chunk_size = __chunk_size + __n_final_leftover_items;

    return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
}

template <typename _Iterator, typename _Index, typename _Func>
void
__process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chunk_index, _Func __f)
{
    // The first chunk may be larger than the rest, so every later chunk is
    // offset by the difference between the first and the regular chunk size.
    auto __this_chunk_size = __chunk_index == 0 ? __metrics.__first_chunk_size : __metrics.__chunk_size;
    auto __index = __chunk_index == 0 ? 0
                                      : (__chunk_index * __metrics.__chunk_size) +
                                            (__metrics.__first_chunk_size - __metrics.__chunk_size);
    auto __first = __base + __index;
    auto __last = __first + __this_chunk_size;
    __f(__first, __last);
}

} // namespace __omp_backend
} // namespace __pstl

#endif // _PSTL_INTERNAL_OMP_UTIL_H
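// ---------------------------------------------------------------------------
// Usage sketch (illustrative; not part of this header). The helpers above are
// combined in the OpenMP backend's chunked parallel loop roughly as follows,
// assuming the caller has already entered an OpenMP parallel region:
//
//     template <class _Index, class _Fp>
//     void
//     __parallel_for_body(_Index __first, _Index __last, _Fp __f)
//     {
//         // Partition the iteration space into evenly sized chunks.
//         auto __policy = __omp_backend::__chunk_partitioner(__first, __last);
//
//         // A taskloop avoids over-subscription under nested parallelism.
//         _PSTL_PRAGMA(omp taskloop untied mergeable)
//         for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
//         {
//             __omp_backend::__process_chunk(__policy, __first, __chunk, __f);
//         }
//     }
// ---------------------------------------------------------------------------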