Release Cosmopolitan v3.6.0

This release is an atomic upgrade to GCC 14.1.0 with C23 and C++23
This commit is contained in:
Justine Tunney 2024-07-23 03:16:17 -07:00
parent 62ace3623a
commit 5660ec4741
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
1585 changed files with 117353 additions and 271644 deletions

View file

@ -0,0 +1,99 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___PSTL_CPU_ALGOS_ANY_OF_H
#define _LIBCPP___PSTL_CPU_ALGOS_ANY_OF_H
#include <__algorithm/any_of.h>
#include <__assert>
#include <__atomic/atomic.h>
#include <__atomic/memory_order.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__pstl/backend_fwd.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/move.h>
#include <__utility/pair.h>
#include <cstdint>
#include <optional>
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __pstl {
// Evaluates __f over sub-ranges of [__first, __last) via the backend's __for_each and reports whether any
// invocation returned true.
//
// Returns nullopt when the backend's __for_each returns a disengaged optional; otherwise returns the value of the
// shared "found" flag. Once a sub-range reports a match, later sub-ranges are skipped and the backend is asked to
// cancel the remaining work.
template <class _Backend, class _Index, class _Brick>
_LIBCPP_HIDE_FROM_ABI optional<bool> __parallel_or(_Index __first, _Index __last, _Brick __f) {
  std::atomic<bool> __hit(false);
  auto __status =
      __cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__hit](_Index __chunk_first, _Index __chunk_last) {
        // A match was already published by another sub-range: nothing left to do here.
        if (__hit.load(std::memory_order_relaxed))
          return;
        if (!__f(__chunk_first, __chunk_last))
          return;
        __hit.store(true, std::memory_order_relaxed);
        __cpu_traits<_Backend>::__cancel_execution();
      });
  if (__status)
    return static_cast<bool>(__hit);
  return nullopt;
}
// TODO: check whether __simd_first() can be used here
//
// Returns true if __pred holds for at least one element of [__first, __first + __n), false otherwise.
//
// The range is processed in chunks: the first chunk has at most 4 elements and the chunk size doubles for as long as
// at least two chunks' worth of elements remain. Every element of a chunk is tested (the inner loop has no early
// exit), and the function returns right after the first chunk that contains a match.
template <class _Index, class _DifferenceType, class _Pred>
_LIBCPP_HIDE_FROM_ABI bool __simd_or(_Index __first, _DifferenceType __n, _Pred __pred) noexcept {
  const _Index __end     = __first + __n;
  _DifferenceType __chunk = __n > 4 ? 4 : __n;
  while (__first != __end) {
    // Stays 1 while no element of the current chunk satisfies the predicate.
    int32_t __none_matched = 1;
    _PSTL_PRAGMA_SIMD_REDUCTION(& : __none_matched)
    for (_DifferenceType __k = 0; __k < __chunk; ++__k)
      if (__pred(*(__first + __k)))
        __none_matched = 0;
    if (__none_matched == 0)
      return true;
    __first += __chunk;
    if (__end - __first < __chunk << 1) {
      // Fewer than two chunks are left: take everything that remains in one go.
      __chunk = __end - __first;
    } else {
      // Double the chunk size. Any unnecessary iterations can be amortized against work done so far.
      __chunk <<= 1;
    }
  }
  return false;
}
// CPU implementation of any_of for the given backend and execution policy.
//
// - parallel policy + random-access iterators: splits the range with __parallel_or and runs the same algorithm with
//   the parallel part of the policy removed on every sub-range;
// - unsequenced policy + random-access iterators: uses __simd_or;
// - anything else: falls back to std::any_of.
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_any_of {
  template <class _Policy, class _ForwardIterator, class _Predicate>
  _LIBCPP_HIDE_FROM_ABI optional<bool>
  operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
    constexpr bool __random_access = __has_random_access_iterator_category_or_concept<_ForwardIterator>::value;
    if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && __random_access) {
      return __pstl::__parallel_or<_Backend>(
          __first, __last, [&__policy, &__pred](_ForwardIterator __chunk_first, _ForwardIterator __chunk_last) {
            auto __chunk_result = __pstl::__any_of<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>()(
                std::__remove_parallel_policy(__policy), __chunk_first, __chunk_last, __pred);
            _LIBCPP_ASSERT_INTERNAL(__chunk_result, "unseq/seq should never try to allocate!");
            return *std::move(__chunk_result);
          });
    } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && __random_access) {
      return __pstl::__simd_or(__first, __last - __first, __pred);
    } else {
      return std::any_of(__first, __last, __pred);
    }
  }
};
} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___PSTL_CPU_ALGOS_ANY_OF_H

View file

@ -0,0 +1,86 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___PSTL_CPU_ALGOS_CPU_TRAITS_H
#define _LIBCPP___PSTL_CPU_ALGOS_CPU_TRAITS_H
#include <__config>
#include <cstddef>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __pstl {
// __cpu_traits
//
// This traits class encapsulates the basis operations for a CPU-based implementation of the PSTL.
// All the operations in the PSTL can be implemented from these basis operations, so a pure CPU backend
// only needs to customize these traits in order to get an implementation of the whole PSTL.
//
// Basis operations
// ================
//
// template <class _RandomAccessIterator, class _Functor>
// optional<__empty> __for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func);
// - __func must take a subrange of [__first, __last) that should be executed in serial
//
// template <class _Iterator, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduction>
// optional<_Tp> __transform_reduce(_Iterator __first, _Iterator __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduction);
//
// template <class _RandomAccessIterator1,
// class _RandomAccessIterator2,
// class _RandomAccessIterator3,
// class _Compare,
// class _LeafMerge>
// optional<_RandomAccessIterator3> __merge(_RandomAccessIterator1 __first1,
// _RandomAccessIterator1 __last1,
// _RandomAccessIterator2 __first2,
// _RandomAccessIterator2 __last2,
// _RandomAccessIterator3 __outit,
// _Compare __comp,
// _LeafMerge __leaf_merge);
//
// template <class _RandomAccessIterator, class _Comp, class _LeafSort>
// optional<__empty> __stable_sort(_RandomAccessIterator __first,
// _RandomAccessIterator __last,
// _Comp __comp,
// _LeafSort __leaf_sort);
//
// void __cancel_execution();
// Cancel the execution of other jobs - they aren't needed anymore. This is not a binding request,
// some backends may not actually be able to cancel jobs.
//
// constexpr size_t __lane_size;
// Size of SIMD lanes.
// TODO: Merge this with __native_vector_size from __algorithm/simd_utils.h
//
//
// Exception handling
// ==================
//
// CPU backends are expected to report errors (i.e. failure to allocate) by returning a disengaged `optional` from their
// implementation. Exceptions shouldn't be used to report an internal failure-to-allocate, since all exceptions are
// turned into a program termination at the front-end level. When a backend returns a disengaged `optional` to the
// frontend, the frontend will turn that into a call to `std::__throw_bad_alloc();` to report the internal failure to
// the user.
// Only the primary template is declared in this header; no definition is provided here. The operations listed in the
// comment block preceding this declaration are accessed as static members of __cpu_traits<_Backend>.
template <class _Backend>
struct __cpu_traits;
} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___PSTL_CPU_ALGOS_CPU_TRAITS_H

View file

@ -0,0 +1,66 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___PSTL_CPU_ALGOS_FILL_H
#define _LIBCPP___PSTL_CPU_ALGOS_FILL_H
#include <__algorithm/fill.h>
#include <__assert>
#include <__config>
#include <__iterator/concepts.h>
#include <__pstl/backend_fwd.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/empty.h>
#include <optional>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __pstl {
// Assigns __value to each of the __n elements starting at __first, using a SIMD-annotated loop.
// Returns the iterator one past the last element written, i.e. __first + __n.
template <class _Index, class _DifferenceType, class _Tp>
_LIBCPP_HIDE_FROM_ABI _Index __simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept {
  _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
  _PSTL_PRAGMA_SIMD
  for (_DifferenceType __offset = 0; __offset < __n; ++__offset) {
    __first[__offset] = __value;
  }
  return __first + __n;
}
// CPU implementation of fill for the given backend and execution policy.
//
// - parallel policy + random-access iterators: the backend's __for_each splits the range and each sub-range is filled
//   by the same algorithm with the parallel part of the policy removed; the backend's result is returned as-is;
// - unsequenced policy + random-access iterators: uses __simd_fill_n;
// - anything else: falls back to std::fill.
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_fill {
  template <class _Policy, class _ForwardIterator, class _Tp>
  _LIBCPP_HIDE_FROM_ABI optional<__empty>
  operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) const noexcept {
    constexpr bool __random_access = __has_random_access_iterator_category_or_concept<_ForwardIterator>::value;
    if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && __random_access) {
      return __cpu_traits<_Backend>::__for_each(
          __first, __last, [&__policy, &__value](_ForwardIterator __chunk_first, _ForwardIterator __chunk_last) {
            [[maybe_unused]] auto __chunk_result =
                __pstl::__fill<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>()(
                    std::__remove_parallel_policy(__policy), __chunk_first, __chunk_last, __value);
            _LIBCPP_ASSERT_INTERNAL(__chunk_result, "unseq/seq should never try to allocate!");
          });
    } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && __random_access) {
      __pstl::__simd_fill_n(__first, __last - __first, __value);
      return __empty{};
    } else {
      std::fill(__first, __last, __value);
      return __empty{};
    }
  }
};
} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___PSTL_CPU_ALGOS_FILL_H

View file

@ -0,0 +1,137 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H
#define _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H
#include <__algorithm/find_if.h>
#include <__assert>
#include <__atomic/atomic.h>
#include <__config>
#include <__functional/operations.h>
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
#include <__pstl/backend_fwd.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/move.h>
#include <__utility/pair.h>
#include <cstddef>
#include <optional>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __pstl {
// Searches [__first, __last) in parallel: __f is run on sub-ranges handed out by the backend's __for_each and is
// expected to return the end of its sub-range when it finds nothing.
//
// The offset (from __first) of the best hit so far is kept in a shared atomic; "best" is decided by __comp. The
// initial value of that atomic is the range length when __b_first is true and -1 otherwise.
//
// Returns nullopt when the backend's __for_each returns a disengaged optional, __last when the shared offset still
// holds its initial value, and __first + offset otherwise.
template <class _Backend, class _Index, class _Brick, class _Compare>
_LIBCPP_HIDE_FROM_ABI optional<_Index>
__parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) {
  using _Distance = typename std::iterator_traits<_Index>::difference_type;

  const _Distance __length   = __last - __first;
  const _Distance __sentinel = __b_first ? __length : -1;
  std::atomic<_Distance> __best(__sentinel);

  // TODO: find out what is better here: parallel_for or parallel_reduce
  auto __status = __cpu_traits<_Backend>::__for_each(
      __first, __last, [__comp, __f, __first, &__best](_Index __chunk_first, _Index __chunk_last) {
        // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
        // why using a shared variable scales fairly well in this situation.
        if (!__comp(__chunk_first - __first, __best))
          return; // this sub-range cannot improve on the current best offset

        _Index __hit = __f(__chunk_first, __chunk_last);
        // The brick returns the end of its sub-range when nothing was found.
        if (__hit != __chunk_last) {
          const _Distance __candidate = __hit - __first;
          // Publish the candidate unless another thread has stored an offset that is at least as good.
          _Distance __seen = __best;
          while (__comp(__candidate, __seen)) {
            __best.compare_exchange_weak(__seen, __candidate);
            __seen = __best;
          }
        }
      });
  if (!__status)
    return nullopt;

  const _Distance __final = __best.load();
  if (__final == __sentinel)
    return __last;
  return __first + __final;
}
// Returns __first + i for the smallest index i in [__begin, __end) for which __comp(__first, i) converts to a
// non-zero value, or __first + __end when there is no such index.
//
// Indices are examined in blocks of 8 for as long as at least 8 remain; __comp is invoked for every index of a block
// (the block loop has no early exit), even when an earlier index of the same block already matched. The leftover
// indices are examined one at a time.
template <class _Backend, class _Index, class _DifferenceType, class _Compare>
_LIBCPP_HIDE_FROM_ABI _Index
__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
// Experiments show good block sizes like this
const _DifferenceType __block_size = 8;
// Per-block scratch storage holding the result of __comp for each position of the current block.
alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
while (__end - __begin >= __block_size) {
// OR-reduction over the block: non-zero as soon as one position matched.
_DifferenceType __found = 0;
_PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
const _DifferenceType __t = __comp(__first, __i);
__lane[__i - __begin] = __t;
__found |= __t;
}
if (__found) {
// Locate the first matching position inside the block.
_DifferenceType __i;
// This will vectorize
for (__i = 0; __i < __block_size; ++__i) {
if (__lane[__i]) {
break;
}
}
return __first + __begin + __i;
}
__begin += __block_size;
}
// Keep remainder scalar
while (__begin != __end) {
if (__comp(__first, __begin)) {
return __first + __begin;
}
++__begin;
}
// No index matched.
return __first + __end;
}
// CPU implementation of find_if for the given backend and execution policy.
//
// - parallel policy + random-access iterators: uses __parallel_find with less<> and __b_first == true, running the
//   same algorithm with the parallel part of the policy removed on every sub-range;
// - unsequenced policy + random-access iterators: uses __simd_first over the offsets [0, __last - __first);
// - anything else: falls back to std::find_if.
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_find_if {
  template <class _Policy, class _ForwardIterator, class _Predicate>
  _LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator>
  operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
    constexpr bool __random_access = __has_random_access_iterator_category_or_concept<_ForwardIterator>::value;
    if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && __random_access) {
      return __pstl::__parallel_find<_Backend>(
          __first,
          __last,
          [&__policy, &__pred](_ForwardIterator __chunk_first, _ForwardIterator __chunk_last) {
            auto __chunk_result = __pstl::__find_if<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>()(
                std::__remove_parallel_policy(__policy), __chunk_first, __chunk_last, __pred);
            _LIBCPP_ASSERT_INTERNAL(__chunk_result, "unseq/seq should never try to allocate!");
            return *std::move(__chunk_result);
          },
          less<>{},
          true);
    } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && __random_access) {
      using _Distance = __iter_diff_t<_ForwardIterator>;
      return __pstl::__simd_first<_Backend>(
          __first, _Distance(0), __last - __first, [&__pred](_ForwardIterator __base, _Distance __offset) {
            return __pred(__base[__offset]);
          });
    } else {
      return std::find_if(__first, __last, __pred);
    }
  }
};
} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H

View file

@ -0,0 +1,66 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___PSTL_CPU_ALGOS_FOR_EACH_H
#define _LIBCPP___PSTL_CPU_ALGOS_FOR_EACH_H
#include <__algorithm/for_each.h>
#include <__assert>
#include <__config>
#include <__iterator/concepts.h>
#include <__pstl/backend_fwd.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/empty.h>
#include <optional>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __pstl {
// Invokes __f on each of the __n elements starting at __first, using a SIMD-annotated loop.
// Returns the iterator one past the last element visited, i.e. __first + __n.
template <class _Iterator, class _DifferenceType, class _Function>
_LIBCPP_HIDE_FROM_ABI _Iterator __simd_for_each(_Iterator __first, _DifferenceType __n, _Function __f) noexcept {
  _PSTL_PRAGMA_SIMD
  for (_DifferenceType __offset = 0; __offset < __n; ++__offset) {
    __f(__first[__offset]);
  }
  return __first + __n;
}
// CPU implementation of for_each for the given backend and execution policy.
//
// - parallel policy + random-access iterators: the backend's __for_each splits the range and each sub-range is
//   processed by the same algorithm with the parallel part of the policy removed; the backend's result is returned
//   as-is;
// - unsequenced policy + random-access iterators: uses __simd_for_each;
// - anything else: falls back to std::for_each.
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_for_each {
  template <class _Policy, class _ForwardIterator, class _Function>
  _LIBCPP_HIDE_FROM_ABI optional<__empty>
  operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Function __func) const noexcept {
    constexpr bool __random_access = __has_random_access_iterator_category_or_concept<_ForwardIterator>::value;
    if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && __random_access) {
      return __cpu_traits<_Backend>::__for_each(
          __first, __last, [&__policy, __func](_ForwardIterator __chunk_first, _ForwardIterator __chunk_last) {
            [[maybe_unused]] auto __chunk_result =
                __pstl::__for_each<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>()(
                    std::__remove_parallel_policy(__policy), __chunk_first, __chunk_last, __func);
            _LIBCPP_ASSERT_INTERNAL(__chunk_result, "unseq/seq should never try to allocate!");
          });
    } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && __random_access) {
      __pstl::__simd_for_each(__first, __last - __first, __func);
      return __empty{};
    } else {
      std::for_each(__first, __last, __func);
      return __empty{};
    }
  }
};
} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___PSTL_CPU_ALGOS_FOR_EACH_H

View file

@ -0,0 +1,85 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___PSTL_CPU_ALGOS_MERGE_H
#define _LIBCPP___PSTL_CPU_ALGOS_MERGE_H
#include <__algorithm/merge.h>
#include <__assert>
#include <__config>
#include <__iterator/concepts.h>
#include <__pstl/backend_fwd.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/move.h>
#include <optional>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __pstl {
// CPU implementation of merge for the given backend and execution policy.
//
// - parallel policy + random-access iterators for both inputs and the output: delegates to the backend's __merge,
//   which is given a leaf routine that merges a pair of sub-ranges with the parallel part of the policy removed.
//   Returns nullopt when the backend returns a disengaged optional, otherwise the end of the output range, computed
//   as __result advanced by the combined length of both inputs;
// - anything else: falls back to std::merge.
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_merge {
  template <class _Policy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardOutIterator, class _Comp>
  _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> operator()(
      _Policy&& __policy,
      _ForwardIterator1 __first1,
      _ForwardIterator1 __last1,
      _ForwardIterator2 __first2,
      _ForwardIterator2 __last2,
      _ForwardOutIterator __result,
      _Comp __comp) const noexcept {
    if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                  __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                  __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
                  __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
      auto __res = __cpu_traits<_Backend>::__merge(
          __first1,
          __last1,
          __first2,
          __last2,
          __result,
          __comp,
          [&__policy](_ForwardIterator1 __g_first1,
                      _ForwardIterator1 __g_last1,
                      _ForwardIterator2 __g_first2,
                      _ForwardIterator2 __g_last2,
                      _ForwardOutIterator __g_result,
                      _Comp __g_comp) {
            using _MergeUnseq = __pstl::__merge<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
            [[maybe_unused]] auto __g_res = _MergeUnseq()(
                std::__remove_parallel_policy(__policy),
                std::move(__g_first1),
                std::move(__g_last1),
                std::move(__g_first2),
                std::move(__g_last2),
                std::move(__g_result),
                std::move(__g_comp));
            // Same diagnostic text as the other CPU algorithms (the message used to read "unsed/sed").
            _LIBCPP_ASSERT_INTERNAL(__g_res, "unseq/seq should never try to allocate!");
          });
      if (!__res)
        return nullopt;
      return __result + (__last1 - __first1) + (__last2 - __first2);
    } else {
      return std::merge(__first1, __last1, __first2, __last2, __result, __comp);
    }
  }
};
} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___PSTL_CPU_ALGOS_MERGE_H

View file

@ -0,0 +1,47 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___PSTL_CPU_ALGOS_STABLE_SORT_H
#define _LIBCPP___PSTL_CPU_ALGOS_STABLE_SORT_H
#include <__algorithm/stable_sort.h>
#include <__config>
#include <__pstl/backend_fwd.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/empty.h>
#include <optional>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __pstl {
// CPU implementation of stable_sort for the given backend and execution policy.
//
// For a parallel policy the backend's __stable_sort is used, with std::stable_sort as the leaf routine, and the
// backend's result is returned as-is. For every other policy std::stable_sort is called directly on the whole range.
// The policy object itself is not used.
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_stable_sort {
  template <class _Policy, class _RandomAccessIterator, class _Comp>
  _LIBCPP_HIDE_FROM_ABI optional<__empty>
  operator()(_Policy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) const noexcept {
    if constexpr (!__is_parallel_execution_policy_v<_RawExecutionPolicy>) {
      std::stable_sort(__first, __last, __comp);
      return __empty{};
    } else {
      return __cpu_traits<_Backend>::__stable_sort(
          __first,
          __last,
          __comp,
          [](_RandomAccessIterator __leaf_first, _RandomAccessIterator __leaf_last, _Comp __leaf_comp) {
            std::stable_sort(__leaf_first, __leaf_last, __leaf_comp);
          });
    }
  }
};
} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___PSTL_CPU_ALGOS_STABLE_SORT_H

View file

@ -0,0 +1,153 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_H
#define _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_H
#include <__algorithm/transform.h>
#include <__assert>
#include <__config>
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
#include <__pstl/backend_fwd.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/move.h>
#include <optional>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __pstl {
// Calls __f(__first1[i], __first2[i]) for every i in [0, __n), using a SIMD-annotated loop.
// Returns __first2 + __n.
template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function>
_LIBCPP_HIDE_FROM_ABI _Iterator2
__simd_transform(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Function __f) noexcept {
  _PSTL_PRAGMA_SIMD
  for (_DifferenceType __offset = 0; __offset < __n; ++__offset) {
    __f(__first1[__offset], __first2[__offset]);
  }
  return __first2 + __n;
}
// Calls __f(__first1[i], __first2[i], __first3[i]) for every i in [0, __n), using a SIMD-annotated loop.
// Returns __first3 + __n.
template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Iterator3, class _Function>
_LIBCPP_HIDE_FROM_ABI _Iterator3 __simd_transform(
    _Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Iterator3 __first3, _Function __f) noexcept {
  _PSTL_PRAGMA_SIMD
  for (_DifferenceType __offset = 0; __offset < __n; ++__offset) {
    __f(__first1[__offset], __first2[__offset], __first3[__offset]);
  }
  return __first3 + __n;
}
// CPU implementation of unary transform for the given backend and execution policy.
//
// - parallel policy + random-access input and output iterators: the backend's __for_each splits the input range and
//   each sub-range is transformed (with the parallel part of the policy removed) into the matching position of the
//   output. Returns nullopt when the backend returns a disengaged optional, otherwise the end of the output range;
// - unsequenced policy + random-access input and output iterators: uses __simd_transform;
// - anything else: falls back to std::transform.
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_transform {
  template <class _Policy, class _ForwardIterator, class _ForwardOutIterator, class _UnaryOperation>
  _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator>
  operator()(_Policy&& __policy,
             _ForwardIterator __first,
             _ForwardIterator __last,
             _ForwardOutIterator __result,
             _UnaryOperation __op) const noexcept {
    if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                  __has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
                  __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
      auto __status = __cpu_traits<_Backend>::__for_each(
          __first,
          __last,
          [&__policy, __op, __first, __result](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
            using _TransformUnseq = __pstl::__transform<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
            auto __brick_res      = _TransformUnseq()(
                std::__remove_parallel_policy(__policy),
                __brick_first,
                __brick_last,
                __result + (__brick_first - __first),
                __op);
            _LIBCPP_ASSERT_INTERNAL(__brick_res, "unseq/seq should never try to allocate!");
            return *std::move(__brick_res);
          });
      // The backend reports an internal failure (e.g. failure to allocate) through a disengaged optional; forward
      // it instead of claiming success, as __cpu_parallel_transform_binary does.
      if (!__status)
        return nullopt;
      return __result + (__last - __first);
    } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                         __has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
                         __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
      return __pstl::__simd_transform(
          __first,
          __last - __first,
          __result,
          [&](__iter_reference<_ForwardIterator> __in_value, __iter_reference<_ForwardOutIterator> __out_value) {
            __out_value = __op(__in_value);
          });
    } else {
      return std::transform(__first, __last, __result, __op);
    }
  }
};
// CPU implementation of binary transform for the given backend and execution policy.
//
// - parallel policy + random-access iterators for both inputs and the output: the backend's __for_each splits the
//   first input range; each sub-range is transformed (with the parallel part of the policy removed) together with the
//   matching positions of the second input and of the output. Returns nullopt when the backend returns a disengaged
//   optional, otherwise the end of the output range;
// - unsequenced policy + random-access iterators for both inputs and the output: uses __simd_transform;
// - anything else: falls back to std::transform.
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_transform_binary {
  template <class _Policy,
            class _ForwardIterator1,
            class _ForwardIterator2,
            class _ForwardOutIterator,
            class _BinaryOperation>
  _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator>
  operator()(_Policy&& __policy,
             _ForwardIterator1 __first1,
             _ForwardIterator1 __last1,
             _ForwardIterator2 __first2,
             _ForwardOutIterator __result,
             _BinaryOperation __op) const noexcept {
    if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                  __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                  __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
                  __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
      auto __res = __cpu_traits<_Backend>::__for_each(
          __first1,
          __last1,
          [&__policy, __op, __first1, __first2, __result](
              _ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last) {
            using _TransformBinaryUnseq =
                __pstl::__transform_binary<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
            auto __brick_res = _TransformBinaryUnseq()(
                std::__remove_parallel_policy(__policy),
                __brick_first,
                __brick_last,
                __first2 + (__brick_first - __first1),
                __result + (__brick_first - __first1),
                __op);
            // Same internal check as the other CPU algorithms; the value handed back to the backend is unchanged.
            _LIBCPP_ASSERT_INTERNAL(__brick_res, "unseq/seq should never try to allocate!");
            return __brick_res;
          });
      if (!__res)
        return nullopt;
      return __result + (__last1 - __first1);
    } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                         __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                         __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
                         __has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
      return __pstl::__simd_transform(
          __first1,
          __last1 - __first1,
          __first2,
          __result,
          [&](__iter_reference<_ForwardIterator1> __in1,
              __iter_reference<_ForwardIterator2> __in2,
              __iter_reference<_ForwardOutIterator> __out_value) { __out_value = __op(__in1, __in2); });
    } else {
      return std::transform(__first1, __last1, __first2, __result, __op);
    }
  }
};
} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_H

View file

@ -0,0 +1,216 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H
#define _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H
#include <__assert>
#include <__config>
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
#include <__numeric/transform_reduce.h>
#include <__pstl/backend_fwd.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/is_arithmetic.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/move.h>
#include <cstddef>
#include <new>
#include <optional>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __pstl {
// Overload selected when the reduction desugars to "plus" and both the accumulator type and the result type of __f
// are arithmetic: adds __f(0), ..., __f(__n - 1) to __init with a SIMD "+" reduction and returns the sum. The binary
// operation argument is accepted but not used.
template <typename _Backend,
          typename _DifferenceType,
          typename _Tp,
          typename _BinaryOperation,
          typename _UnaryOperation,
          typename _UnaryResult = invoke_result_t<_UnaryOperation, _DifferenceType>,
          __enable_if_t<__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> &&
                            is_arithmetic_v<_UnaryResult>,
                        int> = 0>
_LIBCPP_HIDE_FROM_ABI _Tp
__simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept {
  _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init)
  for (_DifferenceType __idx = 0; __idx < __n; ++__idx) {
    __init += __f(__idx);
  }
  return __init;
}
// Generic overload, selected whenever the "plus on arithmetic types" condition of the overload above does not hold
// (the enable_if condition below is the negation of that one).
//
// Folds __f(0), ..., __f(__n - 1) into __init with __binary_op and returns the result.
//
// When more than two blocks' worth of elements are available (and a block holds more than one _Tp), the elements are
// accumulated into __block_size independent partial results: partial result j combines __f(j), __f(__block_size + j),
// __f(2 * __block_size + j), ... The partial results are then folded into __init in index order. Elements are
// therefore not combined in the same grouping as a plain left fold. Otherwise a plain left-to-right fold is used.
template <typename _Backend,
typename _Size,
typename _Tp,
typename _BinaryOperation,
typename _UnaryOperation,
typename _UnaryResult = invoke_result_t<_UnaryOperation, _Size>,
__enable_if_t<!(__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> &&
is_arithmetic_v<_UnaryResult>),
int> = 0>
_LIBCPP_HIDE_FROM_ABI _Tp
__simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
constexpr size_t __lane_size = __cpu_traits<_Backend>::__lane_size;
// Number of _Tp objects that fit into one lane-sized buffer.
const _Size __block_size = __lane_size / sizeof(_Tp);
if (__n > 2 * __block_size && __block_size > 1) {
// Raw, lane-aligned storage; the _Tp objects living in it are created and destroyed explicitly below.
alignas(__lane_size) char __lane_buffer[__lane_size];
_Tp* __lane = reinterpret_cast<_Tp*>(__lane_buffer);
// initializer: construct each partial result from the matching elements of the first two blocks
_PSTL_PRAGMA_SIMD
for (_Size __i = 0; __i < __block_size; ++__i) {
::new (__lane + __i) _Tp(__binary_op(__f(__i), __f(__block_size + __i)));
}
// main loop: consume the remaining full blocks, one block per outer iteration
_Size __i = 2 * __block_size;
// Index one past the last element that belongs to a full block.
const _Size __last_iteration = __block_size * (__n / __block_size);
for (; __i < __last_iteration; __i += __block_size) {
_PSTL_PRAGMA_SIMD
for (_Size __j = 0; __j < __block_size; ++__j) {
__lane[__j] = __binary_op(std::move(__lane[__j]), __f(__i + __j));
}
}
// remainder: the trailing (__n - __last_iteration) elements go into the first partial results
_PSTL_PRAGMA_SIMD
for (_Size __j = 0; __j < __n - __last_iteration; ++__j) {
__lane[__j] = __binary_op(std::move(__lane[__j]), __f(__last_iteration + __j));
}
// combiner: fold the partial results into __init, in index order
for (_Size __j = 0; __j < __block_size; ++__j) {
__init = __binary_op(std::move(__init), std::move(__lane[__j]));
}
// destroyer: end the lifetime of the objects created by placement new in the initializer
_PSTL_PRAGMA_SIMD
for (_Size __j = 0; __j < __block_size; ++__j) {
__lane[__j].~_Tp();
}
} else {
// Too few elements (or a block holds at most one _Tp): plain left-to-right fold.
for (_Size __i = 0; __i < __n; ++__i) {
__init = __binary_op(std::move(__init), __f(__i));
}
}
return __init;
}
// CPU implementation of the two-range transform_reduce for the given backend and execution policy.
//
// - parallel policy + random-access iterators for both inputs: delegates to the backend's __transform_reduce, giving
//   it a per-element transform (pairing each element of the first range with the element at the same offset in the
//   second range) and a leaf routine that reduces a sub-range with the parallel part of the policy removed. The
//   backend's result is returned as-is;
// - unsequenced policy + random-access iterators for both inputs: uses __simd_transform_reduce;
// - anything else: falls back to std::transform_reduce.
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_transform_reduce_binary {
  template <class _Policy,
            class _ForwardIterator1,
            class _ForwardIterator2,
            class _Tp,
            class _BinaryOperation1,
            class _BinaryOperation2>
  _LIBCPP_HIDE_FROM_ABI optional<_Tp> operator()(
      _Policy&& __policy,
      _ForwardIterator1 __first1,
      _ForwardIterator1 __last1,
      _ForwardIterator2 __first2,
      _Tp __init,
      _BinaryOperation1 __reduce,
      _BinaryOperation2 __transform) const noexcept {
    if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
                  __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                  __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
      return __cpu_traits<_Backend>::__transform_reduce(
          __first1,
          std::move(__last1),
          [__first1, __first2, __transform](_ForwardIterator1 __iter) {
            return __transform(*__iter, *(__first2 + (__iter - __first1)));
          },
          std::move(__init),
          // Passed as a copy on purpose: the leaf lambda below also copy-captures __reduce, and the order in which
          // sibling function arguments are initialized is unspecified, so moving here could leave the lambda with a
          // moved-from reducer.
          __reduce,
          [&__policy, __first1, __first2, __reduce, __transform](
              _ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last, _Tp __brick_init) {
            using _TransformReduceBinaryUnseq =
                __pstl::__transform_reduce_binary<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
            auto __res = _TransformReduceBinaryUnseq()(
                std::__remove_parallel_policy(__policy),
                __brick_first,
                std::move(__brick_last),
                __first2 + (__brick_first - __first1),
                std::move(__brick_init),
                std::move(__reduce),
                std::move(__transform));
            // Check the result before dereferencing it, like the single-range transform_reduce does.
            _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!");
            return *std::move(__res);
          });
    } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                         __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                         __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
      return __pstl::__simd_transform_reduce<_Backend>(
          __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
            return __transform(__first1[__i], __first2[__i]);
          });
    } else {
      return std::transform_reduce(
          std::move(__first1),
          std::move(__last1),
          std::move(__first2),
          std::move(__init),
          std::move(__reduce),
          std::move(__transform));
    }
  }
};
// CPU implementation of the single-range transform_reduce for the given backend and execution policy.
//
// - parallel policy + random-access iterators: delegates to the backend's __transform_reduce, giving it a per-element
//   transform and a leaf routine that reduces a sub-range with the parallel part of the policy removed. The backend's
//   result is returned as-is;
// - unsequenced policy + random-access iterators: uses __simd_transform_reduce;
// - anything else: falls back to std::transform_reduce.
template <class _Backend, class _RawExecutionPolicy>
struct __cpu_parallel_transform_reduce {
  template <class _Policy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
  _LIBCPP_HIDE_FROM_ABI optional<_Tp>
  operator()(_Policy&& __policy,
             _ForwardIterator __first,
             _ForwardIterator __last,
             _Tp __init,
             _BinaryOperation __reduce,
             _UnaryOperation __transform) const noexcept {
    constexpr bool __random_access = __has_random_access_iterator_category_or_concept<_ForwardIterator>::value;
    if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && __random_access) {
      return __cpu_traits<_Backend>::__transform_reduce(
          std::move(__first),
          std::move(__last),
          [__transform](_ForwardIterator __pos) { return __transform(*__pos); },
          std::move(__init),
          __reduce,
          [&__policy, __transform, __reduce](auto __chunk_first, auto __chunk_last, _Tp __chunk_init) {
            auto __chunk_result =
                __pstl::__transform_reduce<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>()(
                    std::__remove_parallel_policy(__policy),
                    std::move(__chunk_first),
                    std::move(__chunk_last),
                    std::move(__chunk_init),
                    std::move(__reduce),
                    std::move(__transform));
            _LIBCPP_ASSERT_INTERNAL(__chunk_result, "unseq/seq should never try to allocate!");
            return *std::move(__chunk_result);
          });
    } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && __random_access) {
      return __pstl::__simd_transform_reduce<_Backend>(
          __last - __first,
          std::move(__init),
          std::move(__reduce),
          [__first, &__transform](__iter_diff_t<_ForwardIterator> __offset) { return __transform(__first[__offset]); });
    } else {
      return std::transform_reduce(
          std::move(__first), std::move(__last), std::move(__init), std::move(__reduce), std::move(__transform));
    }
  }
};
} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H