Farid Zakaria 2023-07-07 10:48:38 -07:00 committed by GitHub
commit 5af5413d27
168 changed files with 50926 additions and 6 deletions


@@ -182,6 +182,7 @@ include third_party/double-conversion/test/test.mk
include third_party/lua/lua.mk
include third_party/zstd/zstd.mk
include third_party/tr/tr.mk
include third_party/tbb/tbb.mk
include third_party/sed/sed.mk
include third_party/awk/awk.mk
include third_party/hiredis/hiredis.mk


@@ -781,7 +781,7 @@ bool __cxx_atomic_compare_exchange_strong(
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
bool __cxx_atomic_compare_exchange_strong(
__cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success,
memory_order __failure) {
@@ -835,7 +835,7 @@ _Tp __cxx_atomic_fetch_add(volatile __cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp, typename _Td>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta,
memory_order __order) {
return __atomic_fetch_add(&__a->__a_value, __delta * __skip_amt<_Tp>::value,
@@ -851,7 +851,7 @@ _Tp __cxx_atomic_fetch_sub(volatile __cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp, typename _Td>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta,
memory_order __order) {
return __atomic_fetch_sub(&__a->__a_value, __delta * __skip_amt<_Tp>::value,
@@ -867,7 +867,7 @@ _Tp __cxx_atomic_fetch_and(volatile __cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp>* __a,
_Tp __pattern, memory_order __order) {
return __atomic_fetch_and(&__a->__a_value, __pattern,
@@ -875,7 +875,7 @@ _Tp __cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_base_impl<_Tp>* __a,
_Tp __pattern, memory_order __order) {
return __atomic_fetch_or(&__a->__a_value, __pattern,
@@ -883,7 +883,7 @@ _Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_fetch_or(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern,
memory_order __order) {
return __atomic_fetch_or(&__a->__a_value, __pattern,


@@ -109,6 +109,7 @@ THIRD_PARTY_LIBCXX_A_HDRS = \
third_party/libcxx/refstring.hh \
third_party/libcxx/regex \
third_party/libcxx/scoped_allocator \
third_party/libcxx/span \
third_party/libcxx/set \
third_party/libcxx/sstream \
third_party/libcxx/stack \

590
third_party/libcxx/span vendored Normal file

@@ -0,0 +1,590 @@
// -*- C++ -*-
//===------------------------------ span ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
#ifndef _LIBCPP_SPAN
#define _LIBCPP_SPAN
/*
span synopsis
namespace std {
// constants
inline constexpr size_t dynamic_extent = numeric_limits<size_t>::max();
// [views.span], class template span
template <class ElementType, size_t Extent = dynamic_extent>
class span;
// [span.objectrep], views of object representation
template <class ElementType, size_t Extent>
span<const byte, ((Extent == dynamic_extent) ? dynamic_extent :
(sizeof(ElementType) * Extent))> as_bytes(span<ElementType, Extent> s) noexcept;
template <class ElementType, size_t Extent>
span< byte, ((Extent == dynamic_extent) ? dynamic_extent :
(sizeof(ElementType) * Extent))> as_writable_bytes(span<ElementType, Extent> s) noexcept;
namespace std {
template <class ElementType, size_t Extent = dynamic_extent>
class span {
public:
// constants and types
using element_type = ElementType;
using value_type = remove_cv_t<ElementType>;
using index_type = size_t;
using difference_type = ptrdiff_t;
using pointer = element_type*;
using const_pointer = const element_type*;
using reference = element_type&;
using const_reference = const element_type&;
using iterator = implementation-defined;
using const_iterator = implementation-defined;
using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
static constexpr index_type extent = Extent;
// [span.cons], span constructors, copy, assignment, and destructor
constexpr span() noexcept;
constexpr span(pointer ptr, index_type count);
constexpr span(pointer firstElem, pointer lastElem);
template <size_t N>
constexpr span(element_type (&arr)[N]) noexcept;
template <size_t N>
constexpr span(array<value_type, N>& arr) noexcept;
template <size_t N>
constexpr span(const array<value_type, N>& arr) noexcept;
template <class Container>
constexpr span(Container& cont);
template <class Container>
constexpr span(const Container& cont);
constexpr span(const span& other) noexcept = default;
template <class OtherElementType, size_t OtherExtent>
constexpr span(const span<OtherElementType, OtherExtent>& s) noexcept;
~span() noexcept = default;
constexpr span& operator=(const span& other) noexcept = default;
// [span.sub], span subviews
template <size_t Count>
constexpr span<element_type, Count> first() const;
template <size_t Count>
constexpr span<element_type, Count> last() const;
template <size_t Offset, size_t Count = dynamic_extent>
constexpr span<element_type, see below> subspan() const;
constexpr span<element_type, dynamic_extent> first(index_type count) const;
constexpr span<element_type, dynamic_extent> last(index_type count) const;
constexpr span<element_type, dynamic_extent> subspan(index_type offset, index_type count = dynamic_extent) const;
// [span.obs], span observers
constexpr index_type size() const noexcept;
constexpr index_type size_bytes() const noexcept;
constexpr bool empty() const noexcept;
// [span.elem], span element access
constexpr reference operator[](index_type idx) const;
constexpr reference front() const;
constexpr reference back() const;
constexpr pointer data() const noexcept;
// [span.iterators], span iterator support
constexpr iterator begin() const noexcept;
constexpr iterator end() const noexcept;
constexpr const_iterator cbegin() const noexcept;
constexpr const_iterator cend() const noexcept;
constexpr reverse_iterator rbegin() const noexcept;
constexpr reverse_iterator rend() const noexcept;
constexpr const_reverse_iterator crbegin() const noexcept;
constexpr const_reverse_iterator crend() const noexcept;
private:
pointer data_; // exposition only
index_type size_; // exposition only
};
template<class T, size_t N>
span(T (&)[N]) -> span<T, N>;
template<class T, size_t N>
span(array<T, N>&) -> span<T, N>;
template<class T, size_t N>
span(const array<T, N>&) -> span<const T, N>;
template<class Container>
span(Container&) -> span<typename Container::value_type>;
template<class Container>
span(const Container&) -> span<const typename Container::value_type>;
} // namespace std
*/
#include "third_party/libcxx/__config"
#include "third_party/libcxx/cstddef" // for ptrdiff_t
#include "third_party/libcxx/iterator" // for iterators
#include "third_party/libcxx/array" // for array
#include "third_party/libcxx/type_traits" // for remove_cv, etc
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17
inline constexpr size_t dynamic_extent = numeric_limits<size_t>::max();
template <typename _Tp, size_t _Extent = dynamic_extent> class span;
template <class _Tp>
struct __is_span_impl : public false_type {};
template <class _Tp, size_t _Extent>
struct __is_span_impl<span<_Tp, _Extent>> : public true_type {};
template <class _Tp>
struct __is_span : public __is_span_impl<remove_cv_t<_Tp>> {};
template <class _Tp>
struct __is_std_array_impl : public false_type {};
template <class _Tp, size_t _Sz>
struct __is_std_array_impl<array<_Tp, _Sz>> : public true_type {};
template <class _Tp>
struct __is_std_array : public __is_std_array_impl<remove_cv_t<_Tp>> {};
template <class _Tp, class _ElementType, class = void>
struct __is_span_compatible_container : public false_type {};
template <class _Tp, class _ElementType>
struct __is_span_compatible_container<_Tp, _ElementType,
void_t<
// is not a specialization of span
typename enable_if<!__is_span<_Tp>::value, nullptr_t>::type,
// is not a specialization of array
typename enable_if<!__is_std_array<_Tp>::value, nullptr_t>::type,
// is_array_v<Container> is false,
typename enable_if<!is_array_v<_Tp>, nullptr_t>::type,
// data(cont) and size(cont) are well formed
decltype(data(declval<_Tp>())),
decltype(size(declval<_Tp>())),
// remove_pointer_t<decltype(data(cont))>(*)[] is convertible to ElementType(*)[]
typename enable_if<
is_convertible_v<remove_pointer_t<decltype(data(declval<_Tp &>()))>(*)[],
_ElementType(*)[]>,
nullptr_t>::type
>>
: public true_type {};
template <typename _Tp, size_t _Extent>
class _LIBCPP_TEMPLATE_VIS span {
public:
// constants and types
using element_type = _Tp;
using value_type = remove_cv_t<_Tp>;
using index_type = size_t;
using difference_type = ptrdiff_t;
using pointer = _Tp *;
using const_pointer = const _Tp *;
using reference = _Tp &;
using const_reference = const _Tp &;
using iterator = __wrap_iter<pointer>;
using const_iterator = __wrap_iter<const_pointer>;
using reverse_iterator = _VSTD::reverse_iterator<iterator>;
using const_reverse_iterator = _VSTD::reverse_iterator<const_iterator>;
static constexpr index_type extent = _Extent;
// [span.cons], span constructors, copy, assignment, and destructor
_LIBCPP_INLINE_VISIBILITY constexpr span() noexcept : __data{nullptr}
{ static_assert(_Extent == 0, "Can't default construct a statically sized span with size > 0"); }
constexpr span (const span&) noexcept = default;
constexpr span& operator=(const span&) noexcept = default;
_LIBCPP_INLINE_VISIBILITY constexpr span(pointer __ptr, index_type __count) : __data{__ptr}
{ (void)__count; _LIBCPP_ASSERT(_Extent == __count, "size mismatch in span's constructor (ptr, len)"); }
_LIBCPP_INLINE_VISIBILITY constexpr span(pointer __f, pointer __l) : __data{__f}
{ (void)__l; _LIBCPP_ASSERT(_Extent == distance(__f, __l), "size mismatch in span's constructor (ptr, ptr)"); }
_LIBCPP_INLINE_VISIBILITY constexpr span(element_type (&__arr)[_Extent]) noexcept : __data{__arr} {}
_LIBCPP_INLINE_VISIBILITY constexpr span( array<value_type, _Extent>& __arr) noexcept : __data{__arr.data()} {}
_LIBCPP_INLINE_VISIBILITY constexpr span(const array<value_type, _Extent>& __arr) noexcept : __data{__arr.data()} {}
template <class _OtherElementType>
_LIBCPP_INLINE_VISIBILITY
constexpr span(const span<_OtherElementType, _Extent>& __other,
enable_if_t<
is_convertible_v<_OtherElementType(*)[], element_type (*)[]>,
nullptr_t> = nullptr)
: __data{__other.data()} {}
template <class _OtherElementType>
_LIBCPP_INLINE_VISIBILITY
constexpr span(const span<_OtherElementType, dynamic_extent>& __other,
enable_if_t<
is_convertible_v<_OtherElementType(*)[], element_type (*)[]>,
nullptr_t> = nullptr) noexcept
: __data{__other.data()} { _LIBCPP_ASSERT(_Extent == __other.size(), "size mismatch in span's constructor (other span)"); }
// ~span() noexcept = default;
template <size_t _Count>
_LIBCPP_INLINE_VISIBILITY
constexpr span<element_type, _Count> first() const noexcept
{
static_assert(_Count <= _Extent, "Count out of range in span::first()");
return {data(), _Count};
}
template <size_t _Count>
_LIBCPP_INLINE_VISIBILITY
constexpr span<element_type, _Count> last() const noexcept
{
static_assert(_Count <= _Extent, "Count out of range in span::last()");
return {data() + size() - _Count, _Count};
}
_LIBCPP_INLINE_VISIBILITY
constexpr span<element_type, dynamic_extent> first(index_type __count) const noexcept
{
_LIBCPP_ASSERT(__count <= size(), "Count out of range in span::first(count)");
return {data(), __count};
}
_LIBCPP_INLINE_VISIBILITY
constexpr span<element_type, dynamic_extent> last(index_type __count) const noexcept
{
_LIBCPP_ASSERT(__count <= size(), "Count out of range in span::last(count)");
return {data() + size() - __count, __count};
}
template <size_t _Offset, size_t _Count = dynamic_extent>
_LIBCPP_INLINE_VISIBILITY
constexpr auto subspan() const noexcept
-> span<element_type, _Count != dynamic_extent ? _Count : _Extent - _Offset>
{
static_assert(_Offset <= _Extent, "Offset out of range in span::subspan()");
return {data() + _Offset, _Count == dynamic_extent ? size() - _Offset : _Count};
}
_LIBCPP_INLINE_VISIBILITY
constexpr span<element_type, dynamic_extent>
subspan(index_type __offset, index_type __count = dynamic_extent) const noexcept
{
_LIBCPP_ASSERT(__offset <= size(), "Offset out of range in span::subspan(offset, count)");
_LIBCPP_ASSERT(__count <= size() || __count == dynamic_extent, "Count out of range in span::subspan(offset, count)");
if (__count == dynamic_extent)
return {data() + __offset, size() - __offset};
_LIBCPP_ASSERT(__offset <= size() - __count, "count + offset out of range in span::subspan(offset, count)");
return {data() + __offset, __count};
}
_LIBCPP_INLINE_VISIBILITY constexpr index_type size() const noexcept { return _Extent; }
_LIBCPP_INLINE_VISIBILITY constexpr index_type size_bytes() const noexcept { return _Extent * sizeof(element_type); }
_LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return _Extent == 0; }
_LIBCPP_INLINE_VISIBILITY constexpr reference operator[](index_type __idx) const noexcept
{
_LIBCPP_ASSERT(__idx >= 0 && __idx < size(), "span<T,N>[] index out of bounds");
return __data[__idx];
}
_LIBCPP_INLINE_VISIBILITY constexpr reference front() const noexcept
{
static_assert(_Extent > 0, "span<T,N>[].front() on empty span");
return __data[0];
}
_LIBCPP_INLINE_VISIBILITY constexpr reference back() const noexcept
{
static_assert(_Extent > 0, "span<T,N>[].back() on empty span");
return __data[size()-1];
}
_LIBCPP_INLINE_VISIBILITY constexpr pointer data() const noexcept { return __data; }
// [span.iter], span iterator support
_LIBCPP_INLINE_VISIBILITY constexpr iterator begin() const noexcept { return iterator(data()); }
_LIBCPP_INLINE_VISIBILITY constexpr iterator end() const noexcept { return iterator(data() + size()); }
_LIBCPP_INLINE_VISIBILITY constexpr const_iterator cbegin() const noexcept { return const_iterator(data()); }
_LIBCPP_INLINE_VISIBILITY constexpr const_iterator cend() const noexcept { return const_iterator(data() + size()); }
_LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); }
_LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); }
_LIBCPP_INLINE_VISIBILITY constexpr const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(cend()); }
_LIBCPP_INLINE_VISIBILITY constexpr const_reverse_iterator crend() const noexcept { return const_reverse_iterator(cbegin()); }
_LIBCPP_INLINE_VISIBILITY constexpr void swap(span &__other) noexcept
{
pointer __p = __data;
__data = __other.__data;
__other.__data = __p;
}
_LIBCPP_INLINE_VISIBILITY span<const byte, _Extent * sizeof(element_type)> __as_bytes() const noexcept
{ return {reinterpret_cast<const byte *>(data()), size_bytes()}; }
_LIBCPP_INLINE_VISIBILITY span<byte, _Extent * sizeof(element_type)> __as_writable_bytes() const noexcept
{ return {reinterpret_cast<byte *>(data()), size_bytes()}; }
private:
pointer __data;
};
template <typename _Tp>
class _LIBCPP_TEMPLATE_VIS span<_Tp, dynamic_extent> {
private:
public:
// constants and types
using element_type = _Tp;
using value_type = remove_cv_t<_Tp>;
using index_type = size_t;
using difference_type = ptrdiff_t;
using pointer = _Tp *;
using const_pointer = const _Tp *;
using reference = _Tp &;
using const_reference = const _Tp &;
using iterator = __wrap_iter<pointer>;
using const_iterator = __wrap_iter<const_pointer>;
using reverse_iterator = _VSTD::reverse_iterator<iterator>;
using const_reverse_iterator = _VSTD::reverse_iterator<const_iterator>;
static constexpr index_type extent = dynamic_extent;
// [span.cons], span constructors, copy, assignment, and destructor
_LIBCPP_INLINE_VISIBILITY constexpr span() noexcept : __data{nullptr}, __size{0} {}
constexpr span (const span&) noexcept = default;
constexpr span& operator=(const span&) noexcept = default;
_LIBCPP_INLINE_VISIBILITY constexpr span(pointer __ptr, index_type __count) : __data{__ptr}, __size{__count} {}
_LIBCPP_INLINE_VISIBILITY constexpr span(pointer __f, pointer __l) : __data{__f}, __size{static_cast<size_t>(distance(__f, __l))} {}
template <size_t _Sz>
_LIBCPP_INLINE_VISIBILITY
constexpr span(element_type (&__arr)[_Sz]) noexcept : __data{__arr}, __size{_Sz} {}
template <size_t _Sz>
_LIBCPP_INLINE_VISIBILITY
constexpr span(array<value_type, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {}
template <size_t _Sz>
_LIBCPP_INLINE_VISIBILITY
constexpr span(const array<value_type, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {}
template <class _Container>
_LIBCPP_INLINE_VISIBILITY
constexpr span( _Container& __c,
enable_if_t<__is_span_compatible_container<_Container, _Tp>::value, nullptr_t> = nullptr)
: __data{_VSTD::data(__c)}, __size{(index_type) _VSTD::size(__c)} {}
template <class _Container>
_LIBCPP_INLINE_VISIBILITY
constexpr span(const _Container& __c,
enable_if_t<__is_span_compatible_container<const _Container, _Tp>::value, nullptr_t> = nullptr)
: __data{_VSTD::data(__c)}, __size{(index_type) _VSTD::size(__c)} {}
template <class _OtherElementType, size_t _OtherExtent>
_LIBCPP_INLINE_VISIBILITY
constexpr span(const span<_OtherElementType, _OtherExtent>& __other,
enable_if_t<
is_convertible_v<_OtherElementType(*)[], element_type (*)[]>,
nullptr_t> = nullptr) noexcept
: __data{__other.data()}, __size{__other.size()} {}
// ~span() noexcept = default;
template <size_t _Count>
_LIBCPP_INLINE_VISIBILITY
constexpr span<element_type, _Count> first() const noexcept
{
_LIBCPP_ASSERT(_Count <= size(), "Count out of range in span::first()");
return {data(), _Count};
}
template <size_t _Count>
_LIBCPP_INLINE_VISIBILITY
constexpr span<element_type, _Count> last() const noexcept
{
_LIBCPP_ASSERT(_Count <= size(), "Count out of range in span::last()");
return {data() + size() - _Count, _Count};
}
_LIBCPP_INLINE_VISIBILITY
constexpr span<element_type, dynamic_extent> first(index_type __count) const noexcept
{
_LIBCPP_ASSERT(__count <= size(), "Count out of range in span::first(count)");
return {data(), __count};
}
_LIBCPP_INLINE_VISIBILITY
constexpr span<element_type, dynamic_extent> last (index_type __count) const noexcept
{
_LIBCPP_ASSERT(__count <= size(), "Count out of range in span::last(count)");
return {data() + size() - __count, __count};
}
template <size_t _Offset, size_t _Count = dynamic_extent>
_LIBCPP_INLINE_VISIBILITY
constexpr span<_Tp, dynamic_extent> subspan() const noexcept
{
_LIBCPP_ASSERT(_Offset <= size(), "Offset out of range in span::subspan()");
_LIBCPP_ASSERT(_Count == dynamic_extent || _Offset + _Count <= size(), "Count out of range in span::subspan()");
return {data() + _Offset, _Count == dynamic_extent ? size() - _Offset : _Count};
}
constexpr span<element_type, dynamic_extent>
_LIBCPP_INLINE_VISIBILITY
subspan(index_type __offset, index_type __count = dynamic_extent) const noexcept
{
_LIBCPP_ASSERT(__offset <= size(), "Offset out of range in span::subspan(offset, count)");
_LIBCPP_ASSERT(__count <= size() || __count == dynamic_extent, "count out of range in span::subspan(offset, count)");
if (__count == dynamic_extent)
return {data() + __offset, size() - __offset};
_LIBCPP_ASSERT(__offset <= size() - __count, "Offset + count out of range in span::subspan(offset, count)");
return {data() + __offset, __count};
}
_LIBCPP_INLINE_VISIBILITY constexpr index_type size() const noexcept { return __size; }
_LIBCPP_INLINE_VISIBILITY constexpr index_type size_bytes() const noexcept { return __size * sizeof(element_type); }
_LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return __size == 0; }
_LIBCPP_INLINE_VISIBILITY constexpr reference operator[](index_type __idx) const noexcept
{
_LIBCPP_ASSERT(__idx >= 0 && __idx < size(), "span<T>[] index out of bounds");
return __data[__idx];
}
_LIBCPP_INLINE_VISIBILITY constexpr reference front() const noexcept
{
_LIBCPP_ASSERT(!empty(), "span<T>[].front() on empty span");
return __data[0];
}
_LIBCPP_INLINE_VISIBILITY constexpr reference back() const noexcept
{
_LIBCPP_ASSERT(!empty(), "span<T>[].back() on empty span");
return __data[size()-1];
}
_LIBCPP_INLINE_VISIBILITY constexpr pointer data() const noexcept { return __data; }
// [span.iter], span iterator support
_LIBCPP_INLINE_VISIBILITY constexpr iterator begin() const noexcept { return iterator(data()); }
_LIBCPP_INLINE_VISIBILITY constexpr iterator end() const noexcept { return iterator(data() + size()); }
_LIBCPP_INLINE_VISIBILITY constexpr const_iterator cbegin() const noexcept { return const_iterator(data()); }
_LIBCPP_INLINE_VISIBILITY constexpr const_iterator cend() const noexcept { return const_iterator(data() + size()); }
_LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); }
_LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); }
_LIBCPP_INLINE_VISIBILITY constexpr const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(cend()); }
_LIBCPP_INLINE_VISIBILITY constexpr const_reverse_iterator crend() const noexcept { return const_reverse_iterator(cbegin()); }
_LIBCPP_INLINE_VISIBILITY constexpr void swap(span &__other) noexcept
{
pointer __p = __data;
__data = __other.__data;
__other.__data = __p;
index_type __sz = __size;
__size = __other.__size;
__other.__size = __sz;
}
_LIBCPP_INLINE_VISIBILITY span<const byte, dynamic_extent> __as_bytes() const noexcept
{ return {reinterpret_cast<const byte *>(data()), size_bytes()}; }
_LIBCPP_INLINE_VISIBILITY span<byte, dynamic_extent> __as_writable_bytes() const noexcept
{ return {reinterpret_cast<byte *>(data()), size_bytes()}; }
private:
pointer __data;
index_type __size;
};
// tuple interface
template <class _Tp, size_t _Size>
struct _LIBCPP_TEMPLATE_VIS tuple_size<span<_Tp, _Size>>
: public integral_constant<size_t, _Size> {};
template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS tuple_size<span<_Tp, dynamic_extent>>; // declared but not defined
template <size_t _Ip, class _Tp, size_t _Size>
struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, span<_Tp, _Size>>
{
static_assert( dynamic_extent != _Size, "std::tuple_element<> not supported for std::span<T, dynamic_extent>");
static_assert(_Ip < _Size, "Index out of bounds in std::tuple_element<> (std::span)");
typedef _Tp type;
};
template <size_t _Ip, class _Tp, size_t _Size>
_LIBCPP_INLINE_VISIBILITY constexpr
_Tp&
get(span<_Tp, _Size> __s) noexcept
{
static_assert( dynamic_extent != _Size, "std::get<> not supported for std::span<T, dynamic_extent>");
static_assert(_Ip < _Size, "Index out of bounds in std::get<> (std::span)");
return __s[_Ip];
}
// as_bytes & as_writable_bytes
template <class _Tp, size_t _Extent>
_LIBCPP_INLINE_VISIBILITY
auto as_bytes(span<_Tp, _Extent> __s) noexcept
-> decltype(__s.__as_bytes())
{ return __s.__as_bytes(); }
template <class _Tp, size_t _Extent>
_LIBCPP_INLINE_VISIBILITY
auto as_writable_bytes(span<_Tp, _Extent> __s) noexcept
-> enable_if_t<!is_const_v<_Tp>, decltype(__s.__as_writable_bytes())>
{ return __s.__as_writable_bytes(); }
template <class _Tp, size_t _Extent>
_LIBCPP_INLINE_VISIBILITY
constexpr void swap(span<_Tp, _Extent> &__lhs, span<_Tp, _Extent> &__rhs) noexcept
{ __lhs.swap(__rhs); }
// Deduction guides
template<class _Tp, size_t _Sz>
span(_Tp (&)[_Sz]) -> span<_Tp, _Sz>;
template<class _Tp, size_t _Sz>
span(array<_Tp, _Sz>&) -> span<_Tp, _Sz>;
template<class _Tp, size_t _Sz>
span(const array<_Tp, _Sz>&) -> span<const _Tp, _Sz>;
template<class _Container>
span(_Container&) -> span<typename _Container::value_type>;
template<class _Container>
span(const _Container&) -> span<const typename _Container::value_type>;
#endif // _LIBCPP_STD_VER > 17
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_SPAN
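
For orientation, here is a minimal usage sketch of the span interface declared above. It is not part of the commit; the buffer contents and function name are illustrative, and outside this tree the include would simply be <span>.

#include <cstdio>
#include <vector>
#include <span>   // within this tree: "third_party/libcxx/span"

int span_demo() {
  int raw[6] = {1, 2, 3, 4, 5, 6};
  std::span<int, 6> fixed{raw};               // static extent, array constructor
  std::span<int> tail = fixed.subspan(2);     // dynamic-extent view over {3, 4, 5, 6}
  std::vector<int> v{7, 8, 9};
  std::span<int> from_container{v};           // container constructor (see __is_span_compatible_container above)
  auto bytes = std::as_bytes(fixed);          // read-only view of the object representation
  std::printf("%zu %zu %zu %zu\n", fixed.size(), tail.size(),
              from_container.size(), bytes.size_bytes());
  return static_cast<int>(tail.front());      // 3
}

Note that this vendored header predates the final C++20 wording (it still exposes index_type), but every call used in the sketch exists in both variants.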

17
third_party/tbb/README.cosmo vendored Normal file

@@ -0,0 +1,17 @@
// clang-format off
DESCRIPTION
oneAPI Threading Building Blocks (oneTBB)
oneTBB is a flexible C++ library that simplifies the work of adding parallelism to complex applications,
even if you are not a threading expert.
SOURCE
https://github.com/oneapi-src/oneTBB
commit e813596ba3a1bee0ffa06fb66b5e30b7ea801319
Author: Alexandra <alexandra.epanchinzeva@intel.com>
Date: Wed Jun 21 18:46:54 2023 +0200
Documentation for std::invoke (#1112)

107
third_party/tbb/address_waiter.cc vendored Normal file

@@ -0,0 +1,107 @@
// clang-format off
/*
Copyright (c) 2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/governor.hh"
#include "third_party/tbb/concurrent_monitor.hh"
#include "third_party/tbb/detail/_waitable_atomic.hh"
#include "third_party/libcxx/type_traits"
namespace tbb {
namespace detail {
namespace r1 {
struct address_context {
address_context() = default;
address_context(void* address, std::uintptr_t context) :
my_address(address), my_context(context)
{}
void* my_address{nullptr};
std::uintptr_t my_context{0};
};
class address_waiter : public concurrent_monitor_base<address_context> {
using base_type = concurrent_monitor_base<address_context>;
public:
using base_type::base_type;
/** per-thread descriptor for concurrent_monitor */
using thread_context = sleep_node<address_context>;
};
// 1024 is a rough estimate based on two assumptions:
// 1) there are no more than 1000 threads in the application;
// 2) the mutexes are optimized for short critical sections less than a couple of microseconds,
// which is less than 1/1000 of a time slice.
// In the worst case, we have a single mutex that is locked while its thread is preempted.
// Therefore, the probability of a collision when taking an unrelated mutex is about 1/size of the table.
static constexpr std::size_t num_address_waiters = 2 << 10;
static_assert(std::is_standard_layout<address_waiter>::value,
"address_waiter must be with standard layout");
static address_waiter address_waiter_table[num_address_waiters];
void clear_address_waiter_table() {
for (std::size_t i = 0; i < num_address_waiters; ++i) {
address_waiter_table[i].destroy();
}
}
static address_waiter& get_address_waiter(void* address) {
std::uintptr_t tag = std::uintptr_t(address);
return address_waiter_table[((tag >> 5) ^ tag) % num_address_waiters];
}
void wait_on_address(void* address, d1::delegate_base& predicate, std::uintptr_t context) {
address_waiter& waiter = get_address_waiter(address);
waiter.wait<address_waiter::thread_context>(predicate, address_context{address, context});
}
void notify_by_address(void* address, std::uintptr_t target_context) {
address_waiter& waiter = get_address_waiter(address);
auto predicate = [address, target_context] (address_context ctx) {
return ctx.my_address == address && ctx.my_context == target_context;
};
waiter.notify_relaxed(predicate);
}
void notify_by_address_one(void* address) {
address_waiter& waiter = get_address_waiter(address);
auto predicate = [address] (address_context ctx) {
return ctx.my_address == address;
};
waiter.notify_one_relaxed(predicate);
}
void notify_by_address_all(void* address) {
address_waiter& waiter = get_address_waiter(address);
auto predicate = [address] (address_context ctx) {
return ctx.my_address == address;
};
waiter.notify_relaxed(predicate);
}
} // namespace r1
} // namespace detail
} // namespace tbb
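
As a side note, the slot selection in get_address_waiter() above is just a modular hash of the address. The standalone sketch below (not part of the commit; the sample objects are arbitrary) reproduces it to show how distinct addresses spread across the fixed-size address_waiter_table.

#include <cstdint>
#include <cstdio>

// Same constant and hash expression as in address_waiter.cc above;
// xoring with (tag >> 5) folds higher address bits into the low bits
// that survive the modulus.
static constexpr std::size_t num_address_waiters = 2 << 10;

static std::size_t waiter_slot(const void* address) {
  std::uintptr_t tag = reinterpret_cast<std::uintptr_t>(address);
  return static_cast<std::size_t>(((tag >> 5) ^ tag) % num_address_waiters);
}

int main() {
  int a = 0, b = 0;        // two nearby automatic objects
  static int c = 0;        // a static object in a different region
  std::printf("&a -> slot %zu\n", waiter_slot(&a));
  std::printf("&b -> slot %zu\n", waiter_slot(&b));
  std::printf("&c -> slot %zu\n", waiter_slot(&c));
  return 0;
}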

314
third_party/tbb/allocator.cc vendored Normal file

@@ -0,0 +1,314 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/version.hh"
#include "third_party/tbb/detail/_exception.hh"
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/tbb_allocator.hh" // Is this OK?
#include "third_party/tbb/cache_aligned_allocator.hh"
#include "third_party/tbb/dynamic_link.hh"
#include "third_party/tbb/misc.hh"
#include "third_party/libcxx/cstdlib"
#ifdef _WIN32
#include "libc/nt/accounting.h"
#include "libc/nt/automation.h"
#include "libc/nt/console.h"
#include "libc/nt/debug.h"
#include "libc/nt/dll.h"
#include "libc/nt/enum/keyaccess.h"
#include "libc/nt/enum/regtype.h"
#include "libc/nt/errors.h"
#include "libc/nt/events.h"
#include "libc/nt/files.h"
#include "libc/nt/ipc.h"
#include "libc/nt/memory.h"
#include "libc/nt/paint.h"
#include "libc/nt/process.h"
#include "libc/nt/registry.h"
#include "libc/nt/synchronization.h"
#include "libc/nt/thread.h"
#include "libc/nt/windows.h"
#include "libc/nt/winsock.h"
#else
#include "libc/runtime/dlfcn.h"
#endif
#if (!defined(_WIN32) && !defined(_WIN64)) || defined(__CYGWIN__)
#include "libc/calls/calls.h"
#include "libc/calls/termios.h"
#include "libc/fmt/conv.h"
#include "libc/limits.h"
#include "libc/mem/alg.h"
#include "libc/mem/alloca.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/temp.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/exit.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/rand48.h" // posix_memalign, free
// With glibc, uClibc and musl on Linux and bionic on Android it is safe to use memalign(), as the allocated memory
// can be freed with free(). It is also better to use memalign() since posix_memalign() is just a wrapper on top of
// memalign() and it offers nothing but overhead due to inconvenient interface. This is likely the case with other
// standard libraries as well, and more libraries can be added to the preprocessor check below. Unfortunately, we
// can't detect musl, so we simply enable memalign() on Linux and Android in general.
#if defined(linux) || defined(__linux) || defined(__linux__) || defined(__ANDROID__)
#include "libc/mem/mem.h" // memalign
#define __TBB_USE_MEMALIGN
#else
#define __TBB_USE_POSIX_MEMALIGN
#endif
#elif defined(_MSC_VER) || defined(__MINGW32__)
#include "libc/mem/mem.h" // _aligned_malloc, _aligned_free
#define __TBB_USE_MSVC_ALIGNED_MALLOC
#endif
#if __TBB_WEAK_SYMBOLS_PRESENT
#pragma weak scalable_malloc
#pragma weak scalable_free
#pragma weak scalable_aligned_malloc
#pragma weak scalable_aligned_free
extern "C" {
void* scalable_malloc(std::size_t);
void scalable_free(void*);
void* scalable_aligned_malloc(std::size_t, std::size_t);
void scalable_aligned_free(void*);
}
#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
namespace tbb {
namespace detail {
namespace r1 {
//! Initialization routine used for first indirect call via allocate_handler.
static void* initialize_allocate_handler(std::size_t size);
//! Handler for memory allocation
using allocate_handler_type = void* (*)(std::size_t size);
static std::atomic<allocate_handler_type> allocate_handler{ &initialize_allocate_handler };
allocate_handler_type allocate_handler_unsafe = nullptr;
//! Handler for memory deallocation
static void (*deallocate_handler)(void* pointer) = nullptr;
//! Initialization routine used for first indirect call via cache_aligned_allocate_handler.
static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment);
//! Allocates overaligned memory using standard memory allocator. It is used when scalable_allocator is not available.
static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);
//! Deallocates overaligned memory using standard memory allocator. It is used when scalable_allocator is not available.
static void std_cache_aligned_deallocate(void* p);
//! Handler for padded memory allocation
using cache_aligned_allocate_handler_type = void* (*)(std::size_t n, std::size_t alignment);
static std::atomic<cache_aligned_allocate_handler_type> cache_aligned_allocate_handler{ &initialize_cache_aligned_allocate_handler };
cache_aligned_allocate_handler_type cache_aligned_allocate_handler_unsafe = nullptr;
//! Handler for padded memory deallocation
static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;
//! Table describing how to link the handlers.
static const dynamic_link_descriptor MallocLinkTable[] = {
DLD(scalable_malloc, allocate_handler_unsafe),
DLD(scalable_free, deallocate_handler),
DLD(scalable_aligned_malloc, cache_aligned_allocate_handler_unsafe),
DLD(scalable_aligned_free, cache_aligned_deallocate_handler),
};
#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */
// MALLOCLIB_NAME is the name of the oneTBB memory allocator library.
#if _WIN32||_WIN64
#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll"
#elif __APPLE__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".2.dylib"
#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so"
#elif __unix__ // Note that order of these #elif's is important!
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2"
#else
#error Unknown OS
#endif
//! Initialize the allocation/free handler pointers.
/** Caller is responsible for ensuring this routine is called exactly once.
The routine attempts to dynamically link with the TBB memory allocator.
If that allocator is not found, it links to malloc and free. */
void initialize_handler_pointers() {
__TBB_ASSERT(allocate_handler == &initialize_allocate_handler, nullptr);
bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4);
if(!success) {
// If unsuccessful, set the handlers to the default routines.
// This must be done now, and not before FillDynamicLinks runs, because if other
// threads call the handlers, we want them to go through the DoOneTimeInitializations logic,
// which forces them to wait.
allocate_handler_unsafe = &std::malloc;
deallocate_handler = &std::free;
cache_aligned_allocate_handler_unsafe = &std_cache_aligned_allocate;
cache_aligned_deallocate_handler = &std_cache_aligned_deallocate;
}
allocate_handler.store(allocate_handler_unsafe, std::memory_order_release);
cache_aligned_allocate_handler.store(cache_aligned_allocate_handler_unsafe, std::memory_order_release);
PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" );
}
static std::once_flag initialization_state;
void initialize_cache_aligned_allocator() {
std::call_once(initialization_state, &initialize_handler_pointers);
}
//! Executed on very first call through allocate_handler
static void* initialize_allocate_handler(std::size_t size) {
initialize_cache_aligned_allocator();
__TBB_ASSERT(allocate_handler != &initialize_allocate_handler, nullptr);
return (*allocate_handler)(size);
}
//! Executed on very first call through cache_aligned_allocate_handler
static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
initialize_cache_aligned_allocator();
__TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, nullptr);
return (*cache_aligned_allocate_handler)(bytes, alignment);
}
// TODO: use CPUID to find actual line size, though consider backward compatibility
// nfs - no false sharing
static constexpr std::size_t nfs_size = 128;
std::size_t __TBB_EXPORTED_FUNC cache_line_size() {
return nfs_size;
}
void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) {
const std::size_t cache_line_size = nfs_size;
__TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two");
// Check for overflow
if (size + cache_line_size < size) {
throw_exception(exception_id::bad_alloc);
}
// scalable_aligned_malloc considers zero size request an error, and returns nullptr
if (size == 0) size = 1;
void* result = cache_aligned_allocate_handler.load(std::memory_order_acquire)(size, cache_line_size);
if (!result) {
throw_exception(exception_id::bad_alloc);
}
__TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned");
return result;
}
void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) {
__TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been done yet.");
(*cache_aligned_deallocate_handler)(p);
}
static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) {
#if defined(__TBB_USE_MEMALIGN)
return memalign(alignment, bytes);
#elif defined(__TBB_USE_POSIX_MEMALIGN)
void* p = nullptr;
int res = posix_memalign(&p, alignment, bytes);
if (res != 0)
p = nullptr;
return p;
#elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC)
return _aligned_malloc(bytes, alignment);
#else
// TODO: make it common with cache_aligned_resource
std::size_t space = alignment + bytes;
std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space));
if (!base) {
return nullptr;
}
// Round up to the next cache line (align the base address)
std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1);
__TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header");
__TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage");
// Record where block actually starts.
(reinterpret_cast<std::uintptr_t*>(result))[-1] = base;
return reinterpret_cast<void*>(result);
#endif
}
static void std_cache_aligned_deallocate(void* p) {
#if defined(__TBB_USE_MEMALIGN) || defined(__TBB_USE_POSIX_MEMALIGN)
free(p);
#elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC)
_aligned_free(p);
#else
if (p) {
__TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator");
// Recover where block actually starts
std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1];
__TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?");
std::free(reinterpret_cast<void*>(base));
}
#endif
}
void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) {
void* result = allocate_handler.load(std::memory_order_acquire)(size);
if (!result) {
throw_exception(exception_id::bad_alloc);
}
return result;
}
void __TBB_EXPORTED_FUNC deallocate_memory(void* p) {
if (p) {
__TBB_ASSERT(deallocate_handler, "Initialization has not been done yet.");
(*deallocate_handler)(p);
}
}
bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() {
auto handler_snapshot = allocate_handler.load(std::memory_order_acquire);
if (handler_snapshot == &initialize_allocate_handler) {
initialize_cache_aligned_allocator();
}
handler_snapshot = allocate_handler.load(std::memory_order_relaxed);
__TBB_ASSERT(handler_snapshot != &initialize_allocate_handler && deallocate_handler != nullptr, nullptr);
// Cast to void avoids type mismatch errors on some compilers (e.g. __IBMCPP__)
__TBB_ASSERT((reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)),
"Both shim pointers must refer to routines from the same package (either TBB or CRT)");
return reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc);
}
} // namespace r1
} // namespace detail
} // namespace tbb
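
The fallback branch of std_cache_aligned_allocate() above uses a classic trick: over-allocate by nfs_size, round the returned address up to the next 128-byte boundary, and stash malloc's original pointer one word below the address handed back so the deallocator can find it. Below is a standalone sketch of just that scheme (the names and the hard-coded 128 are illustrative; the real code additionally prefers scalable_malloc when it can be linked and checks for size overflow).

#include <cassert>
#include <cstdint>
#include <cstdlib>

static constexpr std::size_t kAlign = 128;  // mirrors nfs_size above

void* cache_aligned_alloc_sketch(std::size_t bytes) {
  std::size_t space = kAlign + bytes;                      // padding + room for the header word
  std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space));
  if (!base) return nullptr;
  std::uintptr_t user = (base + kAlign) & ~(kAlign - 1);   // next 128-byte boundary above base
  // malloc returns at least alignof(max_align_t)-aligned memory, so the gap
  // below `user` is always large enough to hold one uintptr_t.
  assert(user - base >= sizeof(std::uintptr_t));
  reinterpret_cast<std::uintptr_t*>(user)[-1] = base;      // record where the block really starts
  return reinterpret_cast<void*>(user);
}

void cache_aligned_free_sketch(void* p) {
  if (!p) return;
  std::uintptr_t base = reinterpret_cast<std::uintptr_t*>(p)[-1];
  std::free(reinterpret_cast<void*>(base));
}

The two assertions in std_cache_aligned_allocate() above check exactly these properties: the header word fits below the aligned address, and the payload still fits in the over-allocated block.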

858
third_party/tbb/arena.cc vendored Normal file

@@ -0,0 +1,858 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/task_dispatcher.hh"
#include "third_party/tbb/governor.hh"
#include "third_party/tbb/threading_control.hh"
#include "third_party/tbb/arena.hh"
#include "third_party/tbb/itt_notify.hh"
#include "third_party/tbb/semaphore.hh"
#include "third_party/tbb/waiters.hh"
#include "third_party/tbb/detail/_task.hh"
#include "third_party/tbb/info.hh"
#include "third_party/tbb/tbb_allocator.hh"
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/cstring"
#include "third_party/libcxx/functional"
namespace tbb {
namespace detail {
namespace r1 {
#if __TBB_ARENA_BINDING
class numa_binding_observer : public tbb::task_scheduler_observer {
binding_handler* my_binding_handler;
public:
numa_binding_observer( d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core )
: task_scheduler_observer(*ta)
, my_binding_handler(construct_binding_handler(num_slots, numa_id, core_type, max_threads_per_core))
{}
void on_scheduler_entry( bool ) override {
apply_affinity_mask(my_binding_handler, this_task_arena::current_thread_index());
}
void on_scheduler_exit( bool ) override {
restore_affinity_mask(my_binding_handler, this_task_arena::current_thread_index());
}
~numa_binding_observer() override{
destroy_binding_handler(my_binding_handler);
}
};
numa_binding_observer* construct_binding_observer( d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core ) {
numa_binding_observer* binding_observer = nullptr;
if ((core_type >= 0 && core_type_count() > 1) || (numa_id >= 0 && numa_node_count() > 1) || max_threads_per_core > 0) {
binding_observer = new(allocate_memory(sizeof(numa_binding_observer))) numa_binding_observer(ta, num_slots, numa_id, core_type, max_threads_per_core);
__TBB_ASSERT(binding_observer, "Failure during NUMA binding observer allocation and construction");
binding_observer->observe(true);
}
return binding_observer;
}
void destroy_binding_observer( numa_binding_observer* binding_observer ) {
__TBB_ASSERT(binding_observer, "Trying to deallocate nullptr pointer");
binding_observer->observe(false);
binding_observer->~numa_binding_observer();
deallocate_memory(binding_observer);
}
#endif /*!__TBB_ARENA_BINDING*/
void arena::on_thread_leaving(unsigned ref_param) {
//
// Implementation of arena destruction synchronization logic contained various
// bugs/flaws at the different stages of its evolution, so below is a detailed
// description of the issues taken into consideration in the framework of the
// current design.
//
// In case of using fire-and-forget tasks (scheduled via task::enqueue())
// external thread is allowed to leave its arena before all its work is executed,
// and market may temporarily revoke all workers from this arena. Since revoked
// workers never attempt to reset arena state to EMPTY and cancel its request
// to RML for threads, the arena object is destroyed only when both the last
// thread is leaving it and arena's state is EMPTY (that is its external thread
// left and it does not contain any work).
// Thus resetting arena to EMPTY state (as earlier TBB versions did) should not
// be done here (or anywhere else in the external thread to that matter); doing so
// can result either in arena's premature destruction (at least without
// additional costly checks in workers) or in unnecessary arena state changes
// (and ensuing workers migration).
//
// A worker that checks for work presence and transitions arena to the EMPTY
// state (in snapshot taking procedure arena::out_of_work()) updates
// arena::my_pool_state first and only then arena::my_num_workers_requested.
// So the check for work absence must be done against the latter field.
//
// In the time window between decrementing the active threads count and checking
// whether there is an outstanding request for workers, a new worker thread may
// arrive, finish the remaining work, set the arena state to empty, and leave,
// decrementing its refcount and destroying the arena. The current thread would
// then destroy the arena a second time. To preclude this, a local copy of the
// outstanding request value can be stored before decrementing the active threads count.
//
// But this technique may cause two other problems. When the stored request is
// zero, it is possible that arena still has threads and they can generate new
// tasks and thus re-establish non-zero requests. Then all the threads can be
// revoked (as described above) leaving this thread the last one, and causing
// it to destroy non-empty arena.
//
// The other problem takes place when the stored request is non-zero. Another
// thread may complete the work, set arena state to empty, and leave without
// arena destruction before this thread decrements the refcount. This thread
// cannot destroy the arena either. Thus the arena may be "orphaned".
//
// In both cases we cannot dereference arena pointer after the refcount is
// decremented, as our arena may already be destroyed.
//
// If this is the external thread, the market is protected by refcount to it.
// In case of workers market's liveness is ensured by the RML connection
// rundown protocol, according to which the client (i.e. the market) lives
// until RML server notifies it about connection termination, and this
// notification is fired only after all workers return into RML.
//
// Thus if we decremented refcount to zero we ask the market to check arena
// state (including the fact if it is alive) under the lock.
//
__TBB_ASSERT(my_references.load(std::memory_order_relaxed) >= ref_param, "broken arena reference counter");
// When there are no workers, someone must free the arena, because
// without workers no one calls out_of_work().
if (ref_param == ref_external && !my_mandatory_concurrency.test()) {
out_of_work();
}
threading_control* tc = my_threading_control;
auto tc_client_snapshot = tc->prepare_client_destruction(my_tc_client);
// Release our reference to sync with destroy_client
unsigned remaining_ref = my_references.fetch_sub(ref_param, std::memory_order_release) - ref_param;
// do not access `this` it might be destroyed already
if (remaining_ref == 0) {
if (tc->try_destroy_client(tc_client_snapshot)) {
// We are requested to destroy ourself
free_arena();
}
}
}
std::size_t arena::occupy_free_slot_in_range( thread_data& tls, std::size_t lower, std::size_t upper ) {
if ( lower >= upper ) return out_of_arena;
// Start search for an empty slot from the one we occupied the last time
std::size_t index = tls.my_arena_index;
if ( index < lower || index >= upper ) index = tls.my_random.get() % (upper - lower) + lower;
__TBB_ASSERT( index >= lower && index < upper, nullptr);
// Find a free slot
for ( std::size_t i = index; i < upper; ++i )
if (my_slots[i].try_occupy()) return i;
for ( std::size_t i = lower; i < index; ++i )
if (my_slots[i].try_occupy()) return i;
return out_of_arena;
}
template <bool as_worker>
std::size_t arena::occupy_free_slot(thread_data& tls) {
// Firstly, external threads try to occupy reserved slots
std::size_t index = as_worker ? out_of_arena : occupy_free_slot_in_range( tls, 0, my_num_reserved_slots );
if ( index == out_of_arena ) {
// Secondly, all threads try to occupy all non-reserved slots
index = occupy_free_slot_in_range(tls, my_num_reserved_slots, my_num_slots );
// Likely this arena is already saturated
if ( index == out_of_arena )
return out_of_arena;
}
atomic_update( my_limit, (unsigned)(index + 1), std::less<unsigned>() );
return index;
}
std::uintptr_t arena::calculate_stealing_threshold() {
stack_anchor_type anchor;
return r1::calculate_stealing_threshold(reinterpret_cast<std::uintptr_t>(&anchor), my_threading_control->worker_stack_size());
}
void arena::process(thread_data& tls) {
governor::set_thread_data(tls); // TODO: consider moving to create_one_job.
__TBB_ASSERT( is_alive(my_guard), nullptr);
__TBB_ASSERT( my_num_slots >= 1, nullptr);
std::size_t index = occupy_free_slot</*as_worker*/true>(tls);
if (index == out_of_arena) {
on_thread_leaving(ref_worker);
return;
}
__TBB_ASSERT( index >= my_num_reserved_slots, "Workers cannot occupy reserved slots" );
tls.attach_arena(*this, index);
// worker thread enters the dispatch loop to look for work
tls.my_inbox.set_is_idle(true);
if (tls.my_arena_slot->is_task_pool_published()) {
tls.my_inbox.set_is_idle(false);
}
task_dispatcher& task_disp = tls.my_arena_slot->default_task_dispatcher();
tls.enter_task_dispatcher(task_disp, calculate_stealing_threshold());
__TBB_ASSERT(task_disp.can_steal(), nullptr);
__TBB_ASSERT( !tls.my_last_observer, "There cannot be notified local observers when entering arena" );
my_observers.notify_entry_observers(tls.my_last_observer, tls.my_is_worker);
// Waiting on special object tied to this arena
outermost_worker_waiter waiter(*this);
d1::task* t = tls.my_task_dispatcher->local_wait_for_all(nullptr, waiter);
// For purposes of affinity support, the slot's mailbox is considered idle while no thread is
// attached to it.
tls.my_inbox.set_is_idle(true);
__TBB_ASSERT_EX(t == nullptr, "Outermost worker must not leave dispatch loop with a task");
__TBB_ASSERT(governor::is_thread_data_set(&tls), nullptr);
__TBB_ASSERT(tls.my_task_dispatcher == &task_disp, nullptr);
my_observers.notify_exit_observers(tls.my_last_observer, tls.my_is_worker);
tls.my_last_observer = nullptr;
tls.leave_task_dispatcher();
// Arena slot detach (arena may be used in market::process)
// TODO: Consider moving several calls below into a new method(e.g.detach_arena).
tls.my_arena_slot->release();
tls.my_arena_slot = nullptr;
tls.my_inbox.detach();
__TBB_ASSERT(tls.my_inbox.is_idle_state(true), nullptr);
__TBB_ASSERT(is_alive(my_guard), nullptr);
// In contrast to earlier versions of TBB (before 3.0 U5) now it is possible
// that arena may be temporarily left unpopulated by threads. See comments in
// arena::on_thread_leaving() for more details.
on_thread_leaving(ref_worker);
__TBB_ASSERT(tls.my_arena == this, "my_arena is used as a hint when searching the arena to join");
}
arena::arena(threading_control* control, unsigned num_slots, unsigned num_reserved_slots, unsigned priority_level) {
__TBB_ASSERT( !my_guard, "improperly allocated arena?" );
__TBB_ASSERT( sizeof(my_slots[0]) % cache_line_size()==0, "arena::slot size not multiple of cache line size" );
__TBB_ASSERT( is_aligned(this, cache_line_size()), "arena misaligned" );
my_threading_control = control;
my_limit = 1;
// Two slots are mandatory: for the external thread, and for 1 worker (required to support starvation resistant tasks).
my_num_slots = num_arena_slots(num_slots, num_reserved_slots);
my_num_reserved_slots = num_reserved_slots;
my_max_num_workers = num_slots-num_reserved_slots;
my_priority_level = priority_level;
my_references = ref_external; // accounts for the external thread
my_observers.my_arena = this;
my_co_cache.init(4 * num_slots);
__TBB_ASSERT ( my_max_num_workers <= my_num_slots, nullptr);
// Initialize the default context. It should be allocated before task_dispatch construction.
my_default_ctx = new (cache_aligned_allocate(sizeof(d1::task_group_context)))
d1::task_group_context{ d1::task_group_context::isolated, d1::task_group_context::fp_settings };
// Construct slots. Mark internal synchronization elements for the tools.
task_dispatcher* base_td_pointer = reinterpret_cast<task_dispatcher*>(my_slots + my_num_slots);
for( unsigned i = 0; i < my_num_slots; ++i ) {
// __TBB_ASSERT( !my_slots[i].my_scheduler && !my_slots[i].task_pool, nullptr);
__TBB_ASSERT( !my_slots[i].task_pool_ptr, nullptr);
__TBB_ASSERT( !my_slots[i].my_task_pool_size, nullptr);
mailbox(i).construct();
my_slots[i].init_task_streams(i);
my_slots[i].my_default_task_dispatcher = new(base_td_pointer + i) task_dispatcher(this);
my_slots[i].my_is_occupied.store(false, std::memory_order_relaxed);
}
my_fifo_task_stream.initialize(my_num_slots);
my_resume_task_stream.initialize(my_num_slots);
#if __TBB_PREVIEW_CRITICAL_TASKS
my_critical_task_stream.initialize(my_num_slots);
#endif
my_mandatory_requests = 0;
}
arena& arena::allocate_arena(threading_control* control, unsigned num_slots, unsigned num_reserved_slots,
unsigned priority_level)
{
__TBB_ASSERT( sizeof(base_type) + sizeof(arena_slot) == sizeof(arena), "All arena data fields must go to arena_base" );
__TBB_ASSERT( sizeof(base_type) % cache_line_size() == 0, "arena slots area misaligned: wrong padding" );
__TBB_ASSERT( sizeof(mail_outbox) == max_nfs_size, "Mailbox padding is wrong" );
std::size_t n = allocation_size(num_arena_slots(num_slots, num_reserved_slots));
unsigned char* storage = (unsigned char*)cache_aligned_allocate(n);
// Zero all slots to indicate that they are empty
std::memset( storage, 0, n );
return *new( storage + num_arena_slots(num_slots, num_reserved_slots) * sizeof(mail_outbox) )
arena(control, num_slots, num_reserved_slots, priority_level);
}
void arena::free_arena () {
__TBB_ASSERT( is_alive(my_guard), nullptr);
__TBB_ASSERT( !my_references.load(std::memory_order_relaxed), "There are threads in the dying arena" );
__TBB_ASSERT( !my_total_num_workers_requested && !my_num_workers_allotted, "Dying arena requests workers" );
__TBB_ASSERT( is_empty(), "Inconsistent state of a dying arena" );
#if __TBB_ARENA_BINDING
if (my_numa_binding_observer != nullptr) {
destroy_binding_observer(my_numa_binding_observer);
my_numa_binding_observer = nullptr;
}
#endif /*__TBB_ARENA_BINDING*/
poison_value( my_guard );
for ( unsigned i = 0; i < my_num_slots; ++i ) {
// __TBB_ASSERT( !my_slots[i].my_scheduler, "arena slot is not empty" );
// TODO: understand the assertion and modify
// __TBB_ASSERT( my_slots[i].task_pool == EmptyTaskPool, nullptr);
__TBB_ASSERT( my_slots[i].head == my_slots[i].tail, nullptr); // TODO: replace by is_quiescent_local_task_pool_empty
my_slots[i].free_task_pool();
mailbox(i).drain();
my_slots[i].my_default_task_dispatcher->~task_dispatcher();
}
__TBB_ASSERT(my_fifo_task_stream.empty(), "Not all enqueued tasks were executed");
__TBB_ASSERT(my_resume_task_stream.empty(), "Not all enqueued tasks were executed");
// Cleanup coroutines/schedulers cache
my_co_cache.cleanup();
my_default_ctx->~task_group_context();
cache_aligned_deallocate(my_default_ctx);
#if __TBB_PREVIEW_CRITICAL_TASKS
__TBB_ASSERT( my_critical_task_stream.empty(), "Not all critical tasks were executed");
#endif
// Clearing the observer list enforces synchronization with observe(false)
my_observers.clear();
void* storage = &mailbox(my_num_slots-1);
__TBB_ASSERT( my_references.load(std::memory_order_relaxed) == 0, nullptr);
this->~arena();
#if TBB_USE_ASSERT > 1
std::memset( storage, 0, allocation_size(my_num_slots) );
#endif /* TBB_USE_ASSERT */
cache_aligned_deallocate( storage );
}
bool arena::has_enqueued_tasks() {
return !my_fifo_task_stream.empty();
}
void arena::request_workers(int mandatory_delta, int workers_delta, bool wakeup_threads) {
my_threading_control->adjust_demand(my_tc_client, mandatory_delta, workers_delta);
if (wakeup_threads) {
// Notify all sleeping threads that work has appeared in the arena.
get_waiting_threads_monitor().notify([&] (market_context context) {
return this == context.my_arena_addr;
});
}
}
bool arena::has_tasks() {
// TODO: rework it to return at least a hint about where a task was found; better if the task itself.
std::size_t n = my_limit.load(std::memory_order_acquire);
bool tasks_are_available = false;
for (std::size_t k = 0; k < n && !tasks_are_available; ++k) {
tasks_are_available = !my_slots[k].is_empty();
}
tasks_are_available = tasks_are_available || has_enqueued_tasks() || !my_resume_task_stream.empty();
#if __TBB_PREVIEW_CRITICAL_TASKS
tasks_are_available = tasks_are_available || !my_critical_task_stream.empty();
#endif
return tasks_are_available;
}
void arena::out_of_work() {
// We should try to unset my_pool_state first in order to keep the arena invariants consistent.
// Otherwise, we might end up with my_pool_state == false and my_mandatory_concurrency == true, which is a broken invariant.
bool disable_mandatory = my_mandatory_concurrency.try_clear_if([this] { return !has_enqueued_tasks(); });
bool release_workers = my_pool_state.try_clear_if([this] { return !has_tasks(); });
if (disable_mandatory || release_workers) {
int mandatory_delta = disable_mandatory ? -1 : 0;
int workers_delta = release_workers ? -(int)my_max_num_workers : 0;
if (disable_mandatory && is_arena_workerless()) {
// We set workers_delta to 1 when mandatory concurrency was enabled, so revert it now
workers_delta = -1;
}
request_workers(mandatory_delta, workers_delta);
}
}
void arena::set_top_priority(bool is_top_priority) {
my_is_top_priority.store(is_top_priority, std::memory_order_relaxed);
}
bool arena::is_top_priority() const {
return my_is_top_priority.load(std::memory_order_relaxed);
}
bool arena::try_join() {
if (num_workers_active() < my_num_workers_allotted.load(std::memory_order_relaxed)) {
my_references += arena::ref_worker;
return true;
}
return false;
}
void arena::set_allotment(unsigned allotment) {
if (my_num_workers_allotted.load(std::memory_order_relaxed) != allotment) {
my_num_workers_allotted.store(allotment, std::memory_order_relaxed);
}
}
std::pair<int, int> arena::update_request(int mandatory_delta, int workers_delta) {
__TBB_ASSERT(-1 <= mandatory_delta && mandatory_delta <= 1, nullptr);
int min_workers_request = 0;
int max_workers_request = 0;
// Calculate min request
my_mandatory_requests += mandatory_delta;
min_workers_request = my_mandatory_requests > 0 ? 1 : 0;
// Calculate max request
my_total_num_workers_requested += workers_delta;
// Clamp worker request into interval [0, my_max_num_workers]
max_workers_request = clamp(my_total_num_workers_requested, 0,
min_workers_request > 0 && is_arena_workerless() ? 1 : (int)my_max_num_workers);
return { min_workers_request, max_workers_request };
}
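// Worked example of the request arithmetic above, for an arena with my_max_num_workers == 4
// starting from a neutral state:
//   update_request(/*mandatory*/ 0, /*workers*/ +6) -> {0, 4}   // clamped to my_max_num_workers
//   update_request(/*mandatory*/ +1, /*workers*/ 0) -> {1, 4}
//   update_request(/*mandatory*/ -1, /*workers*/ -6) -> {0, 0}
// For a workerless arena (my_max_num_workers == 0) the mandatory path of advertise_new_work()
// also passes workers_delta == 1, so the request becomes {1, 1}: exactly one worker is
// borrowed to guarantee progress of enqueued work.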
thread_control_monitor& arena::get_waiting_threads_monitor() {
return my_threading_control->get_waiting_threads_monitor();
}
void arena::enqueue_task(d1::task& t, d1::task_group_context& ctx, thread_data& td) {
task_group_context_impl::bind_to(ctx, &td);
task_accessor::context(t) = &ctx;
task_accessor::isolation(t) = no_isolation;
my_fifo_task_stream.push( &t, random_lane_selector(td.my_random) );
advertise_new_work<work_enqueued>();
}
arena& arena::create(threading_control* control, unsigned num_slots, unsigned num_reserved_slots, unsigned arena_priority_level)
{
__TBB_ASSERT(num_slots > 0, nullptr);
__TBB_ASSERT(num_reserved_slots <= num_slots, nullptr);
// Add public market reference for an external thread/task_arena (that adds an internal reference in exchange).
arena& a = arena::allocate_arena(control, num_slots, num_reserved_slots, arena_priority_level);
a.my_tc_client = control->create_client(a);
// We should not publish arena until all fields are initialized
control->publish_client(a.my_tc_client);
return a;
}
} // namespace r1
} // namespace detail
} // namespace tbb
// Enable task_arena.h
#include "third_party/tbb/task_arena.hh" // task_arena_base
namespace tbb {
namespace detail {
namespace r1 {
#if TBB_USE_ASSERT
void assert_arena_priority_valid( tbb::task_arena::priority a_priority ) {
bool is_arena_priority_correct =
a_priority == tbb::task_arena::priority::high ||
a_priority == tbb::task_arena::priority::normal ||
a_priority == tbb::task_arena::priority::low;
__TBB_ASSERT( is_arena_priority_correct,
"Task arena priority should be equal to one of the predefined values." );
}
#else
void assert_arena_priority_valid( tbb::task_arena::priority ) {}
#endif
unsigned arena_priority_level( tbb::task_arena::priority a_priority ) {
assert_arena_priority_valid( a_priority );
return d1::num_priority_levels - unsigned(int(a_priority) / d1::priority_stride);
}
tbb::task_arena::priority arena_priority( unsigned priority_level ) {
auto priority = tbb::task_arena::priority(
(d1::num_priority_levels - priority_level) * d1::priority_stride
);
assert_arena_priority_valid( priority );
return priority;
}
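// Illustration of the mapping above, assuming the usual oneTBB definitions (not restated in
// this file) where the three tbb::task_arena::priority enumerators are 1x, 2x and 3x
// d1::priority_stride and d1::num_priority_levels == 3:
//   priority::high   -> level 0 (highest)
//   priority::normal -> level 1
//   priority::low    -> level 2
// arena_priority() is the exact inverse, so arena_priority(arena_priority_level(p)) == p
// for each of the three predefined values.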
struct task_arena_impl {
static void initialize(d1::task_arena_base&);
static void terminate(d1::task_arena_base&);
static bool attach(d1::task_arena_base&);
static void execute(d1::task_arena_base&, d1::delegate_base&);
static void wait(d1::task_arena_base&);
static int max_concurrency(const d1::task_arena_base*);
static void enqueue(d1::task&, d1::task_group_context*, d1::task_arena_base*);
};
void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base& ta) {
task_arena_impl::initialize(ta);
}
void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base& ta) {
task_arena_impl::terminate(ta);
}
bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base& ta) {
return task_arena_impl::attach(ta);
}
void __TBB_EXPORTED_FUNC execute(d1::task_arena_base& ta, d1::delegate_base& d) {
task_arena_impl::execute(ta, d);
}
void __TBB_EXPORTED_FUNC wait(d1::task_arena_base& ta) {
task_arena_impl::wait(ta);
}
int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base* ta) {
return task_arena_impl::max_concurrency(ta);
}
void __TBB_EXPORTED_FUNC enqueue(d1::task& t, d1::task_arena_base* ta) {
task_arena_impl::enqueue(t, nullptr, ta);
}
void __TBB_EXPORTED_FUNC enqueue(d1::task& t, d1::task_group_context& ctx, d1::task_arena_base* ta) {
task_arena_impl::enqueue(t, &ctx, ta);
}
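// These exported entry points back the public tbb::task_arena interface; a minimal usage
// sketch of that interface (heavy_work and fire_and_forget are placeholder names):
//
//   tbb::task_arena a(/*max concurrency*/ 4, /*reserved slots*/ 1);  // -> initialize()
//   a.execute([] { heavy_work(); });         // -> execute(), runs the functor inside the arena
//   a.enqueue([] { fire_and_forget(); });    // -> enqueue(), detached FIFO task
//   int width = a.max_concurrency();         // -> max_concurrency()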
void task_arena_impl::initialize(d1::task_arena_base& ta) {
// Enforce global market initialization to properly initialize soft limit
(void)governor::get_thread_data();
if (ta.my_max_concurrency < 1) {
#if __TBB_ARENA_BINDING
d1::constraints arena_constraints = d1::constraints{}
.set_core_type(ta.core_type())
.set_max_threads_per_core(ta.max_threads_per_core())
.set_numa_id(ta.my_numa_id);
ta.my_max_concurrency = (int)default_concurrency(arena_constraints);
#else /*!__TBB_ARENA_BINDING*/
ta.my_max_concurrency = (int)governor::default_num_threads();
#endif /*!__TBB_ARENA_BINDING*/
}
__TBB_ASSERT(ta.my_arena.load(std::memory_order_relaxed) == nullptr, "Arena already initialized");
unsigned priority_level = arena_priority_level(ta.my_priority);
threading_control* thr_control = threading_control::register_public_reference();
arena& a = arena::create(thr_control, unsigned(ta.my_max_concurrency), ta.my_num_reserved_slots, priority_level);
ta.my_arena.store(&a, std::memory_order_release);
#if __TBB_ARENA_BINDING
a.my_numa_binding_observer = construct_binding_observer(
static_cast<d1::task_arena*>(&ta), a.my_num_slots, ta.my_numa_id, ta.core_type(), ta.max_threads_per_core());
#endif /*__TBB_ARENA_BINDING*/
}
void task_arena_impl::terminate(d1::task_arena_base& ta) {
arena* a = ta.my_arena.load(std::memory_order_relaxed);
assert_pointer_valid(a);
threading_control::unregister_public_reference(/*blocking_terminate=*/false);
a->on_thread_leaving(arena::ref_external);
ta.my_arena.store(nullptr, std::memory_order_relaxed);
}
bool task_arena_impl::attach(d1::task_arena_base& ta) {
__TBB_ASSERT(!ta.my_arena.load(std::memory_order_relaxed), nullptr);
thread_data* td = governor::get_thread_data_if_initialized();
if( td && td->my_arena ) {
arena* a = td->my_arena;
// There is an active arena to attach to.
// It is still referenced by this thread, so it won't be destroyed right away.
__TBB_ASSERT(a->my_references > 0, nullptr);
a->my_references += arena::ref_external;
ta.my_num_reserved_slots = a->my_num_reserved_slots;
ta.my_priority = arena_priority(a->my_priority_level);
ta.my_max_concurrency = ta.my_num_reserved_slots + a->my_max_num_workers;
__TBB_ASSERT(arena::num_arena_slots(ta.my_max_concurrency, ta.my_num_reserved_slots) == a->my_num_slots, nullptr);
ta.my_arena.store(a, std::memory_order_release);
// increases threading_control's ref count for task_arena
threading_control::register_public_reference();
return true;
}
return false;
}
void task_arena_impl::enqueue(d1::task& t, d1::task_group_context* c, d1::task_arena_base* ta) {
thread_data* td = governor::get_thread_data(); // thread data is only needed for FastRandom instance
assert_pointer_valid(td, "thread_data pointer should not be null");
arena* a = ta ?
ta->my_arena.load(std::memory_order_relaxed)
: td->my_arena
;
assert_pointer_valid(a, "arena pointer should not be null");
auto* ctx = c ? c : a->my_default_ctx;
assert_pointer_valid(ctx, "context pointer should not be null");
// Is there a better place for checking the state of ctx?
__TBB_ASSERT(!a->my_default_ctx->is_group_execution_cancelled(),
"The task will not be executed because its task_group_context is cancelled.");
a->enqueue_task(t, *ctx, *td);
}
class nested_arena_context : no_copy {
public:
nested_arena_context(thread_data& td, arena& nested_arena, std::size_t slot_index)
: m_orig_execute_data_ext(td.my_task_dispatcher->m_execute_data_ext)
{
if (td.my_arena != &nested_arena) {
m_orig_arena = td.my_arena;
m_orig_slot_index = td.my_arena_index;
m_orig_last_observer = td.my_last_observer;
td.detach_task_dispatcher();
td.attach_arena(nested_arena, slot_index);
if (td.my_inbox.is_idle_state(true))
td.my_inbox.set_is_idle(false);
task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher();
td.enter_task_dispatcher(task_disp, m_orig_execute_data_ext.task_disp->m_stealing_threshold);
// If the calling thread occupies a slot outside the external thread reserve, we need to notify the
// market that this arena requires one worker less.
if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) {
td.my_arena->request_workers(/* mandatory_delta = */ 0, /* workers_delta = */ -1);
}
td.my_last_observer = nullptr;
// The task_arena::execute method considers each calling thread as an external thread.
td.my_arena->my_observers.notify_entry_observers(td.my_last_observer, /* worker*/false);
}
m_task_dispatcher = td.my_task_dispatcher;
m_orig_fifo_tasks_allowed = m_task_dispatcher->allow_fifo_task(true);
m_orig_critical_task_allowed = m_task_dispatcher->m_properties.critical_task_allowed;
m_task_dispatcher->m_properties.critical_task_allowed = true;
execution_data_ext& ed_ext = td.my_task_dispatcher->m_execute_data_ext;
ed_ext.context = td.my_arena->my_default_ctx;
ed_ext.original_slot = td.my_arena_index;
ed_ext.affinity_slot = d1::no_slot;
ed_ext.task_disp = td.my_task_dispatcher;
ed_ext.isolation = no_isolation;
__TBB_ASSERT(td.my_arena_slot, nullptr);
__TBB_ASSERT(td.my_arena_slot->is_occupied(), nullptr);
__TBB_ASSERT(td.my_task_dispatcher, nullptr);
}
~nested_arena_context() {
thread_data& td = *m_task_dispatcher->m_thread_data;
__TBB_ASSERT(governor::is_thread_data_set(&td), nullptr);
m_task_dispatcher->allow_fifo_task(m_orig_fifo_tasks_allowed);
m_task_dispatcher->m_properties.critical_task_allowed = m_orig_critical_task_allowed;
if (m_orig_arena) {
td.my_arena->my_observers.notify_exit_observers(td.my_last_observer, /*worker*/ false);
td.my_last_observer = m_orig_last_observer;
// Notify the market that this thread is releasing a slot
// that can be used by a worker thread.
if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) {
td.my_arena->request_workers(/* mandatory_delta = */ 0, /* workers_delta = */ 1);
}
td.leave_task_dispatcher();
td.my_arena_slot->release();
td.my_arena->my_exit_monitors.notify_one(); // do not relax!
td.attach_arena(*m_orig_arena, m_orig_slot_index);
td.attach_task_dispatcher(*m_orig_execute_data_ext.task_disp);
__TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr);
}
td.my_task_dispatcher->m_execute_data_ext = m_orig_execute_data_ext;
}
private:
execution_data_ext m_orig_execute_data_ext{};
arena* m_orig_arena{ nullptr };
observer_proxy* m_orig_last_observer{ nullptr };
task_dispatcher* m_task_dispatcher{ nullptr };
unsigned m_orig_slot_index{};
bool m_orig_fifo_tasks_allowed{};
bool m_orig_critical_task_allowed{};
};
class delegated_task : public d1::task {
d1::delegate_base& m_delegate;
concurrent_monitor& m_monitor;
d1::wait_context& m_wait_ctx;
std::atomic<bool> m_completed;
d1::task* execute(d1::execution_data& ed) override {
const execution_data_ext& ed_ext = static_cast<const execution_data_ext&>(ed);
execution_data_ext orig_execute_data_ext = ed_ext.task_disp->m_execute_data_ext;
__TBB_ASSERT(&ed_ext.task_disp->m_execute_data_ext == &ed,
"The execute data shall point to the current task dispatcher execute data");
__TBB_ASSERT(ed_ext.task_disp->m_execute_data_ext.isolation == no_isolation, nullptr);
ed_ext.task_disp->m_execute_data_ext.context = ed_ext.task_disp->get_thread_data().my_arena->my_default_ctx;
bool fifo_task_allowed = ed_ext.task_disp->allow_fifo_task(true);
try_call([&] {
m_delegate();
}).on_completion([&] {
ed_ext.task_disp->m_execute_data_ext = orig_execute_data_ext;
ed_ext.task_disp->allow_fifo_task(fifo_task_allowed);
});
finalize();
return nullptr;
}
d1::task* cancel(d1::execution_data&) override {
finalize();
return nullptr;
}
void finalize() {
m_wait_ctx.release(); // must precede the wakeup
m_monitor.notify([this] (std::uintptr_t ctx) {
return ctx == std::uintptr_t(&m_delegate);
}); // do not relax, it needs a fence!
m_completed.store(true, std::memory_order_release);
}
public:
delegated_task(d1::delegate_base& d, concurrent_monitor& s, d1::wait_context& wo)
: m_delegate(d), m_monitor(s), m_wait_ctx(wo), m_completed{ false }{}
~delegated_task() override {
// The destructor can be called before m_monitor is notified
// because the waiting thread can be released right after m_wait_ctx.release().
// To close that race we wait for the m_completed signal.
spin_wait_until_eq(m_completed, true);
}
};
void task_arena_impl::execute(d1::task_arena_base& ta, d1::delegate_base& d) {
arena* a = ta.my_arena.load(std::memory_order_relaxed);
__TBB_ASSERT(a != nullptr, nullptr);
thread_data* td = governor::get_thread_data();
bool same_arena = td->my_arena == a;
std::size_t index1 = td->my_arena_index;
if (!same_arena) {
index1 = a->occupy_free_slot</*as_worker */false>(*td);
if (index1 == arena::out_of_arena) {
concurrent_monitor::thread_context waiter((std::uintptr_t)&d);
d1::wait_context wo(1);
d1::task_group_context exec_context(d1::task_group_context::isolated);
task_group_context_impl::copy_fp_settings(exec_context, *a->my_default_ctx);
delegated_task dt(d, a->my_exit_monitors, wo);
a->enqueue_task( dt, exec_context, *td);
size_t index2 = arena::out_of_arena;
do {
a->my_exit_monitors.prepare_wait(waiter);
if (!wo.continue_execution()) {
a->my_exit_monitors.cancel_wait(waiter);
break;
}
index2 = a->occupy_free_slot</*as_worker*/false>(*td);
if (index2 != arena::out_of_arena) {
a->my_exit_monitors.cancel_wait(waiter);
nested_arena_context scope(*td, *a, index2 );
r1::wait(wo, exec_context);
__TBB_ASSERT(!exec_context.my_exception.load(std::memory_order_relaxed), nullptr); // exception can be thrown above, not deferred
break;
}
a->my_exit_monitors.commit_wait(waiter);
} while (wo.continue_execution());
if (index2 == arena::out_of_arena) {
// notify a waiting thread even if this thread did not enter arena,
// in case it was woken by a leaving thread but did not need to enter
a->my_exit_monitors.notify_one(); // do not relax!
}
// process possible exception
auto exception = exec_context.my_exception.load(std::memory_order_acquire);
if (exception) {
__TBB_ASSERT(exec_context.is_group_execution_cancelled(), "The task group context with an exception should be canceled.");
exception->throw_self();
}
__TBB_ASSERT(governor::is_thread_data_set(td), nullptr);
return;
} // if (index1 == arena::out_of_arena)
} // if (!same_arena)
context_guard_helper</*report_tasks=*/false> context_guard;
context_guard.set_ctx(a->my_default_ctx);
nested_arena_context scope(*td, *a, index1);
#if _WIN64
try {
#endif
d();
__TBB_ASSERT(same_arena || governor::is_thread_data_set(td), nullptr);
#if _WIN64
} catch (...) {
context_guard.restore_default();
throw;
}
#endif
}
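// Summary of the slow path above: when occupy_free_slot() fails, the functor is wrapped into a
// delegated_task, enqueued into the target arena, and the caller parks on my_exit_monitors.
// It wakes up either because a worker completed the delegate (wo.continue_execution() turns
// false) or because a slot was released, in which case it occupies the slot itself, enters a
// nested_arena_context and runs the regular wait loop. Any exception is captured in
// exec_context and rethrown after leaving the arena.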
void task_arena_impl::wait(d1::task_arena_base& ta) {
arena* a = ta.my_arena.load(std::memory_order_relaxed);
__TBB_ASSERT(a != nullptr, nullptr);
thread_data* td = governor::get_thread_data();
__TBB_ASSERT_EX(td, "Scheduler is not initialized");
__TBB_ASSERT(td->my_arena != a || td->my_arena_index == 0, "internal_wait is not supported within a worker context" );
if (a->my_max_num_workers != 0) {
while (a->num_workers_active() || !a->is_empty()) {
yield();
}
}
}
int task_arena_impl::max_concurrency(const d1::task_arena_base *ta) {
arena* a = nullptr;
if( ta ) // for special cases of ta->max_concurrency()
a = ta->my_arena.load(std::memory_order_relaxed);
else if( thread_data* td = governor::get_thread_data_if_initialized() )
a = td->my_arena; // the current arena if any
if( a ) { // Get parameters from the arena
__TBB_ASSERT( !ta || ta->my_max_concurrency==1, nullptr);
int mandatory_worker = 0;
if (a->is_arena_workerless() && a->my_num_reserved_slots == 1) {
mandatory_worker = a->my_mandatory_concurrency.test() ? 1 : 0;
}
return a->my_num_reserved_slots + a->my_max_num_workers + mandatory_worker;
}
if (ta && ta->my_max_concurrency == 1) {
return 1;
}
#if __TBB_ARENA_BINDING
if (ta) {
d1::constraints arena_constraints = d1::constraints{}
.set_numa_id(ta->my_numa_id)
.set_core_type(ta->core_type())
.set_max_threads_per_core(ta->max_threads_per_core());
return (int)default_concurrency(arena_constraints);
}
#endif /*!__TBB_ARENA_BINDING*/
__TBB_ASSERT(!ta || ta->my_max_concurrency==d1::task_arena_base::automatic, nullptr);
return int(governor::default_num_threads());
}
void isolate_within_arena(d1::delegate_base& d, std::intptr_t isolation) {
// TODO: Decide what to do if the scheduler is not initialized. Is there a use case for it?
thread_data* tls = governor::get_thread_data();
assert_pointers_valid(tls, tls->my_task_dispatcher);
task_dispatcher* dispatcher = tls->my_task_dispatcher;
isolation_type previous_isolation = dispatcher->m_execute_data_ext.isolation;
try_call([&] {
// We temporarily change the isolation tag of the currently running task. It will be restored in the destructor of the guard.
isolation_type current_isolation = isolation ? isolation : reinterpret_cast<isolation_type>(&d);
// Save the current isolation value and set new one
previous_isolation = dispatcher->set_isolation(current_isolation);
// Isolation within this callable
d();
}).on_completion([&] {
__TBB_ASSERT(governor::get_thread_data()->my_task_dispatcher == dispatcher, nullptr);
dispatcher->set_isolation(previous_isolation);
});
}
} // namespace r1
} // namespace detail
} // namespace tbb

511
third_party/tbb/arena.hh vendored Normal file

@ -0,0 +1,511 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_arena_H
#define _TBB_arena_H
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/cstring"
#include "third_party/tbb/detail/_task.hh"
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/spin_mutex.hh"
#include "third_party/tbb/scheduler_common.hh"
#include "third_party/tbb/intrusive_list.hh"
#include "third_party/tbb/task_stream.hh"
#include "third_party/tbb/arena_slot.hh"
#include "third_party/tbb/rml_tbb.hh"
#include "third_party/tbb/mailbox.hh"
#include "third_party/tbb/governor.hh"
#include "third_party/tbb/concurrent_monitor.hh"
#include "third_party/tbb/observer_proxy.hh"
#include "third_party/tbb/thread_control_monitor.hh"
#include "third_party/tbb/threading_control_client.hh"
namespace tbb {
namespace detail {
namespace r1 {
class task_dispatcher;
class task_group_context;
class threading_control;
class allocate_root_with_context_proxy;
#if __TBB_ARENA_BINDING
class numa_binding_observer;
#endif /*__TBB_ARENA_BINDING*/
//! Bounded coroutines cache LIFO ring buffer
class arena_co_cache {
//! Ring buffer storage
task_dispatcher** my_co_scheduler_cache;
//! Current cache index
unsigned my_head;
//! Cache capacity for arena
unsigned my_max_index;
//! Accessor lock for modification operations
tbb::spin_mutex my_co_cache_mutex;
unsigned next_index() {
return ( my_head == my_max_index ) ? 0 : my_head + 1;
}
unsigned prev_index() {
return ( my_head == 0 ) ? my_max_index : my_head - 1;
}
bool internal_empty() {
return my_co_scheduler_cache[prev_index()] == nullptr;
}
void internal_task_dispatcher_cleanup(task_dispatcher* to_cleanup) {
to_cleanup->~task_dispatcher();
cache_aligned_deallocate(to_cleanup);
}
public:
void init(unsigned cache_capacity) {
std::size_t alloc_size = cache_capacity * sizeof(task_dispatcher*);
my_co_scheduler_cache = (task_dispatcher**)cache_aligned_allocate(alloc_size);
std::memset( my_co_scheduler_cache, 0, alloc_size );
my_head = 0;
my_max_index = cache_capacity - 1;
}
void cleanup() {
while (task_dispatcher* to_cleanup = pop()) {
internal_task_dispatcher_cleanup(to_cleanup);
}
cache_aligned_deallocate(my_co_scheduler_cache);
}
//! Insert scheduler to the current available place.
//! Replace an old value, if necessary.
void push(task_dispatcher* s) {
task_dispatcher* to_cleanup = nullptr;
{
tbb::spin_mutex::scoped_lock lock(my_co_cache_mutex);
// Check if we are replacing an existing buffer entry
if (my_co_scheduler_cache[my_head] != nullptr) {
to_cleanup = my_co_scheduler_cache[my_head];
}
// Store the cached value
my_co_scheduler_cache[my_head] = s;
// Move head index to the next slot
my_head = next_index();
}
// Cleanup replaced buffer if any
if (to_cleanup) {
internal_task_dispatcher_cleanup(to_cleanup);
}
}
//! Get a cached scheduler if any
task_dispatcher* pop() {
tbb::spin_mutex::scoped_lock lock(my_co_cache_mutex);
// No cached coroutine
if (internal_empty()) {
return nullptr;
}
// Move head index to the currently available value
my_head = prev_index();
// Retrieve the value from the buffer
task_dispatcher* to_return = my_co_scheduler_cache[my_head];
// Clear the previous entry
my_co_scheduler_cache[my_head] = nullptr;
return to_return;
}
};
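// Usage sketch for the cache above (the capacity and the call sites are chosen by the arena
// and are not shown here):
//
//   arena_co_cache cache;
//   cache.init(/*cache_capacity=*/8);               // ring of 8 entries, all empty
//   cache.push(dispatcher);                         // may destroy the oldest cached entry
//   if (task_dispatcher* td = cache.pop()) { ... }  // LIFO reuse; nullptr when empty
//   cache.cleanup();                                // destroys whatever is still cached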
struct stack_anchor_type {
stack_anchor_type() = default;
stack_anchor_type(const stack_anchor_type&) = delete;
};
class atomic_flag {
static const std::uintptr_t SET = 1;
static const std::uintptr_t UNSET = 0;
std::atomic<std::uintptr_t> my_state{UNSET};
public:
bool test_and_set() {
std::uintptr_t state = my_state.load(std::memory_order_acquire);
switch (state) {
case SET:
return false;
default: /* busy */
if (my_state.compare_exchange_strong(state, SET)) {
// We interrupted a clear transaction
return false;
}
if (state != UNSET) {
// We lost our epoch
return false;
}
// We are too late but still in the same epoch
__TBB_fallthrough;
case UNSET:
return my_state.compare_exchange_strong(state, SET);
}
}
template <typename Pred>
bool try_clear_if(Pred&& pred) {
std::uintptr_t busy = std::uintptr_t(&busy);
std::uintptr_t state = my_state.load(std::memory_order_acquire);
if (state == SET && my_state.compare_exchange_strong(state, busy)) {
if (pred()) {
return my_state.compare_exchange_strong(busy, UNSET);
}
// The result of the next operation is discarded; false must always be returned in this case.
my_state.compare_exchange_strong(busy, SET);
}
return false;
}
void clear() {
my_state.store(UNSET, std::memory_order_release);
}
bool test(std::memory_order order = std::memory_order_acquire) {
return my_state.load(order) != UNSET;
}
};
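// This flag implements the set/clear protocol used by my_pool_state and
// my_mandatory_concurrency below: the producer requests workers only on a clean UNSET->SET
// transition, and the consumer releases them only if the predicate still holds while the flag
// is parked in the intermediate "busy" state. Sketch of the pattern (no_more_work() is a
// placeholder predicate):
//
//   if (flag.test_and_set()) {                              // advertise_new_work()
//       request_workers(+delta);
//   }
//   ...
//   if (flag.try_clear_if([] { return no_more_work(); })) { // out_of_work()
//       request_workers(-delta);
//   }
//
// try_clear_if() reports failure when it races with a concurrent test_and_set(), so a wakeup
// that arrives while the flag is being cleared is never lost.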
//! The structure of an arena, except the array of slots.
/** Separated in order to simplify padding.
Intrusive list node base class is used by market to form a list of arenas. **/
// TODO: Analyze arena_base cache lines placement
struct arena_base : padded<intrusive_list_node> {
//! The number of workers that have been marked out by the resource manager to service the arena.
std::atomic<unsigned> my_num_workers_allotted; // heavy use in stealing loop
//! Reference counter for the arena.
/** Worker and external thread references are counted separately: first several bits are for references
from external threads or explicit task_arenas (see arena::ref_external_bits below);
the rest counts the number of workers servicing the arena. */
std::atomic<unsigned> my_references; // heavy use in stealing loop
//! The maximal number of currently busy slots.
std::atomic<unsigned> my_limit; // heavy use in stealing loop
//! Task pool for the tasks scheduled via task::enqueue() method.
/** Such scheduling guarantees eventual execution even if
- new tasks are constantly coming (by extracting scheduled tasks in
relaxed FIFO order);
- the enqueuing thread does not call any of wait_for_all methods. **/
task_stream<front_accessor> my_fifo_task_stream; // heavy use in stealing loop
//! Task pool for the tasks scheduled via tbb::resume() function.
task_stream<front_accessor> my_resume_task_stream; // heavy use in stealing loop
#if __TBB_PREVIEW_CRITICAL_TASKS
//! Task pool for the tasks with critical property set.
/** Critical tasks are scheduled for execution ahead of other sources (including local task pool
and even bypassed tasks) unless the thread already executes a critical task in an outer
dispatch loop **/
// used on the hot path of the task dispatch loop
task_stream<back_nonnull_accessor> my_critical_task_stream;
#endif
//! The total number of workers that are requested from the resource manager.
int my_total_num_workers_requested;
//! The index in the array of per priority lists of arenas this object is in.
/*const*/ unsigned my_priority_level;
//! The max priority level of arena in permit manager.
std::atomic<bool> my_is_top_priority{false};
//! Current task pool state and estimate of available tasks amount.
atomic_flag my_pool_state;
//! The list of local observers attached to this arena.
observer_list my_observers;
#if __TBB_ARENA_BINDING
//! Pointer to internal observer that allows to bind threads in arena to certain NUMA node.
numa_binding_observer* my_numa_binding_observer;
#endif /*__TBB_ARENA_BINDING*/
// Below are rarely modified members
threading_control* my_threading_control;
//! Default task group context.
d1::task_group_context* my_default_ctx;
//! Waiting object for external threads that cannot join the arena.
concurrent_monitor my_exit_monitors;
//! Coroutines (task_dispatchers) cache buffer
arena_co_cache my_co_cache;
// arena needs an extra worker despite the arena limit
atomic_flag my_mandatory_concurrency;
// the number of local mandatory concurrency requests
int my_mandatory_requests;
//! The number of slots in the arena.
unsigned my_num_slots;
//! The number of reserved slots (can be occupied only by external threads).
unsigned my_num_reserved_slots;
//! The number of workers requested by the external thread owning the arena.
unsigned my_max_num_workers;
threading_control_client my_tc_client;
#if TBB_USE_ASSERT
//! Used to trap accesses to the object after its destruction.
std::uintptr_t my_guard;
#endif /* TBB_USE_ASSERT */
}; // struct arena_base
class arena: public padded<arena_base>
{
public:
using base_type = padded<arena_base>;
//! Types of work advertised by advertise_new_work()
enum new_work_type {
work_spawned,
wakeup,
work_enqueued
};
//! Constructor
arena(threading_control* control, unsigned max_num_workers, unsigned num_reserved_slots, unsigned priority_level);
//! Allocate an instance of arena.
static arena& allocate_arena(threading_control* control, unsigned num_slots, unsigned num_reserved_slots,
unsigned priority_level);
static arena& create(threading_control* control, unsigned num_slots, unsigned num_reserved_slots, unsigned arena_priority_level);
static unsigned num_arena_slots ( unsigned num_slots, unsigned num_reserved_slots ) {
return num_reserved_slots == 0 ? num_slots : max(2u, num_slots);
}
static int allocation_size( unsigned num_slots ) {
return sizeof(base_type) + num_slots * (sizeof(mail_outbox) + sizeof(arena_slot) + sizeof(task_dispatcher));
}
//! Get reference to mailbox corresponding to given slot_id
mail_outbox& mailbox( d1::slot_id slot ) {
__TBB_ASSERT( slot != d1::no_slot, "affinity should be specified" );
return reinterpret_cast<mail_outbox*>(this)[-(int)(slot+1)]; // cast to 'int' is redundant but left for readability
}
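// Because of the negative indexing above, mailbox(0) is the mail_outbox that sits immediately
// below `this`, and mailbox(my_num_slots - 1) is the first object in the allocation, which is
// why free_arena() recovers the base address of the whole block with
// &mailbox(my_num_slots - 1) before deallocating it.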
//! Completes arena shutdown, destructs and deallocates it.
void free_arena();
//! The number of least significant bits for external references
static const unsigned ref_external_bits = 12; // up to 4095 external and 1M workers
//! Reference increment values for externals and workers
static const unsigned ref_external = 1;
static const unsigned ref_worker = 1 << ref_external_bits;
//! The number of workers active in the arena.
unsigned num_workers_active() const {
return my_references.load(std::memory_order_acquire) >> ref_external_bits;
}
//! Check if the recall is requested by the market.
bool is_recall_requested() const {
return num_workers_active() > my_num_workers_allotted.load(std::memory_order_relaxed);
}
void request_workers(int mandatory_delta, int workers_delta, bool wakeup_threads = false);
//! If necessary, raise a flag that there is new job in arena.
template<arena::new_work_type work_type> void advertise_new_work();
//! Attempts to steal a task from a randomly chosen arena slot
d1::task* steal_task(unsigned arena_index, FastRandom& frnd, execution_data_ext& ed, isolation_type isolation);
//! Get a task from a global starvation resistant queue
template<task_stream_accessor_type accessor>
d1::task* get_stream_task(task_stream<accessor>& stream, unsigned& hint);
#if __TBB_PREVIEW_CRITICAL_TASKS
//! Tries to find a critical task in global critical task stream
d1::task* get_critical_task(unsigned& hint, isolation_type isolation);
#endif
//! Check if there is job anywhere in arena.
void out_of_work();
//! enqueue a task into starvation-resistance queue
void enqueue_task(d1::task&, d1::task_group_context&, thread_data&);
//! Registers the worker with the arena and enters TBB scheduler dispatch loop
void process(thread_data&);
//! Notification that the thread leaves its arena
void on_thread_leaving(unsigned ref_param);
//! Check for the presence of enqueued tasks
bool has_enqueued_tasks();
//! Check for the presence of any tasks
bool has_tasks();
bool is_empty() { return my_pool_state.test() == /* EMPTY */ false; }
thread_control_monitor& get_waiting_threads_monitor();
static const std::size_t out_of_arena = ~size_t(0);
//! Tries to occupy a slot in the arena. On success, returns the slot index; if no slot is available, returns out_of_arena.
template <bool as_worker>
std::size_t occupy_free_slot(thread_data&);
//! Tries to occupy a slot in the specified range.
std::size_t occupy_free_slot_in_range(thread_data& tls, std::size_t lower, std::size_t upper);
std::uintptr_t calculate_stealing_threshold();
unsigned priority_level() { return my_priority_level; }
bool has_request() { return my_total_num_workers_requested; }
unsigned references() const { return my_references.load(std::memory_order_acquire); }
bool is_arena_workerless() const { return my_max_num_workers == 0; }
void set_top_priority(bool);
bool is_top_priority() const;
bool try_join();
void set_allotment(unsigned allotment);
std::pair</*min workers = */ int, /*max workers = */ int> update_request(int mandatory_delta, int workers_delta);
/** Must be the last data field */
arena_slot my_slots[1];
}; // class arena
template <arena::new_work_type work_type>
void arena::advertise_new_work() {
bool is_mandatory_needed = false;
bool are_workers_needed = false;
if (work_type != work_spawned) {
// Local memory fence here and below is required to avoid missed wakeups; see the comment below.
// Starvation resistant tasks require concurrency, so missed wakeups are unacceptable.
atomic_fence_seq_cst();
}
if (work_type == work_enqueued && my_num_slots > my_num_reserved_slots) {
is_mandatory_needed = my_mandatory_concurrency.test_and_set();
}
// Double-check idiom that, in case of spawning, is deliberately sloppy about memory fences.
// Technically, to avoid missed wakeups, there should be a full memory fence between the point we
// released the task pool (i.e. spawned task) and read the arena's state. However, adding such a
// fence might hurt overall performance more than it helps, because the fence would be executed
// on every task pool release, even when stealing does not occur. Since TBB allows parallelism,
// but never promises parallelism, the missed wakeup is not a correctness problem.
are_workers_needed = my_pool_state.test_and_set();
if (is_mandatory_needed || are_workers_needed) {
int mandatory_delta = is_mandatory_needed ? 1 : 0;
int workers_delta = are_workers_needed ? my_max_num_workers : 0;
if (is_mandatory_needed && is_arena_workerless()) {
// Set workers_delta to 1 to keep arena invariants consistent
workers_delta = 1;
}
bool wakeup_workers = is_mandatory_needed || are_workers_needed;
request_workers(mandatory_delta, workers_delta, wakeup_workers);
}
}
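// In this snapshot the three work kinds map to the following call sites: work_enqueued is
// advertised from arena::enqueue_task() for starvation-resistant (FIFO) tasks, wakeup is
// advertised from arena_slot::get_task()/steal_task() when previously hidden tasks become
// visible again, and work_spawned corresponds to ordinary local spawns. Only the non-spawn
// kinds pay for the sequentially consistent fence above.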
inline d1::task* arena::steal_task(unsigned arena_index, FastRandom& frnd, execution_data_ext& ed, isolation_type isolation) {
auto slot_num_limit = my_limit.load(std::memory_order_relaxed);
if (slot_num_limit == 1) {
// No slots to steal from
return nullptr;
}
// Try to steal a task from a random victim.
std::size_t k = frnd.get() % (slot_num_limit - 1);
// The following condition excludes the external thread that might have
// already taken our previous place in the arena from the list
// of potential victims. But since such a situation can take
// place only in case of significant oversubscription, keeping
// the checks simple seems to be preferable to complicating the code.
if (k >= arena_index) {
++k; // Adjusts random distribution to exclude self
}
arena_slot* victim = &my_slots[k];
d1::task **pool = victim->task_pool.load(std::memory_order_relaxed);
d1::task *t = nullptr;
if (pool == EmptyTaskPool || !(t = victim->steal_task(*this, isolation, k))) {
return nullptr;
}
if (task_accessor::is_proxy_task(*t)) {
task_proxy &tp = *(task_proxy*)t;
d1::slot_id slot = tp.slot;
t = tp.extract_task<task_proxy::pool_bit>();
if (!t) {
// Proxy was empty, so it's our responsibility to free it
tp.allocator.delete_object(&tp, ed);
return nullptr;
}
// Note affinity is called for any stolen task (proxy or general)
ed.affinity_slot = slot;
} else {
// Note affinity is called for any stolen task (proxy or general)
ed.affinity_slot = d1::any_slot;
}
// Update task owner thread id to identify stealing
ed.original_slot = k;
return t;
}
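// The victim selection above draws k uniformly from the (slot_num_limit - 1) slots other than
// our own: k is first taken from [0, slot_num_limit - 2] and then shifted past arena_index.
// For example, with slot_num_limit == 4 and arena_index == 2 the raw values {0, 1, 2} map to
// victims {0, 1, 3}.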
template<task_stream_accessor_type accessor>
inline d1::task* arena::get_stream_task(task_stream<accessor>& stream, unsigned& hint) {
if (stream.empty())
return nullptr;
return stream.pop(subsequent_lane_selector(hint));
}
#if __TBB_PREVIEW_CRITICAL_TASKS
// Retrieves critical task respecting isolation level, if provided. The rule is:
// 1) If no outer critical task and no isolation => take any critical task
// 2) If working on an outer critical task and no isolation => cannot take any critical task
// 3) If no outer critical task but isolated => respect isolation
// 4) If working on an outer critical task and isolated => respect isolation
// Hint is used to keep some LIFO-ness, start search with the lane that was used during push operation.
inline d1::task* arena::get_critical_task(unsigned& hint, isolation_type isolation) {
if (my_critical_task_stream.empty())
return nullptr;
if ( isolation != no_isolation ) {
return my_critical_task_stream.pop_specific( hint, isolation );
} else {
return my_critical_task_stream.pop(preceding_lane_selector(hint));
}
}
#endif // __TBB_PREVIEW_CRITICAL_TASKS
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* _TBB_arena_H */

219
third_party/tbb/arena_slot.cc vendored Normal file

@ -0,0 +1,219 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/arena_slot.hh"
#include "third_party/tbb/arena.hh"
#include "third_party/tbb/thread_data.hh"
namespace tbb {
namespace detail {
namespace r1 {
//------------------------------------------------------------------------
// Arena Slot
//------------------------------------------------------------------------
d1::task* arena_slot::get_task_impl(size_t T, execution_data_ext& ed, bool& tasks_omitted, isolation_type isolation) {
__TBB_ASSERT(tail.load(std::memory_order_relaxed) <= T || is_local_task_pool_quiescent(),
"Is it safe to get a task at position T?");
d1::task* result = task_pool_ptr[T];
__TBB_ASSERT(!is_poisoned( result ), "The poisoned task is going to be processed");
if (!result) {
return nullptr;
}
bool omit = isolation != no_isolation && isolation != task_accessor::isolation(*result);
if (!omit && !task_accessor::is_proxy_task(*result)) {
return result;
} else if (omit) {
tasks_omitted = true;
return nullptr;
}
task_proxy& tp = static_cast<task_proxy&>(*result);
d1::slot_id aff_id = tp.slot;
if ( d1::task *t = tp.extract_task<task_proxy::pool_bit>() ) {
ed.affinity_slot = aff_id;
return t;
}
// Proxy was empty, so it's our responsibility to free it
tp.allocator.delete_object(&tp, ed);
if ( tasks_omitted ) {
task_pool_ptr[T] = nullptr;
}
return nullptr;
}
d1::task* arena_slot::get_task(execution_data_ext& ed, isolation_type isolation) {
__TBB_ASSERT(is_task_pool_published(), nullptr);
// The current task position in the task pool.
std::size_t T0 = tail.load(std::memory_order_relaxed);
// The bounds of available tasks in the task pool. H0 is only used when the head bound is reached.
std::size_t H0 = (std::size_t)-1, T = T0;
d1::task* result = nullptr;
bool task_pool_empty = false;
bool tasks_omitted = false;
do {
__TBB_ASSERT( !result, nullptr );
// The full fence is required to sync the store of `tail` with the load of `head` (write-read barrier)
T = --tail;
// The acquire load of head is required to guarantee consistency of our task pool
// when a thief rolls back the head.
if ( (std::intptr_t)( head.load(std::memory_order_acquire) ) > (std::intptr_t)T ) {
acquire_task_pool();
H0 = head.load(std::memory_order_relaxed);
if ( (std::intptr_t)H0 > (std::intptr_t)T ) {
// The thief has not backed off - nothing to grab.
__TBB_ASSERT( H0 == head.load(std::memory_order_relaxed)
&& T == tail.load(std::memory_order_relaxed)
&& H0 == T + 1, "victim/thief arbitration algorithm failure" );
reset_task_pool_and_leave();
// No tasks in the task pool.
task_pool_empty = true;
break;
} else if ( H0 == T ) {
// There is only one task in the task pool.
reset_task_pool_and_leave();
task_pool_empty = true;
} else {
// Release task pool if there are still some tasks.
// After the release, the tail will be less than T, thus a thief
// will not attempt to get a task at position T.
release_task_pool();
}
}
result = get_task_impl( T, ed, tasks_omitted, isolation );
if ( result ) {
poison_pointer( task_pool_ptr[T] );
break;
} else if ( !tasks_omitted ) {
poison_pointer( task_pool_ptr[T] );
__TBB_ASSERT( T0 == T+1, nullptr );
T0 = T;
}
} while ( !result && !task_pool_empty );
if ( tasks_omitted ) {
if ( task_pool_empty ) {
// All tasks have been checked. The task pool should be in reset state.
// We just restore the bounds for the available tasks.
// TODO: Does it have sense to move them to the beginning of the task pool?
__TBB_ASSERT( is_quiescent_local_task_pool_reset(), nullptr );
if ( result ) {
// If we have a task, it should be at H0 position.
__TBB_ASSERT( H0 == T, nullptr );
++H0;
}
__TBB_ASSERT( H0 <= T0, nullptr );
if ( H0 < T0 ) {
// Restore the task pool if there are some tasks.
head.store(H0, std::memory_order_relaxed);
tail.store(T0, std::memory_order_relaxed);
// The release fence is used in publish_task_pool.
publish_task_pool();
// Synchronize with snapshot as we published some tasks.
ed.task_disp->m_thread_data->my_arena->advertise_new_work<arena::wakeup>();
}
} else {
// A task has been obtained. We need to make a hole in position T.
__TBB_ASSERT( is_task_pool_published(), nullptr );
__TBB_ASSERT( result, nullptr );
task_pool_ptr[T] = nullptr;
tail.store(T0, std::memory_order_release);
// Synchronize with snapshot as we published some tasks.
// TODO: consider some approach not to call wakeup for each time. E.g. check if the tail reached the head.
ed.task_disp->m_thread_data->my_arena->advertise_new_work<arena::wakeup>();
}
}
__TBB_ASSERT( (std::intptr_t)tail.load(std::memory_order_relaxed) >= 0, nullptr );
__TBB_ASSERT( result || tasks_omitted || is_quiescent_local_task_pool_reset(), nullptr );
return result;
}
d1::task* arena_slot::steal_task(arena& a, isolation_type isolation, std::size_t slot_index) {
d1::task** victim_pool = lock_task_pool();
if (!victim_pool) {
return nullptr;
}
d1::task* result = nullptr;
std::size_t H = head.load(std::memory_order_relaxed); // mirror
std::size_t H0 = H;
bool tasks_omitted = false;
do {
// The full fence is required to sync the store of `head` with the load of `tail` (write-read barrier)
H = ++head;
// The acquire load of tail is required to guarantee consistency of victim_pool
// because the owner synchronizes task spawning via tail.
if ((std::intptr_t)H > (std::intptr_t)(tail.load(std::memory_order_acquire))) {
// Stealing attempt failed; the deque contents have not been changed by us
head.store( /*dead: H = */ H0, std::memory_order_relaxed );
__TBB_ASSERT( !result, nullptr );
goto unlock;
}
result = victim_pool[H-1];
__TBB_ASSERT( !is_poisoned( result ), nullptr );
if (result) {
if (isolation == no_isolation || isolation == task_accessor::isolation(*result)) {
if (!task_accessor::is_proxy_task(*result)) {
break;
}
task_proxy& tp = *static_cast<task_proxy*>(result);
// If mailed task is likely to be grabbed by its destination thread, skip it.
if (!task_proxy::is_shared(tp.task_and_tag) || !tp.outbox->recipient_is_idle() || a.mailbox(slot_index).recipient_is_idle()) {
break;
}
}
// The task cannot be executed either due to isolation or proxy constraints.
result = nullptr;
tasks_omitted = true;
} else if (!tasks_omitted) {
// Clean holes out of the task pool until the first task is omitted.
__TBB_ASSERT( H0 == H-1, nullptr );
poison_pointer( victim_pool[H0] );
H0 = H;
}
} while (!result);
__TBB_ASSERT( result, nullptr );
// emit "task was consumed" signal
poison_pointer( victim_pool[H-1] );
if (tasks_omitted) {
// Some proxies in the task pool have been omitted. Set the stolen task to nullptr.
victim_pool[H-1] = nullptr;
// The release store synchronizes the victim_pool update (the store of nullptr).
head.store( /*dead: H = */ H0, std::memory_order_release );
}
unlock:
unlock_task_pool(victim_pool);
#if __TBB_PREFETCHING
// This slot is the victim here; evict its hot fields from our cache.
__TBB_cl_evict(&head);
__TBB_cl_evict(&tail);
#endif
if (tasks_omitted) {
// Synchronize with snapshot as the head and tail can be bumped which can falsely trigger EMPTY state
a.advertise_new_work<arena::wakeup>();
}
return result;
}
} // namespace r1
} // namespace detail
} // namespace tbb

415
third_party/tbb/arena_slot.hh vendored Normal file

@ -0,0 +1,415 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_arena_slot_H
#define _TBB_arena_slot_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/detail/_template_helpers.hh"
#include "third_party/tbb/detail/_task.hh"
#include "third_party/tbb/cache_aligned_allocator.hh"
#include "third_party/tbb/misc.hh"
#include "third_party/tbb/mailbox.hh"
#include "third_party/tbb/scheduler_common.hh"
#include "third_party/libcxx/atomic"
namespace tbb {
namespace detail {
namespace r1 {
class arena;
class task_group_context;
//--------------------------------------------------------------------------------------------------------
// Arena Slot
//--------------------------------------------------------------------------------------------------------
static d1::task** const EmptyTaskPool = nullptr;
static d1::task** const LockedTaskPool = reinterpret_cast<d1::task**>(~std::intptr_t(0));
struct alignas(max_nfs_size) arena_slot_shared_state {
//! Scheduler of the thread attached to the slot
/** Marks the slot as busy, and is used to iterate through the schedulers belonging to this arena **/
std::atomic<bool> my_is_occupied;
// Synchronization of access to Task pool
/** Also is used to specify if the slot is empty or locked:
0 - empty
-1 - locked **/
std::atomic<d1::task**> task_pool;
//! Index of the first ready task in the deque.
/** Modified by thieves, and by the owner during compaction/reallocation **/
std::atomic<std::size_t> head;
};
struct alignas(max_nfs_size) arena_slot_private_state {
//! Hint provided for operations with the container of starvation-resistant tasks.
/** Modified by the owner thread (during these operations). **/
unsigned hint_for_fifo_stream;
#if __TBB_PREVIEW_CRITICAL_TASKS
//! Similar to 'hint_for_fifo_stream' but for critical tasks.
unsigned hint_for_critical_stream;
#endif
//! Similar to 'hint_for_fifo_stream' but for the resume tasks.
unsigned hint_for_resume_stream;
//! Index of the element following the last ready task in the deque.
/** Modified by the owner thread. **/
std::atomic<std::size_t> tail;
//! Capacity of the primary task pool (number of elements - pointers to task).
std::size_t my_task_pool_size;
//! Task pool of the scheduler that owns this slot
// TODO: previously was task**__TBB_atomic, but seems like not accessed on other thread
d1::task** task_pool_ptr;
};
class arena_slot : private arena_slot_shared_state, private arena_slot_private_state {
friend class arena;
friend class outermost_worker_waiter;
friend class task_dispatcher;
friend class thread_data;
friend class nested_arena_context;
//! The original task dispatcher associated with this slot
task_dispatcher* my_default_task_dispatcher;
#if TBB_USE_ASSERT
void fill_with_canary_pattern ( std::size_t first, std::size_t last ) {
for ( std::size_t i = first; i < last; ++i )
poison_pointer(task_pool_ptr[i]);
}
#else
void fill_with_canary_pattern ( size_t, std::size_t ) {}
#endif /* TBB_USE_ASSERT */
static constexpr std::size_t min_task_pool_size = 64;
void allocate_task_pool( std::size_t n ) {
std::size_t byte_size = ((n * sizeof(d1::task*) + max_nfs_size - 1) / max_nfs_size) * max_nfs_size;
my_task_pool_size = byte_size / sizeof(d1::task*);
task_pool_ptr = (d1::task**)cache_aligned_allocate(byte_size);
// No need to clear the fresh deque since valid items are designated by the head and tail members.
// But fill it with a canary pattern in the high vigilance debug mode.
fill_with_canary_pattern( 0, my_task_pool_size );
}
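// Worked example of the rounding above, assuming the usual 128-byte max_nfs_size and 8-byte
// task pointers: a request for min_task_pool_size (64) slots needs 512 bytes, already a
// multiple of 128, so my_task_pool_size stays 64; a request for 65 slots needs 520 bytes,
// which rounds up to 640, so the pool actually holds 80 pointers.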
public:
//! Deallocate task pool that was allocated by means of allocate_task_pool.
void free_task_pool( ) {
// TODO: understand the assertion and modify
// __TBB_ASSERT( !task_pool /* TODO: == EmptyTaskPool */, nullptr);
if( task_pool_ptr ) {
__TBB_ASSERT( my_task_pool_size, nullptr);
cache_aligned_deallocate( task_pool_ptr );
task_pool_ptr = nullptr;
my_task_pool_size = 0;
}
}
//! Get a task from the local pool.
/** Called only by the pool owner.
Returns the pointer to the task or nullptr if a suitable task is not found.
Resets the pool if it is empty. **/
d1::task* get_task(execution_data_ext&, isolation_type);
//! Steal task from slot's ready pool
d1::task* steal_task(arena&, isolation_type, std::size_t);
//! Some thread is now the owner of this slot
void occupy() {
__TBB_ASSERT(!my_is_occupied.load(std::memory_order_relaxed), nullptr);
my_is_occupied.store(true, std::memory_order_release);
}
//! Try to occupy the slot
bool try_occupy() {
return !is_occupied() && my_is_occupied.exchange(true) == false;
}
//! The owning thread releases this slot
void release() {
__TBB_ASSERT(my_is_occupied.load(std::memory_order_relaxed), nullptr);
my_is_occupied.store(false, std::memory_order_release);
}
//! Spawn newly created tasks
void spawn(d1::task& t) {
std::size_t T = prepare_task_pool(1);
__TBB_ASSERT(is_poisoned(task_pool_ptr[T]), nullptr);
task_pool_ptr[T] = &t;
commit_spawned_tasks(T + 1);
if (!is_task_pool_published()) {
publish_task_pool();
}
}
bool is_task_pool_published() const {
return task_pool.load(std::memory_order_relaxed) != EmptyTaskPool;
}
bool is_empty() const {
return task_pool.load(std::memory_order_relaxed) == EmptyTaskPool ||
head.load(std::memory_order_relaxed) >= tail.load(std::memory_order_relaxed);
}
bool is_occupied() const {
return my_is_occupied.load(std::memory_order_relaxed);
}
task_dispatcher& default_task_dispatcher() {
__TBB_ASSERT(my_default_task_dispatcher != nullptr, nullptr);
return *my_default_task_dispatcher;
}
void init_task_streams(unsigned h) {
hint_for_fifo_stream = h;
#if __TBB_RESUMABLE_TASKS
hint_for_resume_stream = h;
#endif
#if __TBB_PREVIEW_CRITICAL_TASKS
hint_for_critical_stream = h;
#endif
}
#if __TBB_PREVIEW_CRITICAL_TASKS
unsigned& critical_hint() {
return hint_for_critical_stream;
}
#endif
private:
//! Get a task from the local pool at specified location T.
/** Returns the pointer to the task or nullptr if the task cannot be executed,
e.g. proxy has been deallocated or isolation constraint is not met.
tasks_omitted tells if some tasks have been omitted.
Called only by the pool owner. The caller should guarantee that the
position T is not available for a thief. **/
d1::task* get_task_impl(size_t T, execution_data_ext& ed, bool& tasks_omitted, isolation_type isolation);
//! Makes sure that the task pool can accommodate at least n more elements
/** If necessary relocates existing task pointers or grows the ready task deque.
* Returns the (possibly updated) tail index (not accounting for n). **/
std::size_t prepare_task_pool(std::size_t num_tasks) {
std::size_t T = tail.load(std::memory_order_relaxed); // mirror
if ( T + num_tasks <= my_task_pool_size ) {
return T;
}
std::size_t new_size = num_tasks;
if ( !my_task_pool_size ) {
__TBB_ASSERT( !is_task_pool_published() && is_quiescent_local_task_pool_reset(), nullptr);
__TBB_ASSERT( !task_pool_ptr, nullptr);
if ( num_tasks < min_task_pool_size ) new_size = min_task_pool_size;
allocate_task_pool( new_size );
return 0;
}
acquire_task_pool();
std::size_t H = head.load(std::memory_order_relaxed); // mirror
d1::task** new_task_pool = task_pool_ptr;
__TBB_ASSERT( my_task_pool_size >= min_task_pool_size, nullptr);
// Count not skipped tasks. Consider using std::count_if.
for ( std::size_t i = H; i < T; ++i )
if ( new_task_pool[i] ) ++new_size;
// If the free space at the beginning of the task pool is too short, we
// are likely facing a pathological single-producer-multiple-consumers
// scenario, and thus it's better to expand the task pool
bool allocate = new_size > my_task_pool_size - min_task_pool_size/4;
if ( allocate ) {
// Grow task pool. As this operation is rare, and its cost is asymptotically
// amortizable, we can tolerate new task pool allocation done under the lock.
if ( new_size < 2 * my_task_pool_size )
new_size = 2 * my_task_pool_size;
allocate_task_pool( new_size ); // updates my_task_pool_size
}
// Filter out skipped tasks. Consider using std::copy_if.
std::size_t T1 = 0;
for ( std::size_t i = H; i < T; ++i ) {
if ( new_task_pool[i] ) {
task_pool_ptr[T1++] = new_task_pool[i];
}
}
// Deallocate the previous task pool if a new one has been allocated.
if ( allocate )
cache_aligned_deallocate( new_task_pool );
else
fill_with_canary_pattern( T1, tail );
// Publish the new state.
commit_relocated_tasks( T1 );
// assert_task_pool_valid();
return T1;
}
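// Worked example of the grow-or-compact decision above, for my_task_pool_size == 64
// (so the threshold is 64 - min_task_pool_size/4 == 48) and num_tasks == 1:
//   * 30 live entries between H and T -> new_size == 31 <= 48: compact in place; the live
//     tasks are packed to the front and T1 == 30 is returned;
//   * 50 live entries                 -> new_size == 51 >  48: a new pool of 2 * 64 == 128
//     pointers is allocated, the tasks are copied into it and the old pool is deallocated.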
//! Makes newly spawned tasks visible to thieves
void commit_spawned_tasks(std::size_t new_tail) {
__TBB_ASSERT (new_tail <= my_task_pool_size, "task deque end was overwritten");
// emit "task was released" signal
// Release fence is necessary to make sure that previously stored task pointers
// are visible to thieves.
tail.store(new_tail, std::memory_order_release);
}
//! Used by workers to enter the task pool
/** Does not lock the task pool if the arena slot has been successfully grabbed. **/
void publish_task_pool() {
__TBB_ASSERT ( task_pool == EmptyTaskPool, "someone else grabbed my arena slot?" );
__TBB_ASSERT ( head.load(std::memory_order_relaxed) < tail.load(std::memory_order_relaxed),
"entering arena without tasks to share" );
// Release signal on behalf of previously spawned tasks (when this thread was not in arena yet)
task_pool.store(task_pool_ptr, std::memory_order_release );
}
//! Locks the local task pool
/** Garbles task_pool for the duration of the lock. Requires correctly set task_pool_ptr.
ATTENTION: This method is mostly the same as generic_scheduler::lock_task_pool(), with
slightly different logic for the slot state checks (the slot is either locked or points
to our task pool). Thus if either of them is changed, consider changing the counterpart as well. **/
void acquire_task_pool() {
if (!is_task_pool_published()) {
return; // we are not in arena - nothing to lock
}
bool sync_prepare_done = false;
for( atomic_backoff b;;b.pause() ) {
#if TBB_USE_ASSERT
// Local copy of the arena slot task pool pointer is necessary for the next
// assertion to work correctly to exclude asynchronous state transition effect.
d1::task** tp = task_pool.load(std::memory_order_relaxed);
__TBB_ASSERT( tp == LockedTaskPool || tp == task_pool_ptr, "slot ownership corrupt?" );
#endif
d1::task** expected = task_pool_ptr;
if( task_pool.load(std::memory_order_relaxed) != LockedTaskPool &&
task_pool.compare_exchange_strong(expected, LockedTaskPool ) ) {
// We acquired our own slot
break;
} else if( !sync_prepare_done ) {
// Start waiting
sync_prepare_done = true;
}
// Someone else acquired a lock, so pause and do exponential backoff.
}
__TBB_ASSERT( task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "not really acquired task pool" );
}
//! Unlocks the local task pool
/** Restores task_pool munged by acquire_task_pool. Requires
correctly set task_pool_ptr. **/
void release_task_pool() {
if ( task_pool.load(std::memory_order_relaxed) == EmptyTaskPool )
return; // we are not in arena - nothing to unlock
__TBB_ASSERT( task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "arena slot is not locked" );
task_pool.store( task_pool_ptr, std::memory_order_release );
}
//! Locks victim's task pool, and returns pointer to it. The pointer can be nullptr.
/** Garbles victim_arena_slot->task_pool for the duration of the lock. **/
d1::task** lock_task_pool() {
d1::task** victim_task_pool;
for ( atomic_backoff backoff;; /*backoff pause embedded in the loop*/) {
victim_task_pool = task_pool.load(std::memory_order_relaxed);
// Microbenchmarks demonstrated that aborting a stealing attempt when the
// victim's task pool is locked degrades performance.
// NOTE: Do not use comparison of head and tail indices to check for
// the presence of work in the victim's task pool, as they may give
// incorrect indication because of task pool relocations and resizes.
if (victim_task_pool == EmptyTaskPool) {
break;
}
d1::task** expected = victim_task_pool;
if (victim_task_pool != LockedTaskPool && task_pool.compare_exchange_strong(expected, LockedTaskPool) ) {
// We've locked victim's task pool
break;
}
// Someone else acquired a lock, so pause and do exponential backoff.
backoff.pause();
}
__TBB_ASSERT(victim_task_pool == EmptyTaskPool ||
(task_pool.load(std::memory_order_relaxed) == LockedTaskPool &&
victim_task_pool != LockedTaskPool), "not really locked victim's task pool?");
return victim_task_pool;
}
//! Unlocks victim's task pool
/** Restores victim_arena_slot->task_pool munged by lock_task_pool. **/
void unlock_task_pool(d1::task** victim_task_pool) {
__TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "victim arena slot is not locked");
__TBB_ASSERT(victim_task_pool != LockedTaskPool, nullptr);
task_pool.store(victim_task_pool, std::memory_order_release);
}
#if TBB_USE_ASSERT
bool is_local_task_pool_quiescent() const {
d1::task** tp = task_pool.load(std::memory_order_relaxed);
return tp == EmptyTaskPool || tp == LockedTaskPool;
}
bool is_quiescent_local_task_pool_empty() const {
__TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent");
return head.load(std::memory_order_relaxed) == tail.load(std::memory_order_relaxed);
}
bool is_quiescent_local_task_pool_reset() const {
__TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent");
return head.load(std::memory_order_relaxed) == 0 && tail.load(std::memory_order_relaxed) == 0;
}
#endif // TBB_USE_ASSERT
//! Leave the task pool
/** Leaving task pool automatically releases the task pool if it is locked. **/
void leave_task_pool() {
__TBB_ASSERT(is_task_pool_published(), "Not in arena");
// Do not reset my_arena_index. It will be used to (attempt to) re-acquire the slot next time
__TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "Task pool must be locked when leaving arena");
__TBB_ASSERT(is_quiescent_local_task_pool_empty(), "Cannot leave arena when the task pool is not empty");
// No release fence is necessary here as this assignment precludes external
// accesses to the local task pool when it becomes visible. Thus it is harmless
// if it gets hoisted above preceding local bookkeeping manipulations.
task_pool.store(EmptyTaskPool, std::memory_order_relaxed);
}
//! Resets head and tail indices to 0, and leaves task pool
/** The task pool must be locked by the owner (via acquire_task_pool).**/
void reset_task_pool_and_leave() {
__TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "Task pool must be locked when resetting task pool");
tail.store(0, std::memory_order_relaxed);
head.store(0, std::memory_order_relaxed);
leave_task_pool();
}
//! Makes relocated tasks visible to thieves and releases the local task pool.
/** Obviously, the task pool must be locked when calling this method. **/
void commit_relocated_tasks(std::size_t new_tail) {
__TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool must be locked when calling commit_relocated_tasks()");
head.store(0, std::memory_order_relaxed);
// Tail is updated last to minimize the probability that a thread taking an arena
// snapshot is misled into thinking that this task pool is empty.
tail.store(new_tail, std::memory_order_release);
release_task_pool();
}
};
} // namespace r1
} // namespace detail
} // namespace tbb
#endif // __TBB_arena_slot_H
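
The methods above all follow one locking protocol: spin with exponential backoff until a compare-and-swap replaces the published task-pool pointer with the LockedTaskPool sentinel, then store the saved pointer back to unlock. Below is a minimal standalone sketch of that protocol with invented names and a plain std::atomic; it is not the real arena_slot interface.

// Hypothetical stand-in for the lock-via-sentinel protocol used by arena_slot.
#include <atomic>
#include <cstdint>
#include <thread>

namespace sketch {

int* const kLocked = reinterpret_cast<int*>(~std::uintptr_t(0));  // plays the role of LockedTaskPool

struct slot {
    std::atomic<int*> pool{nullptr};  // nullptr plays the role of EmptyTaskPool

    // Lock the pool the way lock_task_pool() does: never abort because the
    // victim is momentarily locked, just back off and retry.
    int* lock() {
        int* victim;
        for (int backoff = 1;;) {
            victim = pool.load(std::memory_order_relaxed);
            if (victim == nullptr) break;          // empty pool: nothing to lock
            int* expected = victim;
            if (victim != kLocked && pool.compare_exchange_strong(expected, kLocked))
                break;                             // we own the pool now
            for (int i = 0; i < backoff; ++i) std::this_thread::yield();
            if (backoff < 1024) backoff *= 2;      // exponential backoff
        }
        return victim;
    }

    // Unlock by publishing the saved pointer, as unlock_task_pool() does.
    void unlock(int* restored) { pool.store(restored, std::memory_order_release); }
};

} // namespace sketch
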

98
third_party/tbb/assert_impl.hh vendored Normal file
View file

@@ -0,0 +1,98 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_assert_impl_H
#define __TBB_assert_impl_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/libcxx/cstdio"
#include "third_party/libcxx/cstdlib"
#include "third_party/libcxx/cstring"
#include "third_party/libcxx/cstdarg"
#if _MSC_VER && _DEBUG
// MISSING #include <crtdbg.h>
#endif
#include "third_party/libcxx/mutex"
#if __TBBMALLOC_BUILD
namespace rml { namespace internal {
#else
namespace tbb {
namespace detail {
namespace r1 {
#endif
// TODO: consider extension for formatted error description string
static void assertion_failure_impl(const char* location, int line, const char* expression, const char* comment) {
std::fprintf(stderr, "Assertion %s failed (located in the %s function, line in file: %d)\n",
expression, location, line);
if (comment) {
std::fprintf(stderr, "Detailed description: %s\n", comment);
}
#if _MSC_VER && _DEBUG
if (1 == _CrtDbgReport(_CRT_ASSERT, location, line, "tbb_debug.dll", "%s\r\n%s", expression, comment?comment:"")) {
_CrtDbgBreak();
} else
#endif
{
std::fflush(stderr);
std::abort();
}
}
// Do not move the definition into the assertion_failure function because it would require "magic statics".
// That would bring a dependency on the C++ runtime on some platforms, while assert_impl.h is reused in
// tbbmalloc, which should not depend on the C++ runtime.
static std::atomic<tbb::detail::do_once_state> assertion_state;
void __TBB_EXPORTED_FUNC assertion_failure(const char* location, int line, const char* expression, const char* comment) {
#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
// Workaround for erroneous "unreachable code" during assertion throwing using call_once
#pragma warning (push)
#pragma warning (disable: 4702)
#endif
// We cannot use std::call_once because it brings a dependency on the C++ runtime on some platforms,
// while assert_impl.h is reused in tbbmalloc, which should not depend on the C++ runtime.
atomic_do_once([&](){ assertion_failure_impl(location, line, expression, comment); }, assertion_state);
#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
#pragma warning (pop)
#endif
}
//! Report a runtime warning.
void runtime_warning( const char* format, ... ) {
char str[1024]; std::memset(str, 0, 1024);
va_list args; va_start(args, format);
vsnprintf( str, 1024-1, format, args);
va_end(args);
fprintf(stderr, "TBB Warning: %s\n", str);
}
#if __TBBMALLOC_BUILD
}} // namespaces rml::internal
#else
} // namespace r1
} // namespace detail
} // namespace tbb
#endif
#endif // __TBB_assert_impl_H
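
assertion_failure() above is the sink that TBB's assertion macros expand into. Here is a hedged sketch of such a caller-side macro; MY_ASSERT is hypothetical, and the real __TBB_ASSERT machinery in third_party/tbb/detail/_assert.hh may differ in detail.

#include "third_party/tbb/detail/_assert.hh"   // declares assertion_failure()

// Hypothetical caller-side macro: a failed predicate is routed into
// tbb::detail::r1::assertion_failure() with the function name and line.
#define MY_ASSERT(predicate, message) \
    ((predicate) ? (void)0            \
                 : tbb::detail::r1::assertion_failure(__func__, __LINE__, #predicate, (message)))

void check_pointer(const int* p) {
    MY_ASSERT(p != nullptr, "check_pointer() requires a non-null argument");
}
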

171
third_party/tbb/blocked_range.hh vendored Normal file
View file

@@ -0,0 +1,171 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_blocked_range_H
#define __TBB_blocked_range_H
#include "third_party/libcxx/cstddef"
#include "third_party/tbb/detail/_range_common.hh"
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/version.hh"
namespace tbb {
namespace detail {
namespace d1 {
/** \page range_req Requirements on range concept
Class \c R implementing the concept of range must define:
- \code R::R( const R& ); \endcode Copy constructor
- \code R::~R(); \endcode Destructor
- \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges
- \code bool R::empty() const; \endcode True if range is empty
- \code R::R( R& r, split ); \endcode Split range \c r into two subranges.
**/
//! A range over which to iterate.
/** @ingroup algorithms */
template<typename Value>
__TBB_requires(blocked_range_value<Value>)
class blocked_range {
public:
//! Type of a value
/** Called a const_iterator for the sake of algorithms that need to treat a blocked_range
as an STL container. */
using const_iterator = Value;
//! Type for size of a range
using size_type = std::size_t;
//! Construct range over half-open interval [begin,end), with the given grainsize.
blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) :
my_end(end_), my_begin(begin_), my_grainsize(grainsize_)
{
__TBB_ASSERT( my_grainsize>0, "grainsize must be positive" );
}
//! Beginning of range.
const_iterator begin() const { return my_begin; }
//! One past last value in range.
const_iterator end() const { return my_end; }
//! Size of the range
/** Unspecified if end()<begin(). */
size_type size() const {
__TBB_ASSERT( !(end()<begin()), "size() unspecified if end()<begin()" );
return size_type(my_end-my_begin);
}
//! The grain size for this range.
size_type grainsize() const { return my_grainsize; }
//------------------------------------------------------------------------
// Methods that implement Range concept
//------------------------------------------------------------------------
//! True if range is empty.
bool empty() const { return !(my_begin<my_end); }
//! True if range is divisible.
/** Unspecified if end()<begin(). */
bool is_divisible() const { return my_grainsize<size(); }
//! Split range.
/** The new Range *this has the second part, the old range r has the first part.
Unspecified if end()<begin() or !is_divisible(). */
blocked_range( blocked_range& r, split ) :
my_end(r.my_end),
my_begin(do_split(r, split())),
my_grainsize(r.my_grainsize)
{
// only comparison 'less than' is required from values of blocked_range objects
__TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" );
}
//! Split range.
/** The new Range *this has the second part split according to specified proportion, the old range r has the first part.
Unspecified if end()<begin() or !is_divisible(). */
blocked_range( blocked_range& r, proportional_split& proportion ) :
my_end(r.my_end),
my_begin(do_split(r, proportion)),
my_grainsize(r.my_grainsize)
{
// only comparison 'less than' is required from values of blocked_range objects
__TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" );
}
private:
/** NOTE: my_end MUST be declared before my_begin, otherwise the splitting constructor will break. */
Value my_end;
Value my_begin;
size_type my_grainsize;
//! Auxiliary function used by the splitting constructor.
static Value do_split( blocked_range& r, split )
{
__TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" );
Value middle = r.my_begin + (r.my_end - r.my_begin) / 2u;
r.my_end = middle;
return middle;
}
static Value do_split( blocked_range& r, proportional_split& proportion )
{
__TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" );
// 32-bit floating point arithmetic is not precise enough to handle ranges of
// more than 2^24 iterations accurately. However, even on ranges with 2^64
// iterations the computational error is approximately 0.000001%, which has
// little impact on the uniform distribution of such a range's iterations (assuming
// all iterations take equal time to complete). See 'test_partitioner_whitebox'
// for an implementation of an exact split algorithm.
size_type right_part = size_type(float(r.size()) * float(proportion.right())
/ float(proportion.left() + proportion.right()) + 0.5f);
return r.my_end = Value(r.my_end - right_part);
}
template<typename RowValue, typename ColValue>
__TBB_requires(blocked_range_value<RowValue> &&
blocked_range_value<ColValue>)
friend class blocked_range2d;
template<typename RowValue, typename ColValue, typename PageValue>
__TBB_requires(blocked_range_value<RowValue> &&
blocked_range_value<ColValue> &&
blocked_range_value<PageValue>)
friend class blocked_range3d;
template<typename DimValue, unsigned int N, typename>
__TBB_requires(blocked_range_value<DimValue>)
friend class blocked_rangeNd_impl;
};
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::blocked_range;
// Split types
using detail::split;
using detail::proportional_split;
} // namespace v1
} // namespace tbb
#endif /* __TBB_blocked_range_H */
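
A short usage sketch for blocked_range: the grainsize is the threshold below which a subrange is no longer split. The parallel_for include path is assumed from this port's layout (it is not part of this excerpt); standard library headers are spelled the usual way for brevity.

#include "third_party/tbb/blocked_range.hh"
#include "third_party/tbb/parallel_for.hh"   // assumed path for the vendored parallel_for
#include <cstddef>
#include <vector>

void scale(std::vector<float>& v, float factor) {
    // A subrange stops being split once its size is at most the grainsize (1024 here);
    // each final subrange is processed by one task.
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, v.size(), /*grainsize=*/1024),
                      [&](const tbb::blocked_range<std::size_t>& r) {
                          for (std::size_t i = r.begin(); i != r.end(); ++i)
                              v[i] *= factor;
                      });
}
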

112
third_party/tbb/blocked_range2d.hh vendored Normal file
View file

@@ -0,0 +1,112 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_blocked_range2d_H
#define __TBB_blocked_range2d_H
#include "third_party/libcxx/cstddef"
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/detail/_range_common.hh"
#include "third_party/tbb/blocked_range.hh"
namespace tbb {
namespace detail {
namespace d1 {
//! A 2-dimensional range that models the Range concept.
/** @ingroup algorithms */
template<typename RowValue, typename ColValue = RowValue>
__TBB_requires(blocked_range_value<RowValue> &&
blocked_range_value<ColValue>)
class blocked_range2d {
public:
//! Type for size of an iteration range
using row_range_type = blocked_range<RowValue>;
using col_range_type = blocked_range<ColValue>;
private:
row_range_type my_rows;
col_range_type my_cols;
public:
blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize,
ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) :
my_rows(row_begin,row_end,row_grainsize),
my_cols(col_begin,col_end,col_grainsize)
{}
blocked_range2d( RowValue row_begin, RowValue row_end,
ColValue col_begin, ColValue col_end ) :
my_rows(row_begin,row_end),
my_cols(col_begin,col_end)
{}
//! True if range is empty
bool empty() const {
// Range is empty if at least one dimension is empty.
return my_rows.empty() || my_cols.empty();
}
//! True if range is divisible into two pieces.
bool is_divisible() const {
return my_rows.is_divisible() || my_cols.is_divisible();
}
blocked_range2d( blocked_range2d& r, split ) :
my_rows(r.my_rows),
my_cols(r.my_cols)
{
split split_obj;
do_split(r, split_obj);
}
blocked_range2d( blocked_range2d& r, proportional_split& proportion ) :
my_rows(r.my_rows),
my_cols(r.my_cols)
{
do_split(r, proportion);
}
//! The rows of the iteration space
const row_range_type& rows() const { return my_rows; }
//! The columns of the iteration space
const col_range_type& cols() const { return my_cols; }
private:
template <typename Split>
void do_split( blocked_range2d& r, Split& split_obj ) {
if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) {
my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj);
} else {
my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj);
}
}
};
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::blocked_range2d;
} // namespace v1
} // namespace tbb
#endif /* __TBB_blocked_range2d_H */
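
Usage sketch: do_split() above always bisects the dimension whose size-to-grainsize ratio is larger, so a parallel_for over a blocked_range2d hands out tiles that stay balanced relative to the two grainsizes. The parallel_for include path is assumed from this port's layout.

#include "third_party/tbb/blocked_range2d.hh"
#include "third_party/tbb/parallel_for.hh"   // assumed path for the vendored parallel_for
#include <cstddef>

void clear_matrix(float* m, std::size_t nrows, std::size_t ncols) {
    tbb::parallel_for(
        tbb::blocked_range2d<std::size_t>(0, nrows, /*row grainsize*/ 16,
                                          0, ncols, /*col grainsize*/ 64),
        [=](const tbb::blocked_range2d<std::size_t>& r) {
            // r describes one tile: a row subrange times a column subrange.
            for (std::size_t i = r.rows().begin(); i != r.rows().end(); ++i)
                for (std::size_t j = r.cols().begin(); j != r.cols().end(); ++j)
                    m[i * ncols + j] = 0.0f;
        });
}
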

131
third_party/tbb/blocked_range3d.hh vendored Normal file
View file

@@ -0,0 +1,131 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_blocked_range3d_H
#define __TBB_blocked_range3d_H
#include "third_party/libcxx/cstddef"
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/blocked_range.hh"
namespace tbb {
namespace detail {
namespace d1 {
//! A 3-dimensional range that models the Range concept.
/** @ingroup algorithms */
template<typename PageValue, typename RowValue = PageValue, typename ColValue = RowValue>
__TBB_requires(blocked_range_value<PageValue> &&
blocked_range_value<RowValue> &&
blocked_range_value<ColValue>)
class blocked_range3d {
public:
//! Type for size of an iteration range
using page_range_type = blocked_range<PageValue>;
using row_range_type = blocked_range<RowValue>;
using col_range_type = blocked_range<ColValue>;
private:
page_range_type my_pages;
row_range_type my_rows;
col_range_type my_cols;
public:
blocked_range3d( PageValue page_begin, PageValue page_end,
RowValue row_begin, RowValue row_end,
ColValue col_begin, ColValue col_end ) :
my_pages(page_begin,page_end),
my_rows(row_begin,row_end),
my_cols(col_begin,col_end)
{}
blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize,
RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize,
ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) :
my_pages(page_begin,page_end,page_grainsize),
my_rows(row_begin,row_end,row_grainsize),
my_cols(col_begin,col_end,col_grainsize)
{}
//! True if range is empty
bool empty() const {
// Range is empty if at least one dimension is empty.
return my_pages.empty() || my_rows.empty() || my_cols.empty();
}
//! True if range is divisible into two pieces.
bool is_divisible() const {
return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible();
}
blocked_range3d( blocked_range3d& r, split split_obj ) :
my_pages(r.my_pages),
my_rows(r.my_rows),
my_cols(r.my_cols)
{
do_split(r, split_obj);
}
blocked_range3d( blocked_range3d& r, proportional_split& proportion ) :
my_pages(r.my_pages),
my_rows(r.my_rows),
my_cols(r.my_cols)
{
do_split(r, proportion);
}
//! The pages of the iteration space
const page_range_type& pages() const { return my_pages; }
//! The rows of the iteration space
const row_range_type& rows() const { return my_rows; }
//! The columns of the iteration space
const col_range_type& cols() const { return my_cols; }
private:
template <typename Split>
void do_split( blocked_range3d& r, Split& split_obj) {
if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) {
if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) {
my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj);
} else {
my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj);
}
} else {
if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) {
my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj);
} else {
my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj);
}
}
}
};
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::blocked_range3d;
} // namespace v1
} // namespace tbb
#endif /* __TBB_blocked_range3d_H */
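
The 3-D case works the same way: do_split() picks whichever of the page, row, or column dimensions has the largest size-to-grainsize ratio. A small construction sketch:

#include "third_party/tbb/blocked_range3d.hh"

tbb::blocked_range3d<int> make_box() {
    // A 128x64x32 box with per-dimension grainsizes; pages()/rows()/cols()
    // expose the three underlying blocked_range objects.
    return tbb::blocked_range3d<int>(0, 128, 8,   // pages
                                     0, 64, 8,    // rows
                                     0, 32, 8);   // cols
}
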

148
third_party/tbb/blocked_rangeNd.hh vendored Normal file
View file

@@ -0,0 +1,148 @@
// clang-format off
/*
Copyright (c) 2017-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_blocked_rangeNd_H
#define __TBB_blocked_rangeNd_H
#if !TBB_PREVIEW_BLOCKED_RANGE_ND
#error Set TBB_PREVIEW_BLOCKED_RANGE_ND to include blocked_rangeNd.h
#endif
#include "third_party/libcxx/algorithm" // std::any_of
#include "third_party/libcxx/array"
#include "third_party/libcxx/cstddef"
#include "third_party/libcxx/type_traits" // std::is_same, std::enable_if
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_template_helpers.hh" // index_sequence, make_index_sequence
#include "third_party/tbb/detail/_range_common.hh"
#include "third_party/tbb/blocked_range.hh"
namespace tbb {
namespace detail {
namespace d1 {
/*
The blocked_rangeNd_impl uses make_index_sequence<N> to automatically generate a ctor with
exactly N arguments of the type tbb::blocked_range<Value>. Such ctor provides an opportunity
to use braced-init-list parameters to initialize each dimension.
A parameter whose argument is a braced-init-list, but whose type is not
std::initializer_list or a reference to one, is a non-deduced context
in template argument deduction.
NOTE: blocked_rangeNd must be exactly a templated alias to the blocked_rangeNd_impl
(and not e.g. a derived class), otherwise it would need to declare its own ctor
facing the same problem that the impl class solves.
*/
template<typename Value, unsigned int N, typename = detail::make_index_sequence<N>>
__TBB_requires(blocked_range_value<Value>)
class blocked_rangeNd_impl;
template<typename Value, unsigned int N, std::size_t... Is>
__TBB_requires(blocked_range_value<Value>)
class blocked_rangeNd_impl<Value, N, detail::index_sequence<Is...>> {
public:
//! Type of a value.
using value_type = Value;
private:
//! Helper type to construct range with N tbb::blocked_range<value_type> objects.
template<std::size_t>
using dim_type_helper = tbb::blocked_range<value_type>;
public:
blocked_rangeNd_impl() = delete;
//! Constructs N-dimensional range over N half-open intervals each represented as tbb::blocked_range<Value>.
blocked_rangeNd_impl(const dim_type_helper<Is>&... args) : my_dims{ {args...} } {}
//! Dimensionality of a range.
static constexpr unsigned int ndims() { return N; }
//! Range in certain dimension.
const tbb::blocked_range<value_type>& dim(unsigned int dimension) const {
__TBB_ASSERT(dimension < N, "out of bound");
return my_dims[dimension];
}
//------------------------------------------------------------------------
// Methods that implement Range concept
//------------------------------------------------------------------------
//! True if at least one dimension is empty.
bool empty() const {
return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) {
return d.empty();
});
}
//! True if at least one dimension is divisible.
bool is_divisible() const {
return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) {
return d.is_divisible();
});
}
blocked_rangeNd_impl(blocked_rangeNd_impl& r, proportional_split proportion) : my_dims(r.my_dims) {
do_split(r, proportion);
}
blocked_rangeNd_impl(blocked_rangeNd_impl& r, split proportion) : my_dims(r.my_dims) {
do_split(r, proportion);
}
private:
static_assert(N != 0, "zero dimensional blocked_rangeNd can't be constructed");
//! Ranges in each dimension.
std::array<tbb::blocked_range<value_type>, N> my_dims;
template<typename split_type>
void do_split(blocked_rangeNd_impl& r, split_type proportion) {
static_assert((std::is_same<split_type, split>::value || std::is_same<split_type, proportional_split>::value), "type of split object is incorrect");
__TBB_ASSERT(r.is_divisible(), "can't split not divisible range");
auto my_it = std::max_element(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& first, const tbb::blocked_range<value_type>& second) {
return (first.size() * second.grainsize() < second.size() * first.grainsize());
});
auto r_it = r.my_dims.begin() + (my_it - my_dims.begin());
my_it->my_begin = tbb::blocked_range<value_type>::do_split(*r_it, proportion);
// (!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin)) equals to
// (my_it->my_begin == r_it->my_end), but we can't use operator== due to Value concept
__TBB_ASSERT(!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin),
"blocked_range has been split incorrectly");
}
};
template<typename Value, unsigned int N>
using blocked_rangeNd = blocked_rangeNd_impl<Value, N>;
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::blocked_rangeNd;
} // namespace v1
} // namespace tbb
#endif /* __TBB_blocked_rangeNd_H */
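
As the comment in the header explains, the index_sequence-generated constructor is what lets each dimension be written as a braced-init-list. A usage sketch (the preview macro must be defined before the include):

#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
#include "third_party/tbb/blocked_rangeNd.hh"
#include <cstddef>

std::size_t box_volume() {
    // Each braced list is converted to a tbb::blocked_range<int>{begin, end, grainsize}.
    tbb::blocked_rangeNd<int, 3> box({0, 128, 8}, {0, 64, 8}, {0, 32, 8});
    return box.dim(0).size() * box.dim(1).size() * box.dim(2).size();
}
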

190
third_party/tbb/cache_aligned_allocator.hh vendored Normal file
View file

@@ -0,0 +1,190 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_cache_aligned_allocator_H
#define __TBB_cache_aligned_allocator_H
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/libcxx/cstdlib"
#include "third_party/libcxx/utility"
#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT
// MISSING #include <memory_resource>
#endif
namespace tbb {
namespace detail {
namespace r1 {
TBB_EXPORT void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size);
TBB_EXPORT void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p);
TBB_EXPORT std::size_t __TBB_EXPORTED_FUNC cache_line_size();
}
namespace d1 {
template<typename T>
class cache_aligned_allocator {
public:
using value_type = T;
using propagate_on_container_move_assignment = std::true_type;
//! Always defined for TBB containers (supported since C++17 for std containers)
using is_always_equal = std::true_type;
cache_aligned_allocator() = default;
template<typename U> cache_aligned_allocator(const cache_aligned_allocator<U>&) noexcept {}
//! Allocate space for n objects, starting on a cache/sector line.
__TBB_nodiscard T* allocate(std::size_t n) {
return static_cast<T*>(r1::cache_aligned_allocate(n * sizeof(value_type)));
}
//! Free block of memory that starts on a cache line
void deallocate(T* p, std::size_t) {
r1::cache_aligned_deallocate(p);
}
//! Largest value for which method allocate might succeed.
std::size_t max_size() const noexcept {
return (~std::size_t(0) - r1::cache_line_size()) / sizeof(value_type);
}
#if TBB_ALLOCATOR_TRAITS_BROKEN
using pointer = value_type*;
using const_pointer = const value_type*;
using reference = value_type&;
using const_reference = const value_type&;
using difference_type = std::ptrdiff_t;
using size_type = std::size_t;
template<typename U> struct rebind {
using other = cache_aligned_allocator<U>;
};
template<typename U, typename... Args>
void construct(U *p, Args&&... args)
{ ::new (p) U(std::forward<Args>(args)...); }
void destroy(pointer p) { p->~value_type(); }
pointer address(reference x) const { return &x; }
const_pointer address(const_reference x) const { return &x; }
#endif // TBB_ALLOCATOR_TRAITS_BROKEN
};
#if TBB_ALLOCATOR_TRAITS_BROKEN
template<>
class cache_aligned_allocator<void> {
public:
using pointer = void*;
using const_pointer = const void*;
using value_type = void;
template<typename U> struct rebind {
using other = cache_aligned_allocator<U>;
};
};
#endif
template<typename T, typename U>
bool operator==(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return true; }
#if !__TBB_CPP20_COMPARISONS_PRESENT
template<typename T, typename U>
bool operator!=(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return false; }
#endif
#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT
//! C++17 memory resource wrapper to ensure cache line size alignment
class cache_aligned_resource : public std::pmr::memory_resource {
public:
cache_aligned_resource() : cache_aligned_resource(std::pmr::get_default_resource()) {}
explicit cache_aligned_resource(std::pmr::memory_resource* upstream) : m_upstream(upstream) {}
std::pmr::memory_resource* upstream_resource() const {
return m_upstream;
}
private:
//! We don't know what memory resource is set. Use padding to guarantee alignment
void* do_allocate(std::size_t bytes, std::size_t alignment) override {
// TODO: make it common with tbb_allocator.cpp
std::size_t cache_line_alignment = correct_alignment(alignment);
std::size_t space = correct_size(bytes) + cache_line_alignment;
std::uintptr_t base = reinterpret_cast<std::uintptr_t>(m_upstream->allocate(space));
__TBB_ASSERT(base != 0, "Upstream resource returned nullptr.");
// Round up to the next cache line (align the base address)
std::uintptr_t result = (base + cache_line_alignment) & ~(cache_line_alignment - 1);
__TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Can't store a base pointer to the header");
__TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage");
// Record where block actually starts.
(reinterpret_cast<std::uintptr_t*>(result))[-1] = base;
return reinterpret_cast<void*>(result);
}
void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment) override {
if (ptr) {
// Recover where block actually starts
std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(ptr))[-1];
m_upstream->deallocate(reinterpret_cast<void*>(base), correct_size(bytes) + correct_alignment(alignment));
}
}
bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override {
if (this == &other) { return true; }
#if __TBB_USE_OPTIONAL_RTTI
const cache_aligned_resource* other_res = dynamic_cast<const cache_aligned_resource*>(&other);
return other_res && (upstream_resource() == other_res->upstream_resource());
#else
return false;
#endif
}
std::size_t correct_alignment(std::size_t alignment) {
__TBB_ASSERT(tbb::detail::is_power_of_two(alignment), "Alignment is not a power of 2");
#if __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT
std::size_t cache_line_size = std::hardware_destructive_interference_size;
#else
std::size_t cache_line_size = r1::cache_line_size();
#endif
return alignment < cache_line_size ? cache_line_size : alignment;
}
std::size_t correct_size(std::size_t bytes) {
// Handle the case when a small size is requested: there might not be
// enough space to store the original pointer.
return bytes < sizeof(std::uintptr_t) ? sizeof(std::uintptr_t) : bytes;
}
std::pmr::memory_resource* m_upstream;
};
#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::cache_aligned_allocator;
#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT
using detail::d1::cache_aligned_resource;
#endif
} // namespace v1
} // namespace tbb
#endif /* __TBB_cache_aligned_allocator_H */
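
Typical use of cache_aligned_allocator: start a container's storage on a cache-line boundary so it does not share a line with whatever was allocated just before it. Standard header spellings are used for brevity.

#include "third_party/tbb/cache_aligned_allocator.hh"
#include <cstddef>
#include <vector>

// Per-thread counters placed in cache-line-aligned storage; the block returned
// by allocate() starts on a cache line, which avoids false sharing with
// unrelated neighboring allocations.
std::vector<long, tbb::cache_aligned_allocator<long>> make_counters(std::size_t n) {
    return std::vector<long, tbb::cache_aligned_allocator<long>>(n, 0L);
}
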

86
third_party/tbb/cancellation_disseminator.hh vendored Normal file
View file

@@ -0,0 +1,86 @@
// clang-format off
/*
Copyright (c) 2022-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_cancellation_disseminator_H
#define _TBB_cancellation_disseminator_H
#include "third_party/tbb/mutex.hh"
#include "third_party/tbb/task_group.hh"
#include "third_party/tbb/intrusive_list.hh"
#include "third_party/tbb/thread_data.hh"
namespace tbb {
namespace detail {
namespace r1 {
class cancellation_disseminator {
public:
//! Finds all contexts affected by the state change and propagates the new state to them.
/* The propagation is relayed to the cancellation_disseminator because tasks created by one
external thread can be passed to and executed by other external threads. This means
that context trees can span several arenas at once and thus state change
propagation cannot be generally localized to one arena only.
*/
bool propagate_task_group_state(std::atomic<uint32_t> d1::task_group_context::*mptr_state, d1::task_group_context& src, uint32_t new_state) {
if (src.my_may_have_children.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) {
return true;
}
// The whole propagation algorithm is under the lock in order to ensure correctness
// in case of concurrent state changes at the different levels of the context tree.
threads_list_mutex_type::scoped_lock lock(my_threads_list_mutex);
// TODO: consider using the double-check idiom
if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state) {
// Another thread has concurrently changed the state. Back down.
return false;
}
// Advance global state propagation epoch
++the_context_state_propagation_epoch;
// Propagate to all workers and external threads and sync up their local epochs with the global one
// The whole propagation sequence is locked, thus no contention is expected
for (auto& thr_data : my_threads_list) {
thr_data.propagate_task_group_state(mptr_state, src, new_state);
}
return true;
}
void register_thread(thread_data& td) {
threads_list_mutex_type::scoped_lock lock(my_threads_list_mutex);
my_threads_list.push_front(td);
}
void unregister_thread(thread_data& td) {
threads_list_mutex_type::scoped_lock lock(my_threads_list_mutex);
my_threads_list.remove(td);
}
private:
using thread_data_list_type = intrusive_list<thread_data>;
using threads_list_mutex_type = d1::mutex;
threads_list_mutex_type my_threads_list_mutex;
thread_data_list_type my_threads_list;
};
} // namespace r1
} // namespace detail
} // namespace tbb
#endif // _TBB_cancellation_disseminator_H
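
What the disseminator buys at the user level: a cancellation request on a task_group_context reaches every thread participating in work bound to that context, whichever arena it runs in. A hedged illustration through the public API (public header paths assumed from the port's layout):

#include "third_party/tbb/blocked_range.hh"
#include "third_party/tbb/parallel_for.hh"   // assumed vendored path
#include "third_party/tbb/task_group.hh"

bool cancelled_early() {
    tbb::task_group_context ctx;
    tbb::parallel_for(tbb::blocked_range<int>(0, 1000000),
                      [&](const tbb::blocked_range<int>& r) {
                          if (r.begin() == 0)
                              ctx.cancel_group_execution();   // request cancellation once
                          // chunks that have not started yet will be skipped
                      },
                      ctx);
    // The request was fanned out to all participating threads by
    // propagate_task_group_state() above.
    return ctx.is_group_execution_cancelled();
}
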

428
third_party/tbb/co_context.hh vendored Normal file
View file

@@ -0,0 +1,428 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_co_context_H
#define _TBB_co_context_H
#include "third_party/tbb/detail/_config.hh"
#if __TBB_RESUMABLE_TASKS
#include "third_party/libcxx/cstddef"
#include "third_party/libcxx/cstdint"
#if __TBB_RESUMABLE_TASKS_USE_THREADS
#if _WIN32 || _WIN64
#include "libc/nt/accounting.h"
#include "libc/nt/automation.h"
#include "libc/nt/console.h"
#include "libc/nt/debug.h"
#include "libc/nt/dll.h"
#include "libc/nt/enum/keyaccess.h"
#include "libc/nt/enum/regtype.h"
#include "libc/nt/errors.h"
#include "libc/nt/events.h"
#include "libc/nt/files.h"
#include "libc/nt/ipc.h"
#include "libc/nt/memory.h"
#include "libc/nt/paint.h"
#include "libc/nt/process.h"
#include "libc/nt/registry.h"
#include "libc/nt/synchronization.h"
#include "libc/nt/thread.h"
#include "libc/nt/windows.h"
#include "libc/nt/winsock.h"
#else
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/clock.h"
#include "libc/thread/thread.h"
#include "libc/thread/thread2.h"
#endif
#include "third_party/libcxx/condition_variable"
#include "third_party/tbb/governor.hh"
#elif _WIN32 || _WIN64
#include "libc/nt/accounting.h"
#include "libc/nt/automation.h"
#include "libc/nt/console.h"
#include "libc/nt/debug.h"
#include "libc/nt/dll.h"
#include "libc/nt/enum/keyaccess.h"
#include "libc/nt/enum/regtype.h"
#include "libc/nt/errors.h"
#include "libc/nt/events.h"
#include "libc/nt/files.h"
#include "libc/nt/ipc.h"
#include "libc/nt/memory.h"
#include "libc/nt/paint.h"
#include "libc/nt/process.h"
#include "libc/nt/registry.h"
#include "libc/nt/synchronization.h"
#include "libc/nt/thread.h"
#include "libc/nt/windows.h"
#include "libc/nt/winsock.h"
#else
// ucontext.h API is deprecated since macOS 10.6
#if __APPLE__
#if __INTEL_COMPILER
#pragma warning(push)
#pragma warning(disable:1478)
#elif __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
#endif // __APPLE__
#include "libc/calls/ucontext.h"
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/mlock.h"
#include "libc/sysv/consts/msync.h"
#include "libc/sysv/consts/posix.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/madv.h"
#include "libc/sysv/consts/mfd.h"
#include "libc/sysv/consts/mremap.h" // mprotect
#include "third_party/tbb/governor.hh" // default_page_size()
#ifndef MAP_STACK
// macOS* does not define MAP_STACK
#define MAP_STACK 0
#endif
#ifndef MAP_ANONYMOUS
// macOS* defines MAP_ANON, which is deprecated in Linux*.
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif // _WIN32 || _WIN64
namespace tbb {
namespace detail {
namespace r1 {
#if __TBB_RESUMABLE_TASKS_USE_THREADS
struct coroutine_type {
#if _WIN32 || _WIN64
using handle_type = HANDLE;
#else
using handle_type = pthread_t;
#endif
handle_type my_thread;
std::condition_variable my_condvar;
std::mutex my_mutex;
thread_data* my_thread_data{ nullptr };
bool my_is_active{ true };
};
#elif _WIN32 || _WIN64
typedef LPVOID coroutine_type;
#else
struct coroutine_type {
coroutine_type() : my_context(), my_stack(), my_stack_size() {}
ucontext_t my_context;
void* my_stack;
std::size_t my_stack_size;
};
#endif
// Forward declaration of the coroutine API.
void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg);
void current_coroutine(coroutine_type& c);
void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine);
void destroy_coroutine(coroutine_type& c);
class co_context {
enum co_state {
co_invalid,
co_suspended,
co_executing,
co_destroyed
};
coroutine_type my_coroutine;
co_state my_state;
public:
co_context(std::size_t stack_size, void* arg)
: my_state(stack_size ? co_suspended : co_executing)
{
if (stack_size) {
__TBB_ASSERT(arg != nullptr, nullptr);
create_coroutine(my_coroutine, stack_size, arg);
} else {
current_coroutine(my_coroutine);
}
}
~co_context() {
__TBB_ASSERT(1 << my_state & (1 << co_suspended | 1 << co_executing), nullptr);
if (my_state == co_suspended) {
#if __TBB_RESUMABLE_TASKS_USE_THREADS
my_state = co_executing;
#endif
destroy_coroutine(my_coroutine);
}
my_state = co_destroyed;
}
void resume(co_context& target) {
// Do not create non-trivial objects on the stack of this function. They might never be destroyed.
__TBB_ASSERT(my_state == co_executing, nullptr);
__TBB_ASSERT(target.my_state == co_suspended, nullptr);
my_state = co_suspended;
target.my_state = co_executing;
// 'target' can reference an invalid object after swap_coroutine. Do not access it.
swap_coroutine(my_coroutine, target.my_coroutine);
__TBB_ASSERT(my_state == co_executing, nullptr);
}
};
#if _WIN32 || _WIN64
/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept;
#else
/* [[noreturn]] */ void co_local_wait_for_all(unsigned hi, unsigned lo) noexcept;
#endif
#if __TBB_RESUMABLE_TASKS_USE_THREADS
void handle_perror(int error_code, const char* what);
inline void check(int error_code, const char* routine) {
if (error_code) {
handle_perror(error_code, routine);
}
}
using thread_data_t = std::pair<coroutine_type&, void*&>;
#if _WIN32 || _WIN64
inline unsigned WINAPI coroutine_thread_func(void* d)
#else
inline void* coroutine_thread_func(void* d)
#endif
{
thread_data_t& data = *static_cast<thread_data_t*>(d);
coroutine_type& c = data.first;
void* arg = data.second;
{
std::unique_lock<std::mutex> lock(c.my_mutex);
__TBB_ASSERT(c.my_thread_data == nullptr, nullptr);
c.my_is_active = false;
// We have read the data; notify the waiting thread
data.second = nullptr;
c.my_condvar.notify_one();
c.my_condvar.wait(lock, [&c] { return c.my_is_active == true; });
}
__TBB_ASSERT(c.my_thread_data != nullptr, nullptr);
governor::set_thread_data(*c.my_thread_data);
#if _WIN32 || _WIN64
co_local_wait_for_all(arg);
return 0;
#else
std::uintptr_t addr = std::uintptr_t(arg);
unsigned lo = unsigned(addr);
unsigned hi = unsigned(std::uint64_t(addr) >> 32);
__TBB_ASSERT(sizeof(addr) == 8 || hi == 0, nullptr);
co_local_wait_for_all(hi, lo);
return nullptr;
#endif
};
inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) {
thread_data_t data{ c, arg };
#if _WIN32 || _WIN64
c.my_thread = (HANDLE)_beginthreadex(nullptr, unsigned(stack_size), coroutine_thread_func, &data, STACK_SIZE_PARAM_IS_A_RESERVATION, nullptr);
if (!c.my_thread) {
handle_perror(0, "create_coroutine: _beginthreadex failed\n");
}
#else
pthread_attr_t s;
check(pthread_attr_init(&s), "pthread_attr_init has failed");
if (stack_size > 0) {
check(pthread_attr_setstacksize(&s, stack_size), "pthread_attr_setstack_size has failed");
}
check(pthread_create(&c.my_thread, &s, coroutine_thread_func, &data), "pthread_create has failed");
check(pthread_attr_destroy(&s), "pthread_attr_destroy has failed");
#endif
// Wait for the just created thread to read the data
std::unique_lock<std::mutex> lock(c.my_mutex);
c.my_condvar.wait(lock, [&arg] { return arg == nullptr; });
}
inline void current_coroutine(coroutine_type& c) {
#if _WIN32 || _WIN64
c.my_thread = GetCurrentThread();
#else
c.my_thread = pthread_self();
#endif
}
inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) {
thread_data* td = governor::get_thread_data();
__TBB_ASSERT(prev_coroutine.my_is_active == true, "The current thread should be active");
// Detach our state before notifying the other thread
// (because we might be notified right after notifying it)
prev_coroutine.my_thread_data = nullptr;
prev_coroutine.my_is_active = false;
governor::clear_thread_data();
{
std::unique_lock<std::mutex> lock(new_coroutine.my_mutex);
__TBB_ASSERT(new_coroutine.my_is_active == false, "The sleeping thread should not be active");
__TBB_ASSERT(new_coroutine.my_thread_data == nullptr, "The sleeping thread should not be active");
new_coroutine.my_thread_data = td;
new_coroutine.my_is_active = true;
new_coroutine.my_condvar.notify_one();
}
std::unique_lock<std::mutex> lock(prev_coroutine.my_mutex);
prev_coroutine.my_condvar.wait(lock, [&prev_coroutine] { return prev_coroutine.my_is_active == true; });
__TBB_ASSERT(governor::get_thread_data() != nullptr, nullptr);
governor::set_thread_data(*prev_coroutine.my_thread_data);
}
inline void destroy_coroutine(coroutine_type& c) {
{
std::unique_lock<std::mutex> lock(c.my_mutex);
__TBB_ASSERT(c.my_thread_data == nullptr, "The sleeping thread should not be active");
__TBB_ASSERT(c.my_is_active == false, "The sleeping thread should not be active");
c.my_is_active = true;
c.my_condvar.notify_one();
}
#if _WIN32 || _WIN64
WaitForSingleObject(c.my_thread, INFINITE);
CloseHandle(c.my_thread);
#else
check(pthread_join(c.my_thread, nullptr), "pthread_join has failed");
#endif
}
#elif _WIN32 || _WIN64
inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) {
__TBB_ASSERT(arg, nullptr);
c = CreateFiber(stack_size, co_local_wait_for_all, arg);
__TBB_ASSERT(c, nullptr);
}
inline void current_coroutine(coroutine_type& c) {
c = IsThreadAFiber() ? GetCurrentFiber() :
ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH);
__TBB_ASSERT(c, nullptr);
}
inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) {
if (!IsThreadAFiber()) {
ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH);
}
__TBB_ASSERT(new_coroutine, nullptr);
prev_coroutine = GetCurrentFiber();
__TBB_ASSERT(prev_coroutine, nullptr);
SwitchToFiber(new_coroutine);
}
inline void destroy_coroutine(coroutine_type& c) {
__TBB_ASSERT(c, nullptr);
DeleteFiber(c);
}
#else // !(_WIN32 || _WIN64)
inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) {
const std::size_t REG_PAGE_SIZE = governor::default_page_size();
const std::size_t page_aligned_stack_size = (stack_size + (REG_PAGE_SIZE - 1)) & ~(REG_PAGE_SIZE - 1);
const std::size_t protected_stack_size = page_aligned_stack_size + 2 * REG_PAGE_SIZE;
// Allocate the stack with protection property
std::uintptr_t stack_ptr = (std::uintptr_t)mmap(nullptr, protected_stack_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
__TBB_ASSERT((void*)stack_ptr != MAP_FAILED, nullptr);
// Allow read write on our stack (guarded pages are still protected)
int err = mprotect((void*)(stack_ptr + REG_PAGE_SIZE), page_aligned_stack_size, PROT_READ | PROT_WRITE);
__TBB_ASSERT_EX(!err, nullptr);
// Remember the stack state
c.my_stack = (void*)(stack_ptr + REG_PAGE_SIZE);
c.my_stack_size = page_aligned_stack_size;
err = getcontext(&c.my_context);
__TBB_ASSERT_EX(!err, nullptr);
c.my_context.uc_link = nullptr;
// cast to char* to disable FreeBSD clang-3.4.1 'incompatible type' error
c.my_context.uc_stack.ss_sp = (char*)c.my_stack;
c.my_context.uc_stack.ss_size = c.my_stack_size;
c.my_context.uc_stack.ss_flags = 0;
typedef void(*coroutine_func_t)();
std::uintptr_t addr = std::uintptr_t(arg);
unsigned lo = unsigned(addr);
unsigned hi = unsigned(std::uint64_t(addr) >> 32);
__TBB_ASSERT(sizeof(addr) == 8 || hi == 0, nullptr);
makecontext(&c.my_context, (coroutine_func_t)co_local_wait_for_all, 2, hi, lo);
}
inline void current_coroutine(coroutine_type& c) {
int err = getcontext(&c.my_context);
__TBB_ASSERT_EX(!err, nullptr);
}
inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) {
int err = swapcontext(&prev_coroutine.my_context, &new_coroutine.my_context);
__TBB_ASSERT_EX(!err, nullptr);
}
inline void destroy_coroutine(coroutine_type& c) {
const std::size_t REG_PAGE_SIZE = governor::default_page_size();
// Free stack memory with guarded pages
munmap((void*)((std::uintptr_t)c.my_stack - REG_PAGE_SIZE), c.my_stack_size + 2 * REG_PAGE_SIZE);
// Clear the stack state afterwards
c.my_stack = nullptr;
c.my_stack_size = 0;
}
#if __APPLE__
#if __INTEL_COMPILER
#pragma warning(pop) // 1478 warning
#elif __clang__
#pragma clang diagnostic pop // "-Wdeprecated-declarations"
#endif
#endif
#endif // _WIN32 || _WIN64
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* __TBB_RESUMABLE_TASKS */
#endif /* _TBB_co_context_H */
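
For reference, the POSIX branch above is the classic getcontext/makecontext/swapcontext pattern, just with an mmap'ed guarded stack and the 64-bit argument pointer split into two unsigned halves for makecontext. A stripped-down standalone sketch of the same pattern (plain static stack, no guard pages, plain <ucontext.h>):

#include <ucontext.h>
#include <cstdio>

static ucontext_t main_ctx, coro_ctx;
static char coro_stack[64 * 1024];

static void coro_body() {
    std::puts("inside coroutine");
    swapcontext(&coro_ctx, &main_ctx);   // yield back to the caller
    std::puts("coroutine resumed");
}                                        // falling off the end jumps to uc_link

int main() {
    getcontext(&coro_ctx);
    coro_ctx.uc_stack.ss_sp = coro_stack;
    coro_ctx.uc_stack.ss_size = sizeof(coro_stack);
    coro_ctx.uc_link = &main_ctx;                 // where to go when coro_body returns
    makecontext(&coro_ctx, coro_body, 0);
    swapcontext(&main_ctx, &coro_ctx);            // first switch into the coroutine
    std::puts("back in main");
    swapcontext(&main_ctx, &coro_ctx);            // resume it once more
    std::puts("done");
    return 0;
}
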

236
third_party/tbb/collaborative_call_once.hh vendored Normal file
View file

@@ -0,0 +1,236 @@
// clang-format off
/*
Copyright (c) 2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_collaborative_call_once_H
#define __TBB_collaborative_call_once_H
#include "third_party/tbb/task_arena.hh"
#include "third_party/tbb/task_group.hh"
#include "third_party/libcxx/atomic"
namespace tbb {
namespace detail {
namespace d1 {
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Suppress warning: structure was padded due to alignment specifier
#pragma warning (push)
#pragma warning (disable: 4324)
#endif
constexpr std::uintptr_t collaborative_once_max_references = max_nfs_size;
constexpr std::uintptr_t collaborative_once_references_mask = collaborative_once_max_references-1;
class alignas(max_nfs_size) collaborative_once_runner : no_copy {
struct storage_t {
task_arena m_arena{ task_arena::attach{} };
wait_context m_wait_context{1};
};
std::atomic<std::int64_t> m_ref_count{0};
std::atomic<bool> m_is_ready{false};
// Storage with task_arena and wait_context must be initialized only by winner thread
union {
storage_t m_storage;
};
template<typename Fn>
void isolated_execute(Fn f) {
auto func = [f] {
f();
// delegate_base requires a bool-returning functor, while isolate_within_arena ignores the result
return true;
};
delegated_function<decltype(func)> delegate(func);
r1::isolate_within_arena(delegate, reinterpret_cast<std::intptr_t>(this));
}
public:
class lifetime_guard : no_copy {
collaborative_once_runner& m_runner;
public:
lifetime_guard(collaborative_once_runner& r) : m_runner(r) {
m_runner.m_ref_count++;
}
~lifetime_guard() {
m_runner.m_ref_count--;
}
};
collaborative_once_runner() {}
~collaborative_once_runner() {
spin_wait_until_eq(m_ref_count, 0, std::memory_order_acquire);
if (m_is_ready.load(std::memory_order_relaxed)) {
m_storage.~storage_t();
}
}
std::uintptr_t to_bits() {
return reinterpret_cast<std::uintptr_t>(this);
}
static collaborative_once_runner* from_bits(std::uintptr_t bits) {
__TBB_ASSERT( (bits & collaborative_once_references_mask) == 0, "invalid pointer, last log2(max_nfs_size) bits must be zero" );
return reinterpret_cast<collaborative_once_runner*>(bits);
}
template <typename F>
void run_once(F&& f) {
__TBB_ASSERT(!m_is_ready.load(std::memory_order_relaxed), "storage with task_arena and wait_context is already initialized");
// Initialize internal state
new(&m_storage) storage_t();
m_storage.m_arena.execute([&] {
isolated_execute([&] {
task_group_context context{ task_group_context::bound,
task_group_context::default_traits | task_group_context::concurrent_wait };
function_stack_task<F> t{ std::forward<F>(f), m_storage.m_wait_context };
// Set the ready flag after entering the execute body to prevent
// moonlighting threads from occupying all slots inside the arena.
m_is_ready.store(true, std::memory_order_release);
execute_and_wait(t, context, m_storage.m_wait_context, context);
});
});
}
void assist() noexcept {
// Do not join the arena until the winner thread takes the slot
spin_wait_while_eq(m_is_ready, false);
m_storage.m_arena.execute([&] {
isolated_execute([&] {
// We do not want to get an exception from the user functor on moonlighting threads.
// The exception is handled by the winner thread.
task_group_context stub_context;
wait(m_storage.m_wait_context, stub_context);
});
});
}
};
class collaborative_once_flag : no_copy {
enum state : std::uintptr_t {
uninitialized,
done,
#if TBB_USE_ASSERT
dead
#endif
};
std::atomic<std::uintptr_t> m_state{ state::uninitialized };
template <typename Fn, typename... Args>
friend void collaborative_call_once(collaborative_once_flag& flag, Fn&& f, Args&&... args);
void set_completion_state(std::uintptr_t runner_bits, std::uintptr_t desired) {
std::uintptr_t expected = runner_bits;
do {
expected = runner_bits;
// Possible inefficiency: when we start waiting,
// some moonlighting threads might keep arriving, which prolongs our waiting.
// Fortunately, there is a limited number of threads on the system, so the wait time is bounded.
spin_wait_until_eq(m_state, expected);
} while (!m_state.compare_exchange_strong(expected, desired));
}
template <typename Fn>
void do_collaborative_call_once(Fn&& f) {
std::uintptr_t expected = m_state.load(std::memory_order_acquire);
collaborative_once_runner runner;
do {
if (expected == state::uninitialized && m_state.compare_exchange_strong(expected, runner.to_bits())) {
// Winner thread
runner.run_once([&] {
try_call([&] {
std::forward<Fn>(f)();
}).on_exception([&] {
// Reset the state to uninitialized to allow other threads to try initialization again
set_completion_state(runner.to_bits(), state::uninitialized);
});
// We successfully executed functor
set_completion_state(runner.to_bits(), state::done);
});
break;
} else {
// Moonlighting thread: we need to add a reference to the state to prolong the runner's lifetime.
// However, the maximum number of references is limited by the runner alignment.
// So, we use a CAS loop and spin_wait to guarantee that the references never exceed "max_value".
do {
auto max_value = expected | collaborative_once_references_mask;
expected = spin_wait_while_eq(m_state, max_value);
// "expected > state::done" prevents storing values, when state is uninitialized or done
} while (expected > state::done && !m_state.compare_exchange_strong(expected, expected + 1));
if (auto shared_runner = collaborative_once_runner::from_bits(expected & ~collaborative_once_references_mask)) {
collaborative_once_runner::lifetime_guard guard{*shared_runner};
m_state.fetch_sub(1);
// The moonlighting threads are not expected to handle exceptions from user functor.
// Therefore, no exception is expected from assist().
shared_runner->assist();
}
}
__TBB_ASSERT(m_state.load(std::memory_order_relaxed) != state::dead,
"collaborative_once_flag has been prematurely destroyed");
} while (expected != state::done);
}
#if TBB_USE_ASSERT
public:
~collaborative_once_flag() {
m_state.store(state::dead, std::memory_order_relaxed);
}
#endif
};
template <typename Fn, typename... Args>
void collaborative_call_once(collaborative_once_flag& flag, Fn&& fn, Args&&... args) {
__TBB_ASSERT(flag.m_state.load(std::memory_order_relaxed) != collaborative_once_flag::dead,
"collaborative_once_flag has been prematurely destroyed");
if (flag.m_state.load(std::memory_order_acquire) != collaborative_once_flag::done) {
#if __TBB_GCC_PARAMETER_PACK_IN_LAMBDAS_BROKEN
// Use stored_pack to work around a GCC 4.8 bug
// with parameter pack expansion in lambdas
auto stored_pack = save_pack(std::forward<Args>(args)...);
auto func = [&] { call(std::forward<Fn>(fn), std::move(stored_pack)); };
#else
auto func = [&] { fn(std::forward<Args>(args)...); };
#endif
flag.do_collaborative_call_once(func);
}
}
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop) // 4324 warning
#endif
} // namespace d1
} // namespace detail
using detail::d1::collaborative_call_once;
using detail::d1::collaborative_once_flag;
} // namespace tbb
#endif // __TBB_collaborative_call_once_H
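
Usage sketch: like std::call_once, but threads arriving while the winner is still running join its arena and help instead of blocking idly.

#include "third_party/tbb/collaborative_call_once.hh"

static tbb::collaborative_once_flag init_flag;
static double cached_result = 0.0;

double get_result() {
    // The lambda runs exactly once; concurrent callers assist the winner's
    // arena until it finishes, then everyone observes the initialized value.
    tbb::collaborative_call_once(init_flag, [] {
        cached_result = 42.0;   // stands in for an expensive, internally parallel computation
    });
    return cached_result;
}
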

70
third_party/tbb/combinable.hh vendored Normal file
View file

@@ -0,0 +1,70 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_combinable_H
#define __TBB_combinable_H
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/enumerable_thread_specific.hh"
#include "third_party/tbb/cache_aligned_allocator.hh"
namespace tbb {
namespace detail {
namespace d1 {
/** \name combinable **/
//@{
//! Thread-local storage with optional reduction
/** @ingroup containers */
template <typename T>
class combinable {
using my_alloc = typename tbb::cache_aligned_allocator<T>;
using my_ets_type = typename tbb::enumerable_thread_specific<T, my_alloc, ets_no_key>;
my_ets_type my_ets;
public:
combinable() = default;
template <typename Finit>
explicit combinable(Finit _finit) : my_ets(_finit) { }
void clear() { my_ets.clear(); }
T& local() { return my_ets.local(); }
T& local(bool& exists) { return my_ets.local(exists); }
// combine_func_t has signature T(T,T) or T(const T&, const T&)
template <typename CombineFunc>
T combine(CombineFunc f_combine) { return my_ets.combine(f_combine); }
// combine_func_t has signature void(T) or void(const T&)
template <typename CombineFunc>
void combine_each(CombineFunc f_combine) { my_ets.combine_each(f_combine); }
};
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::combinable;
} // inline namespace v1
} // namespace tbb
#endif /* __TBB_combinable_H */
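
Usage sketch for combinable: each thread accumulates into its own lazily created copy, and the copies are folded together at the end. The parallel_for include path is assumed from the port's layout.

#include "third_party/tbb/blocked_range.hh"
#include "third_party/tbb/combinable.hh"
#include "third_party/tbb/parallel_for.hh"   // assumed vendored path
#include <cstddef>

long parallel_sum(const int* data, std::size_t n) {
    tbb::combinable<long> partial([] { return 0L; });   // per-thread accumulator, initialized to 0
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n),
                      [&](const tbb::blocked_range<std::size_t>& r) {
                          long& local = partial.local();  // this thread's copy
                          for (std::size_t i = r.begin(); i != r.end(); ++i)
                              local += data[i];
                      });
    return partial.combine([](long a, long b) { return a + b; });   // fold all copies
}
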

View file

@@ -0,0 +1,85 @@
// clang-format off
/*
Copyright (c) 2020-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/concurrent_queue.hh"
#include "third_party/tbb/cache_aligned_allocator.hh"
#include "third_party/tbb/concurrent_monitor.hh"
namespace tbb {
namespace detail {
namespace r1 {
static constexpr std::size_t monitors_number = 2;
std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size )
{
std::size_t monitors_mem_size = sizeof(concurrent_monitor) * monitors_number;
std::uint8_t* mem = static_cast<std::uint8_t*>(cache_aligned_allocate(queue_rep_size + monitors_mem_size));
concurrent_monitor* monitors = reinterpret_cast<concurrent_monitor*>(mem + queue_rep_size);
for (std::size_t i = 0; i < monitors_number; ++i) {
new (monitors + i) concurrent_monitor();
}
return mem;
}
void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size )
{
concurrent_monitor* monitors = reinterpret_cast<concurrent_monitor*>(mem + queue_rep_size);
for (std::size_t i = 0; i < monitors_number; ++i) {
monitors[i].~concurrent_monitor();
}
cache_aligned_deallocate(mem);
}
void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag,
std::ptrdiff_t target, d1::delegate_base& predicate )
{
__TBB_ASSERT(monitor_tag < monitors_number, nullptr);
concurrent_monitor& monitor = monitors[monitor_tag];
monitor.wait<concurrent_monitor::thread_context>([&] { return !predicate(); }, std::uintptr_t(target));
}
void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ) {
concurrent_monitor& items_avail = monitors[d2::cbq_items_avail_tag];
concurrent_monitor& slots_avail = monitors[d2::cbq_slots_avail_tag];
items_avail.abort_all();
slots_avail.abort_all();
}
struct predicate_leq {
std::size_t my_ticket;
predicate_leq( std::size_t ticket ) : my_ticket(ticket) {}
bool operator() ( std::uintptr_t ticket ) const { return static_cast<std::size_t>(ticket) <= my_ticket; }
};
void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors,
std::size_t monitor_tag, std::size_t ticket)
{
__TBB_ASSERT(monitor_tag < monitors_number, nullptr);
concurrent_monitor& monitor = monitors[monitor_tag];
monitor.notify(predicate_leq(ticket));
}
} // namespace r1
} // namespace detail
} // namespace tbb
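The two exported helpers above co-allocate the queue representation and its two concurrent_monitor objects in a single cache-aligned block, constructing and destroying the monitors by hand. A generic, standard-library-only sketch of that placement-new pattern follows; Monitor and the function names are placeholders, not TBB types, and the sketch assumes rep_size preserves Monitor's alignment (TBB relies on cache_aligned_allocate for this).

#include <cstddef>
#include <cstdlib>
#include <new>

struct Monitor { /* wait/notify state would live here */ };

unsigned char* allocate_rep(std::size_t rep_size, std::size_t n_monitors) {
  // One allocation holds [rep_size bytes of queue state][n_monitors Monitor objects].
  unsigned char* mem = static_cast<unsigned char*>(
      std::malloc(rep_size + n_monitors * sizeof(Monitor)));
  Monitor* monitors = reinterpret_cast<Monitor*>(mem + rep_size);
  for (std::size_t i = 0; i < n_monitors; ++i) new (monitors + i) Monitor();  // placement-new
  return mem;
}

void deallocate_rep(unsigned char* mem, std::size_t rep_size, std::size_t n_monitors) {
  Monitor* monitors = reinterpret_cast<Monitor*>(mem + rep_size);
  for (std::size_t i = 0; i < n_monitors; ++i) monitors[i].~Monitor();  // destroy in place
  std::free(mem);
}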

1665
third_party/tbb/concurrent_hash_map.hh vendored Normal file

File diff suppressed because it is too large

375
third_party/tbb/concurrent_lru_cache.hh vendored Normal file
View file

@@ -0,0 +1,375 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_lru_cache_H
#define __TBB_concurrent_lru_cache_H
#if ! TBB_PREVIEW_CONCURRENT_LRU_CACHE
#error Set TBB_PREVIEW_CONCURRENT_LRU_CACHE to include concurrent_lru_cache.h
#endif
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_aggregator.hh"
#include "third_party/libcxx/map" // for std::map
#include "third_party/libcxx/list" // for std::list
#include "third_party/libcxx/utility" // for std::make_pair
#include "third_party/libcxx/algorithm" // for std::find
#include "third_party/libcxx/atomic" // for std::atomic<bool>
namespace tbb {
namespace detail {
namespace d1 {
//-----------------------------------------------------------------------------
// Concurrent LRU cache
//-----------------------------------------------------------------------------
template<typename KeyT, typename ValT, typename KeyToValFunctorT = ValT (*) (KeyT)>
class concurrent_lru_cache : no_assign {
// encapsulated helper classes
private:
struct handle_object;
struct storage_map_value_type;
struct aggregator_operation;
struct retrieve_aggregator_operation;
struct signal_end_of_usage_aggregator_operation;
// typedefs
public:
using key_type = KeyT;
using value_type = ValT;
using pointer = ValT*;
using reference = ValT&;
using const_pointer = const ValT*;
using const_reference = const ValT&;
using value_function_type = KeyToValFunctorT;
using handle = handle_object;
private:
using lru_cache_type = concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>;
using storage_map_type = std::map<key_type, storage_map_value_type>;
using storage_map_iterator_type = typename storage_map_type::iterator;
using storage_map_pointer_type = typename storage_map_type::pointer;
using storage_map_reference_type = typename storage_map_type::reference;
using history_list_type = std::list<storage_map_iterator_type>;
using history_list_iterator_type = typename history_list_type::iterator;
using aggregator_operation_type = aggregator_operation;
using aggregator_function_type = aggregating_functor<lru_cache_type, aggregator_operation_type>;
using aggregator_type = aggregator<aggregator_function_type, aggregator_operation_type>;
friend class aggregating_functor<lru_cache_type,aggregator_operation_type>;
// fields
private:
value_function_type my_value_function;
aggregator_type my_aggregator;
storage_map_type my_storage_map; // storage map for used objects
history_list_type my_history_list; // history list for unused objects
const std::size_t my_history_list_capacity; // history list's allowed capacity
// interface
public:
concurrent_lru_cache(value_function_type value_function, std::size_t cache_capacity)
: my_value_function(value_function), my_history_list_capacity(cache_capacity) {
my_aggregator.initialize_handler(aggregator_function_type(this));
}
handle operator[](key_type key) {
retrieve_aggregator_operation op(key);
my_aggregator.execute(&op);
if (op.is_new_value_needed()) {
op.result().second.my_value = my_value_function(key);
op.result().second.my_is_ready.store(true, std::memory_order_release);
} else {
spin_wait_while_eq(op.result().second.my_is_ready, false);
}
return handle(*this, op.result());
}
private:
void handle_operations(aggregator_operation* op_list) {
while (op_list) {
op_list->cast_and_handle(*this);
aggregator_operation* prev_op = op_list;
op_list = op_list->next;
(prev_op->status).store(1, std::memory_order_release);
}
}
void signal_end_of_usage(storage_map_reference_type map_record_ref) {
signal_end_of_usage_aggregator_operation op(map_record_ref);
my_aggregator.execute(&op);
}
void signal_end_of_usage_serial(storage_map_reference_type map_record_ref) {
storage_map_iterator_type map_it = my_storage_map.find(map_record_ref.first);
__TBB_ASSERT(map_it != my_storage_map.end(),
"cache should not return past-end iterators to outer world");
__TBB_ASSERT(&(*map_it) == &map_record_ref,
"dangling reference has been returned to outside world: data race?");
__TBB_ASSERT(std::find(my_history_list.begin(), my_history_list.end(), map_it) == my_history_list.end(),
"object in use should not be in list of unused objects ");
// if it was the last reference, put it to the LRU history
if (! --(map_it->second.my_ref_counter)) {
// if the LRU history is full, evict the oldest items to get space
if (my_history_list.size() >= my_history_list_capacity) {
if (my_history_list_capacity == 0) {
// Since LRU history capacity is zero, there is no need to keep the element in history
my_storage_map.erase(map_it);
return;
}
std::size_t number_of_elements_to_evict = 1 + my_history_list.size() - my_history_list_capacity;
for (std::size_t i = 0; i < number_of_elements_to_evict; ++i) {
storage_map_iterator_type map_it_to_evict = my_history_list.back();
__TBB_ASSERT(map_it_to_evict->second.my_ref_counter == 0,
"item to be evicted should not have a live references");
// TODO: can we use forward_list instead of list? pop_front / insert_after last
my_history_list.pop_back();
my_storage_map.erase(map_it_to_evict);
}
}
// TODO: can we use forward_list instead of list? pop_front / insert_after last
my_history_list.push_front(map_it);
map_it->second.my_history_list_iterator = my_history_list.begin();
}
}
storage_map_reference_type retrieve_serial(key_type key, bool& is_new_value_needed) {
storage_map_iterator_type map_it = my_storage_map.find(key);
if (map_it == my_storage_map.end()) {
map_it = my_storage_map.emplace_hint(
map_it, std::piecewise_construct, std::make_tuple(key), std::make_tuple(value_type(), 0, my_history_list.end(), false));
is_new_value_needed = true;
} else {
history_list_iterator_type list_it = map_it->second.my_history_list_iterator;
if (list_it != my_history_list.end()) {
__TBB_ASSERT(map_it->second.my_ref_counter == 0,
"item to be evicted should not have a live references");
// Item is going to be used. Therefore it is not a subject for eviction,
// so we remove it from LRU history.
my_history_list.erase(list_it);
map_it->second.my_history_list_iterator = my_history_list.end();
}
}
++(map_it->second.my_ref_counter);
return *map_it;
}
};
//-----------------------------------------------------------------------------
// Value type for storage map in concurrent LRU cache
//-----------------------------------------------------------------------------
template<typename KeyT, typename ValT, typename KeyToValFunctorT>
struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::storage_map_value_type {
//typedefs
public:
using ref_counter_type = std::size_t;
// fields
public:
value_type my_value;
ref_counter_type my_ref_counter;
history_list_iterator_type my_history_list_iterator;
std::atomic<bool> my_is_ready;
// interface
public:
storage_map_value_type(
value_type const& value, ref_counter_type ref_counter,
history_list_iterator_type history_list_iterator, bool is_ready)
: my_value(value), my_ref_counter(ref_counter),
my_history_list_iterator(history_list_iterator), my_is_ready(is_ready) {}
};
//-----------------------------------------------------------------------------
// Handle object for operator[] in concurrent LRU cache
//-----------------------------------------------------------------------------
template<typename KeyT, typename ValT, typename KeyToValFunctorT>
struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::handle_object {
// fields
private:
lru_cache_type* my_lru_cache_ptr;
storage_map_pointer_type my_map_record_ptr;
// interface
public:
handle_object()
: my_lru_cache_ptr(nullptr), my_map_record_ptr(nullptr) {}
handle_object(lru_cache_type& lru_cache_ref, storage_map_reference_type map_record_ref)
: my_lru_cache_ptr(&lru_cache_ref), my_map_record_ptr(&map_record_ref) {}
handle_object(handle_object&) = delete;
void operator=(handle_object&) = delete;
handle_object(handle_object&& other)
: my_lru_cache_ptr(other.my_lru_cache_ptr), my_map_record_ptr(other.my_map_record_ptr) {
__TBB_ASSERT(
(other.my_lru_cache_ptr != nullptr && other.my_map_record_ptr != nullptr) ||
(other.my_lru_cache_ptr == nullptr && other.my_map_record_ptr == nullptr),
"invalid state of moving object?");
other.my_lru_cache_ptr = nullptr;
other.my_map_record_ptr = nullptr;
}
handle_object& operator=(handle_object&& other) {
__TBB_ASSERT(
(other.my_lru_cache_ptr != nullptr && other.my_map_record_ptr != nullptr) ||
(other.my_lru_cache_ptr == nullptr && other.my_map_record_ptr == nullptr),
"invalid state of moving object?");
if (my_lru_cache_ptr)
my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr);
my_lru_cache_ptr = other.my_lru_cache_ptr;
my_map_record_ptr = other.my_map_record_ptr;
other.my_lru_cache_ptr = nullptr;
other.my_map_record_ptr = nullptr;
return *this;
}
~handle_object() {
if (my_lru_cache_ptr)
my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr);
}
operator bool() const {
return (my_lru_cache_ptr && my_map_record_ptr);
}
value_type& value() {
__TBB_ASSERT(my_lru_cache_ptr, "get value from already moved object?");
__TBB_ASSERT(my_map_record_ptr, "get value from an invalid or already moved object?");
return my_map_record_ptr->second.my_value;
}
};
//-----------------------------------------------------------------------------
// Aggregator operation for aggregator type in concurrent LRU cache
//-----------------------------------------------------------------------------
template<typename KeyT, typename ValT, typename KeyToValFunctorT>
struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::aggregator_operation
: aggregated_operation<aggregator_operation> {
// encapsulated helper classes
public:
enum class op_type { retrieve, signal_end_of_usage };
// fields
private:
op_type my_op;
// interface
public:
aggregator_operation(op_type op) : my_op(op) {}
// TODO: aggregator_operation can be implemented
// - as a statically typed variant type or CRTP? (static, dependent on the use case)
// - or use pointer to function and apply_visitor (dynamic)
// - or use virtual functions (dynamic)
void cast_and_handle(lru_cache_type& lru_cache_ref) {
if (my_op == op_type::retrieve)
static_cast<retrieve_aggregator_operation*>(this)->handle(lru_cache_ref);
else
static_cast<signal_end_of_usage_aggregator_operation*>(this)->handle(lru_cache_ref);
}
};
template<typename KeyT, typename ValT, typename KeyToValFunctorT>
struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::retrieve_aggregator_operation
: aggregator_operation, private no_assign {
public:
key_type my_key;
storage_map_pointer_type my_map_record_ptr;
bool my_is_new_value_needed;
public:
retrieve_aggregator_operation(key_type key)
: aggregator_operation(aggregator_operation::op_type::retrieve),
my_key(key), my_map_record_ptr(nullptr), my_is_new_value_needed(false) {}
void handle(lru_cache_type& lru_cache_ref) {
my_map_record_ptr = &lru_cache_ref.retrieve_serial(my_key, my_is_new_value_needed);
}
storage_map_reference_type result() {
__TBB_ASSERT(my_map_record_ptr, "Attempt to call result() before calling handle()");
return *my_map_record_ptr;
}
bool is_new_value_needed() { return my_is_new_value_needed; }
};
template<typename KeyT, typename ValT, typename KeyToValFunctorT>
struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::signal_end_of_usage_aggregator_operation
: aggregator_operation, private no_assign {
private:
storage_map_reference_type my_map_record_ref;
public:
signal_end_of_usage_aggregator_operation(storage_map_reference_type map_record_ref)
: aggregator_operation(aggregator_operation::op_type::signal_end_of_usage),
my_map_record_ref(map_record_ref) {}
void handle(lru_cache_type& lru_cache_ref) {
lru_cache_ref.signal_end_of_usage_serial(my_map_record_ref);
}
};
// TODO: if we had a guarantee that KeyToValFunctorT always has
// ValT as its return type and KeyT as its argument type,
// we could deduce the template parameters of concurrent_lru_cache
// by pattern matching on KeyToValFunctorT
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::concurrent_lru_cache;
} // inline namespace v1
} // namespace tbb
#endif // __TBB_concurrent_lru_cache_H
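Since the guard above refuses to compile unless TBB_PREVIEW_CONCURRENT_LRU_CACHE is set, a usage sketch has to define it first. The include path mirrors this commit's layout, and render_label is a hypothetical value function, computed once per key and cached afterwards.

#define TBB_PREVIEW_CONCURRENT_LRU_CACHE 1            // required by the guard above
#include "third_party/tbb/concurrent_lru_cache.hh"
#include <string>

static std::string render_label(int id) { return "item #" + std::to_string(id); }

void lru_example() {
  // Keep at most 128 unused values in the LRU history; live handles pin their entries.
  tbb::concurrent_lru_cache<int, std::string> cache(&render_label, 128);
  {
    auto h = cache[42];              // computes render_label(42) on first use
    std::string& label = h.value();
    (void)label;
  }                                  // handle destruction returns the entry to the LRU history
}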

351
third_party/tbb/concurrent_map.hh vendored Normal file
View file

@@ -0,0 +1,351 @@
// clang-format off
/*
Copyright (c) 2019-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_map_H
#define __TBB_concurrent_map_H
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/detail/_concurrent_skip_list.hh"
#include "third_party/tbb/tbb_allocator.hh"
#include "third_party/libcxx/functional"
#include "third_party/libcxx/tuple"
#include "third_party/libcxx/utility"
namespace tbb {
namespace detail {
namespace d2 {
template<typename Key, typename Value, typename KeyCompare, typename RandomGenerator,
typename Allocator, bool AllowMultimapping>
struct map_traits {
static constexpr std::size_t max_level = RandomGenerator::max_level;
using random_level_generator_type = RandomGenerator;
using key_type = Key;
using mapped_type = Value;
using compare_type = KeyCompare;
using value_type = std::pair<const key_type, mapped_type>;
using reference = value_type&;
using const_reference = const value_type&;
using allocator_type = Allocator;
static constexpr bool allow_multimapping = AllowMultimapping;
class value_compare {
public:
bool operator()(const value_type& lhs, const value_type& rhs) const {
return comp(lhs.first, rhs.first);
}
protected:
value_compare(compare_type c) : comp(c) {}
friend struct map_traits;
compare_type comp;
};
static value_compare value_comp(compare_type comp) { return value_compare(comp); }
static const key_type& get_key(const_reference val) {
return val.first;
}
}; // struct map_traits
template <typename Key, typename Value, typename Compare, typename Allocator>
class concurrent_multimap;
template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, Value>>>
class concurrent_map : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> {
using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>;
public:
using key_type = Key;
using mapped_type = Value;
using value_type = typename base_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using key_compare = Compare;
using value_compare = typename base_type::value_compare;
using allocator_type = Allocator;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using node_type = typename base_type::node_type;
// Include constructors of base type
using base_type::base_type;
// Required for implicit deduction guides
concurrent_map() = default;
concurrent_map( const concurrent_map& ) = default;
concurrent_map( const concurrent_map& other, const allocator_type& alloc ) : base_type(other, alloc) {}
concurrent_map( concurrent_map&& ) = default;
concurrent_map( concurrent_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {}
// Required to respect the rule of 5
concurrent_map& operator=( const concurrent_map& ) = default;
concurrent_map& operator=( concurrent_map&& ) = default;
concurrent_map& operator=( std::initializer_list<value_type> il ) {
base_type::operator= (il);
return *this;
}
// Observers
mapped_type& at(const key_type& key) {
iterator it = this->find(key);
if (it == this->end()) {
throw_exception(exception_id::invalid_key);
}
return it->second;
}
const mapped_type& at(const key_type& key) const {
return const_cast<concurrent_map*>(this)->at(key);
}
mapped_type& operator[](const key_type& key) {
iterator it = this->find(key);
if (it == this->end()) {
it = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first;
}
return it->second;
}
mapped_type& operator[](key_type&& key) {
iterator it = this->find(key);
if (it == this->end()) {
it = this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first;
}
return it->second;
}
using base_type::insert;
template <typename P>
typename std::enable_if<std::is_constructible<value_type, P&&>::value,
std::pair<iterator, bool>>::type insert( P&& value )
{
return this->emplace(std::forward<P>(value));
}
template <typename P>
typename std::enable_if<std::is_constructible<value_type, P&&>::value,
iterator>::type insert( const_iterator hint, P&& value )
{
return this->emplace_hint(hint, std::forward<P>(value));
}
template<typename OtherCompare>
void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) {
this->internal_merge(source);
}
template<typename OtherCompare>
void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) {
this->internal_merge(std::move(source));
}
template<typename OtherCompare>
void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) {
this->internal_merge(source);
}
template<typename OtherCompare>
void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) {
this->internal_merge(std::move(source));
}
}; // class concurrent_map
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename It,
typename Comp = std::less<iterator_key_t<It>>,
typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Comp>>>
concurrent_map( It, It, Comp = Comp(), Alloc = Alloc() )
-> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>;
template <typename Key, typename T,
typename Comp = std::less<std::remove_const_t<Key>>,
typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Comp>>>
concurrent_map( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() )
-> concurrent_map<std::remove_const_t<Key>, T, Comp, Alloc>;
template <typename It, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_map( It, It, Alloc )
-> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>,
std::less<iterator_key_t<It>>, Alloc>;
template <typename Key, typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_map( std::initializer_list<std::pair<Key, T>>, Alloc )
-> concurrent_map<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>;
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename Key, typename Value, typename Compare, typename Allocator>
void swap( concurrent_map<Key, Value, Compare, Allocator>& lhs,
concurrent_map<Key, Value, Compare, Allocator>& rhs )
{
lhs.swap(rhs);
}
template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, Value>>>
class concurrent_multimap : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> {
using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>;
public:
using key_type = Key;
using mapped_type = Value;
using value_type = typename base_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using key_compare = Compare;
using value_compare = typename base_type::value_compare;
using allocator_type = Allocator;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using node_type = typename base_type::node_type;
// Include constructors of base_type
using base_type::base_type;
using base_type::insert;
// Required for implicit deduction guides
concurrent_multimap() = default;
concurrent_multimap( const concurrent_multimap& ) = default;
concurrent_multimap( const concurrent_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {}
concurrent_multimap( concurrent_multimap&& ) = default;
concurrent_multimap( concurrent_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {}
// Required to respect the rule of 5
concurrent_multimap& operator=( const concurrent_multimap& ) = default;
concurrent_multimap& operator=( concurrent_multimap&& ) = default;
concurrent_multimap& operator=( std::initializer_list<value_type> il ) {
base_type::operator= (il);
return *this;
}
template <typename P>
typename std::enable_if<std::is_constructible<value_type, P&&>::value,
std::pair<iterator, bool>>::type insert( P&& value )
{
return this->emplace(std::forward<P>(value));
}
template <typename P>
typename std::enable_if<std::is_constructible<value_type, P&&>::value,
iterator>::type insert( const_iterator hint, P&& value )
{
return this->emplace_hint(hint, std::forward<P>(value));
}
template<typename OtherCompare>
void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) {
this->internal_merge(source);
}
template<typename OtherCompare>
void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) {
this->internal_merge(std::move(source));
}
template<typename OtherCompare>
void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) {
this->internal_merge(source);
}
template<typename OtherCompare>
void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) {
this->internal_merge(std::move(source));
}
}; // class concurrent_multimap
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename It,
typename Comp = std::less<iterator_key_t<It>>,
typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Comp>>>
concurrent_multimap( It, It, Comp = Comp(), Alloc = Alloc() )
-> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>;
template <typename Key, typename T,
typename Comp = std::less<std::remove_const_t<Key>>,
typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Comp>>>
concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() )
-> concurrent_multimap<std::remove_const_t<Key>, T, Comp, Alloc>;
template <typename It, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_multimap( It, It, Alloc )
-> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>,
std::less<iterator_key_t<It>>, Alloc>;
template <typename Key, typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Alloc )
-> concurrent_multimap<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>;
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename Key, typename Value, typename Compare, typename Allocator>
void swap( concurrent_multimap<Key, Value, Compare, Allocator>& lhs,
concurrent_multimap<Key, Value, Compare, Allocator>& rhs )
{
lhs.swap(rhs);
}
} // namespace d2
} // namespace detail
inline namespace v1 {
using detail::d2::concurrent_map;
using detail::d2::concurrent_multimap;
using detail::split;
} // inline namespace v1
} // namespace tbb
#endif // __TBB_concurrent_map_H
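A short usage sketch of the map just added: insertions and lookups may run concurrently, and the include path simply follows this commit's vendored layout.

#include "third_party/tbb/concurrent_map.hh"
#include <string>

void map_example() {
  tbb::concurrent_map<int, std::string> m;
  m.emplace(1, "one");               // thread-safe insertion
  m[2] = "two";                      // operator[] default-constructs the value, then we assign
  auto it = m.find(1);
  if (it != m.end()) {
    std::string& v = it->second;     // lookups are safe alongside other insertions
    (void)v;
  }
}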

489
third_party/tbb/concurrent_monitor.hh vendored Normal file
View file

@@ -0,0 +1,489 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_monitor_H
#define __TBB_concurrent_monitor_H
#include "third_party/tbb/spin_mutex.hh"
#include "third_party/tbb/detail/_exception.hh"
#include "third_party/tbb/detail/_aligned_space.hh"
#include "third_party/tbb/concurrent_monitor_mutex.hh"
#include "third_party/tbb/semaphore.hh"
#include "third_party/libcxx/atomic"
namespace tbb {
namespace detail {
namespace r1 {
//! Circular doubly-linked list with sentinel
/** head.next points to the front and head.prev points to the back */
class circular_doubly_linked_list_with_sentinel {
public:
struct base_node {
base_node* next;
base_node* prev;
constexpr base_node(base_node* n, base_node* p) : next(n), prev(p) {}
explicit base_node() : next((base_node*)(uintptr_t)0xcdcdcdcd), prev((base_node*)(uintptr_t)0xcdcdcdcd) {}
};
// ctor
constexpr circular_doubly_linked_list_with_sentinel() : count(0), head(&head, &head) {}
circular_doubly_linked_list_with_sentinel(const circular_doubly_linked_list_with_sentinel&) = delete;
circular_doubly_linked_list_with_sentinel& operator=(const circular_doubly_linked_list_with_sentinel&) = delete;
inline std::size_t size() const { return count.load(std::memory_order_relaxed); }
inline bool empty() const { return size() == 0; }
inline base_node* front() const { return head.next; }
inline base_node* last() const { return head.prev; }
inline const base_node* end() const { return &head; }
//! add to the back of the list
inline void add( base_node* n ) {
count.store(count.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
n->prev = head.prev;
n->next = &head;
head.prev->next = n;
head.prev = n;
}
//! remove node 'n'
inline void remove( base_node& n ) {
__TBB_ASSERT(count.load(std::memory_order_relaxed) > 0, "attempt to remove an item from an empty list");
count.store(count.load( std::memory_order_relaxed ) - 1, std::memory_order_relaxed);
n.prev->next = n.next;
n.next->prev = n.prev;
}
//! move all elements to 'lst' and initialize the 'this' list
inline void flush_to( circular_doubly_linked_list_with_sentinel& lst ) {
const std::size_t l_count = size();
if (l_count > 0) {
lst.count.store(l_count, std::memory_order_relaxed);
lst.head.next = head.next;
lst.head.prev = head.prev;
head.next->prev = &lst.head;
head.prev->next = &lst.head;
clear();
}
}
void clear() {
head.next = &head;
head.prev = &head;
count.store(0, std::memory_order_relaxed);
}
private:
std::atomic<std::size_t> count;
base_node head;
};
using base_list = circular_doubly_linked_list_with_sentinel;
using base_node = circular_doubly_linked_list_with_sentinel::base_node;
template <typename Context>
class concurrent_monitor_base;
template <typename Context>
class wait_node : public base_node {
public:
#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900
wait_node(Context ctx) : my_context(ctx), my_is_in_list(false) {}
#else
wait_node(Context ctx) : my_context(ctx) {}
#endif
virtual ~wait_node() = default;
virtual void init() {
__TBB_ASSERT(!my_initialized, nullptr);
my_initialized = true;
}
virtual void wait() = 0;
virtual void reset() {
__TBB_ASSERT(my_skipped_wakeup, nullptr);
my_skipped_wakeup = false;
}
virtual void notify() = 0;
protected:
friend class concurrent_monitor_base<Context>;
friend class thread_data;
Context my_context{};
#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900
std::atomic<bool> my_is_in_list;
#else
std::atomic<bool> my_is_in_list{false};
#endif
bool my_initialized{false};
bool my_skipped_wakeup{false};
bool my_aborted{false};
unsigned my_epoch{0};
};
template <typename Context>
class sleep_node : public wait_node<Context> {
using base_type = wait_node<Context>;
public:
using base_type::base_type;
~sleep_node() override {
if (this->my_initialized) {
if (this->my_skipped_wakeup) semaphore().P();
semaphore().~binary_semaphore();
}
}
binary_semaphore& semaphore() { return *sema.begin(); }
void init() override {
if (!this->my_initialized) {
new (sema.begin()) binary_semaphore;
base_type::init();
}
}
void wait() override {
__TBB_ASSERT(this->my_initialized,
"Use of commit_wait() without prior prepare_wait()");
semaphore().P();
__TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?");
if (this->my_aborted)
throw_exception(exception_id::user_abort);
}
void reset() override {
base_type::reset();
semaphore().P();
}
void notify() override {
semaphore().V();
}
private:
tbb::detail::aligned_space<binary_semaphore> sema;
};
//! concurrent_monitor
/** fine-grained concurrent_monitor implementation */
template <typename Context>
class concurrent_monitor_base {
public:
//! ctor
constexpr concurrent_monitor_base() {}
//! dtor
~concurrent_monitor_base() = default;
concurrent_monitor_base(const concurrent_monitor_base&) = delete;
concurrent_monitor_base& operator=(const concurrent_monitor_base&) = delete;
//! prepare wait by inserting 'thr' into the wait queue
void prepare_wait( wait_node<Context>& node) {
// TODO: consider making even more lazy instantiation of the semaphore, that is only when it is actually needed, e.g. move it in node::wait()
if (!node.my_initialized) {
node.init();
}
// this is a good place to pump a previously skipped wakeup
else if (node.my_skipped_wakeup) {
node.reset();
}
node.my_is_in_list.store(true, std::memory_order_relaxed);
{
concurrent_monitor_mutex::scoped_lock l(my_mutex);
node.my_epoch = my_epoch.load(std::memory_order_relaxed);
my_waitset.add(&node);
}
// prepare_wait() guarantees a Write-Read memory barrier.
// In C++ only a full fence provides this type of barrier.
atomic_fence_seq_cst();
}
//! Commit wait if event count has not changed; otherwise, cancel wait.
/** Returns true if committed, false if canceled. */
inline bool commit_wait( wait_node<Context>& node ) {
const bool do_it = node.my_epoch == my_epoch.load(std::memory_order_relaxed);
// this check is just an optimization
if (do_it) {
node.wait();
} else {
cancel_wait( node );
}
return do_it;
}
//! Cancel the wait. Removes the thread from the wait queue if not removed yet.
void cancel_wait( wait_node<Context>& node ) {
// possible skipped wakeup will be pumped in the following prepare_wait()
node.my_skipped_wakeup = true;
// try to remove node from waitset
// Cancel wait guarantees acquire memory barrier.
bool in_list = node.my_is_in_list.load(std::memory_order_acquire);
if (in_list) {
concurrent_monitor_mutex::scoped_lock l(my_mutex);
if (node.my_is_in_list.load(std::memory_order_relaxed)) {
my_waitset.remove(node);
// node is removed from waitset, so there will be no wakeup
node.my_is_in_list.store(false, std::memory_order_relaxed);
node.my_skipped_wakeup = false;
}
}
}
//! Wait for a condition to be satisfied with waiting-on my_context
template <typename NodeType, typename Pred>
bool wait(Pred&& pred, NodeType&& node) {
prepare_wait(node);
while (!guarded_call(std::forward<Pred>(pred), node)) {
if (commit_wait(node)) {
return true;
}
prepare_wait(node);
}
cancel_wait(node);
return false;
}
//! Notify one thread about the event
void notify_one() {
atomic_fence_seq_cst();
notify_one_relaxed();
}
//! Notify one thread about the event. Relaxed version.
void notify_one_relaxed() {
if (my_waitset.empty()) {
return;
}
base_node* n;
const base_node* end = my_waitset.end();
{
concurrent_monitor_mutex::scoped_lock l(my_mutex);
my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
n = my_waitset.front();
if (n != end) {
my_waitset.remove(*n);
to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed);
}
}
if (n != end) {
to_wait_node(n)->notify();
}
}
//! Notify all waiting threads of the event
void notify_all() {
atomic_fence_seq_cst();
notify_all_relaxed();
}
//! Notify all waiting threads of the event; Relaxed version
void notify_all_relaxed() {
if (my_waitset.empty()) {
return;
}
base_list temp;
const base_node* end;
{
concurrent_monitor_mutex::scoped_lock l(my_mutex);
my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
// TODO: Possible optimization, don't change node state under lock, just do flush
my_waitset.flush_to(temp);
end = temp.end();
for (base_node* n = temp.front(); n != end; n = n->next) {
to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed);
}
}
base_node* nxt;
for (base_node* n = temp.front(); n != end; n=nxt) {
nxt = n->next;
to_wait_node(n)->notify();
}
#if TBB_USE_ASSERT
temp.clear();
#endif
}
//! Notify waiting threads of the event that satisfies the given predicate
template <typename P>
void notify( const P& predicate ) {
atomic_fence_seq_cst();
notify_relaxed( predicate );
}
//! Notify waiting threads of the event that satisfies the given predicate;
//! the predicate is called under the lock. Relaxed version.
template<typename P>
void notify_relaxed( const P& predicate ) {
if (my_waitset.empty()) {
return;
}
base_list temp;
base_node* nxt;
const base_node* end = my_waitset.end();
{
concurrent_monitor_mutex::scoped_lock l(my_mutex);
my_epoch.store(my_epoch.load( std::memory_order_relaxed ) + 1, std::memory_order_relaxed);
for (base_node* n = my_waitset.last(); n != end; n = nxt) {
nxt = n->prev;
auto* node = static_cast<wait_node<Context>*>(n);
if (predicate(node->my_context)) {
my_waitset.remove(*n);
node->my_is_in_list.store(false, std::memory_order_relaxed);
temp.add(n);
}
}
}
end = temp.end();
for (base_node* n=temp.front(); n != end; n = nxt) {
nxt = n->next;
to_wait_node(n)->notify();
}
#if TBB_USE_ASSERT
temp.clear();
#endif
}
//! Notify waiting threads of the event that satisfies the given predicate;
//! the predicate is called under the lock. Relaxed version.
template<typename P>
void notify_one_relaxed( const P& predicate ) {
if (my_waitset.empty()) {
return;
}
base_node* tmp = nullptr;
base_node* next{};
const base_node* end = my_waitset.end();
{
concurrent_monitor_mutex::scoped_lock l(my_mutex);
my_epoch.store(my_epoch.load( std::memory_order_relaxed ) + 1, std::memory_order_relaxed);
for (base_node* n = my_waitset.last(); n != end; n = next) {
next = n->prev;
auto* node = static_cast<wait_node<Context>*>(n);
if (predicate(node->my_context)) {
my_waitset.remove(*n);
node->my_is_in_list.store(false, std::memory_order_relaxed);
tmp = n;
break;
}
}
}
if (tmp) {
to_wait_node(tmp)->notify();
}
}
//! Abort any sleeping threads at the time of the call
void abort_all() {
atomic_fence_seq_cst();
abort_all_relaxed();
}
//! Abort any sleeping threads at the time of the call; Relaxed version
void abort_all_relaxed() {
if (my_waitset.empty()) {
return;
}
base_list temp;
const base_node* end;
{
concurrent_monitor_mutex::scoped_lock l(my_mutex);
my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
my_waitset.flush_to(temp);
end = temp.end();
for (base_node* n = temp.front(); n != end; n = n->next) {
to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed);
}
}
base_node* nxt;
for (base_node* n = temp.front(); n != end; n = nxt) {
nxt = n->next;
to_wait_node(n)->my_aborted = true;
to_wait_node(n)->notify();
}
#if TBB_USE_ASSERT
temp.clear();
#endif
}
void destroy() {
this->abort_all();
my_mutex.destroy();
__TBB_ASSERT(this->my_waitset.empty(), "waitset not empty?");
}
private:
template <typename NodeType, typename Pred>
bool guarded_call(Pred&& predicate, NodeType& node) {
bool res = false;
tbb::detail::d0::try_call( [&] {
res = std::forward<Pred>(predicate)();
}).on_exception( [&] {
cancel_wait(node);
});
return res;
}
concurrent_monitor_mutex my_mutex{};
base_list my_waitset{};
std::atomic<unsigned> my_epoch{};
wait_node<Context>* to_wait_node( base_node* node ) { return static_cast<wait_node<Context>*>(node); }
};
class concurrent_monitor : public concurrent_monitor_base<std::uintptr_t> {
using base_type = concurrent_monitor_base<std::uintptr_t>;
public:
using base_type::base_type;
~concurrent_monitor() {
destroy();
}
/** per-thread descriptor for concurrent_monitor */
using thread_context = sleep_node<std::uintptr_t>;
};
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* __TBB_concurrent_monitor_H */
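This class is internal (detail::r1) plumbing rather than a public API, but the waiter-side protocol it implements is easier to see in a small sketch. Assuming the surrounding r1 headers are available; wait_until is a hypothetical helper, and the ctx value is whatever the notifier's predicate will later inspect.

#include "third_party/tbb/concurrent_monitor.hh"
#include <cstdint>

using tbb::detail::r1::concurrent_monitor;

bool wait_until(concurrent_monitor& mon, std::uintptr_t ctx, bool (*ready)()) {
  // wait() loops prepare_wait -> re-check predicate -> commit_wait; it parks the
  // thread on a binary semaphore only if the predicate is still false after the
  // wait node has been published, which closes the classic lost-wakeup window.
  concurrent_monitor::thread_context node(ctx);
  return mon.wait([&] { return ready(); }, node);
}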

View file

@@ -0,0 +1,114 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_monitor_mutex_H
#define __TBB_monitor_mutex_H
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/detail/_aligned_space.hh"
#include "third_party/tbb/semaphore.hh"
#include "third_party/libcxx/mutex"
namespace tbb {
namespace detail {
namespace r1 {
class concurrent_monitor_mutex {
public:
using scoped_lock = std::lock_guard<concurrent_monitor_mutex>;
constexpr concurrent_monitor_mutex() {}
~concurrent_monitor_mutex() = default;
void destroy() {
#if !__TBB_USE_FUTEX
if (my_init_flag.load(std::memory_order_relaxed)) {
get_semaphore().~semaphore();
}
#endif
}
void lock() {
auto wakeup_condition = [&] {
return my_flag.load(std::memory_order_relaxed) == 0;
};
while (my_flag.exchange(1)) {
if (!timed_spin_wait_until(wakeup_condition)) {
++my_waiters;
while (!wakeup_condition()) {
wait();
}
--my_waiters;
}
}
}
void unlock() {
my_flag.exchange(0); // full fence, so the next load is relaxed
if (my_waiters.load(std::memory_order_relaxed)) {
wakeup();
}
}
private:
void wait() {
#if __TBB_USE_FUTEX
futex_wait(&my_flag, 1);
#else
get_semaphore().P();
#endif
}
void wakeup() {
#if __TBB_USE_FUTEX
futex_wakeup_one(&my_flag);
#else
get_semaphore().V();
#endif
}
// The flag should be int for the futex operations
std::atomic<int> my_flag{0};
std::atomic<int> my_waiters{0};
#if !__TBB_USE_FUTEX
semaphore& get_semaphore() {
if (!my_init_flag.load(std::memory_order_acquire)) {
std::lock_guard<std::mutex> lock(my_init_mutex);
if (!my_init_flag.load(std::memory_order_relaxed)) {
new (my_semaphore.begin()) semaphore();
my_init_flag.store(true, std::memory_order_release);
}
}
return *my_semaphore.begin();
}
static std::mutex my_init_mutex;
std::atomic<bool> my_init_flag{false};
aligned_space<semaphore> my_semaphore{};
#endif
};
} // namespace r1
} // namespace detail
} // namespace tbb
#endif // __TBB_monitor_mutex_H
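The mutex above is a test-and-set spin lock that parks waiters on a futex (or a lazily constructed semaphore) and only issues a wakeup when my_waiters is non-zero. Below is a rough, portable analogy of that protocol with std::condition_variable standing in for the futex/semaphore; it illustrates the idea only and is not the class above.

#include <atomic>
#include <condition_variable>
#include <mutex>

class parking_spinlock {
  std::atomic<int> flag{0};
  std::atomic<int> waiters{0};
  std::mutex m;                      // guards the condition variable only
  std::condition_variable cv;
public:
  void lock() {
    while (flag.exchange(1, std::memory_order_acquire)) {   // test-and-set
      ++waiters;
      std::unique_lock<std::mutex> lk(m);
      cv.wait(lk, [&] { return flag.load(std::memory_order_relaxed) == 0; });
      --waiters;
    }
  }
  void unlock() {
    flag.store(0, std::memory_order_release);
    if (waiters.load(std::memory_order_relaxed)) {           // wake only if someone parked
      std::lock_guard<std::mutex> lk(m);
      cv.notify_one();
    }
  }
};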

View file

@@ -0,0 +1,491 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_priority_queue_H
#define __TBB_concurrent_priority_queue_H
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/detail/_aggregator.hh"
#include "third_party/tbb/detail/_template_helpers.hh"
#include "third_party/tbb/detail/_allocator_traits.hh"
#include "third_party/tbb/detail/_range_common.hh"
#include "third_party/tbb/detail/_exception.hh"
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/detail/_containers_helpers.hh"
#include "third_party/tbb/cache_aligned_allocator.hh"
#include "third_party/libcxx/vector"
#include "third_party/libcxx/iterator"
#include "third_party/libcxx/functional"
#include "third_party/libcxx/utility"
#include "third_party/libcxx/initializer_list"
#include "third_party/libcxx/type_traits"
namespace tbb {
namespace detail {
namespace d1 {
template <typename T, typename Compare = std::less<T>, typename Allocator = cache_aligned_allocator<T>>
class concurrent_priority_queue {
public:
using value_type = T;
using reference = T&;
using const_reference = const T&;
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;
using allocator_type = Allocator;
concurrent_priority_queue() : concurrent_priority_queue(allocator_type{}) {}
explicit concurrent_priority_queue( const allocator_type& alloc )
: mark(0), my_size(0), my_compare(), data(alloc)
{
my_aggregator.initialize_handler(functor{this});
}
explicit concurrent_priority_queue( const Compare& compare, const allocator_type& alloc = allocator_type() )
: mark(0), my_size(0), my_compare(compare), data(alloc)
{
my_aggregator.initialize_handler(functor{this});
}
explicit concurrent_priority_queue( size_type init_capacity, const allocator_type& alloc = allocator_type() )
: mark(0), my_size(0), my_compare(), data(alloc)
{
data.reserve(init_capacity);
my_aggregator.initialize_handler(functor{this});
}
explicit concurrent_priority_queue( size_type init_capacity, const Compare& compare, const allocator_type& alloc = allocator_type() )
: mark(0), my_size(0), my_compare(compare), data(alloc)
{
data.reserve(init_capacity);
my_aggregator.initialize_handler(functor{this});
}
template <typename InputIterator>
concurrent_priority_queue( InputIterator begin, InputIterator end, const Compare& compare, const allocator_type& alloc = allocator_type() )
: mark(0), my_compare(compare), data(begin, end, alloc)
{
my_aggregator.initialize_handler(functor{this});
heapify();
my_size.store(data.size(), std::memory_order_relaxed);
}
template <typename InputIterator>
concurrent_priority_queue( InputIterator begin, InputIterator end, const allocator_type& alloc = allocator_type() )
: concurrent_priority_queue(begin, end, Compare(), alloc) {}
concurrent_priority_queue( std::initializer_list<value_type> init, const Compare& compare, const allocator_type& alloc = allocator_type() )
: concurrent_priority_queue(init.begin(), init.end(), compare, alloc) {}
concurrent_priority_queue( std::initializer_list<value_type> init, const allocator_type& alloc = allocator_type() )
: concurrent_priority_queue(init, Compare(), alloc) {}
concurrent_priority_queue( const concurrent_priority_queue& other )
: mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare),
data(other.data)
{
my_aggregator.initialize_handler(functor{this});
}
concurrent_priority_queue( const concurrent_priority_queue& other, const allocator_type& alloc )
: mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare),
data(other.data, alloc)
{
my_aggregator.initialize_handler(functor{this});
}
concurrent_priority_queue( concurrent_priority_queue&& other )
: mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare),
data(std::move(other.data))
{
my_aggregator.initialize_handler(functor{this});
}
concurrent_priority_queue( concurrent_priority_queue&& other, const allocator_type& alloc )
: mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare),
data(std::move(other.data), alloc)
{
my_aggregator.initialize_handler(functor{this});
}
concurrent_priority_queue& operator=( const concurrent_priority_queue& other ) {
if (this != &other) {
data = other.data;
mark = other.mark;
my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed);
}
return *this;
}
concurrent_priority_queue& operator=( concurrent_priority_queue&& other ) {
if (this != &other) {
// TODO: check if exceptions from std::vector::operator=(vector&&) should be handled separately
data = std::move(other.data);
mark = other.mark;
my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed);
}
return *this;
}
concurrent_priority_queue& operator=( std::initializer_list<value_type> init ) {
assign(init.begin(), init.end());
return *this;
}
template <typename InputIterator>
void assign( InputIterator begin, InputIterator end ) {
data.assign(begin, end);
mark = 0;
my_size.store(data.size(), std::memory_order_relaxed);
heapify();
}
void assign( std::initializer_list<value_type> init ) {
assign(init.begin(), init.end());
}
/* Returned value may not reflect results of pending operations.
This operation reads shared data and will trigger a race condition. */
__TBB_nodiscard bool empty() const { return size() == 0; }
// Returns the current number of elements contained in the queue
/* Returned value may not reflect results of pending operations.
This operation reads shared data and will trigger a race condition. */
size_type size() const { return my_size.load(std::memory_order_relaxed); }
/* This operation can be safely used concurrently with other push, try_pop or emplace operations. */
void push( const value_type& value ) {
cpq_operation op_data(value, PUSH_OP);
my_aggregator.execute(&op_data);
if (op_data.status == FAILED)
throw_exception(exception_id::bad_alloc);
}
/* This operation can be safely used concurrently with other push, try_pop or emplace operations. */
void push( value_type&& value ) {
cpq_operation op_data(value, PUSH_RVALUE_OP);
my_aggregator.execute(&op_data);
if (op_data.status == FAILED)
throw_exception(exception_id::bad_alloc);
}
/* This operation can be safely used concurrently with other push, try_pop or emplace operations. */
template <typename... Args>
void emplace( Args&&... args ) {
// TODO: support uses allocator construction in this place
push(value_type(std::forward<Args>(args)...));
}
// Gets a reference to and removes highest priority element
/* If a highest priority element was found, sets elem and returns true,
otherwise returns false.
This operation can be safely used concurrently with other push, try_pop or emplace operations. */
bool try_pop( value_type& value ) {
cpq_operation op_data(value, POP_OP);
my_aggregator.execute(&op_data);
return op_data.status == SUCCEEDED;
}
// This operation affects the whole container => it is not thread-safe
void clear() {
data.clear();
mark = 0;
my_size.store(0, std::memory_order_relaxed);
}
// This operation affects the whole container => it is not thread-safe
void swap( concurrent_priority_queue& other ) {
if (this != &other) {
using std::swap;
swap(data, other.data);
swap(mark, other.mark);
size_type sz = my_size.load(std::memory_order_relaxed);
my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed);
other.my_size.store(sz, std::memory_order_relaxed);
}
}
allocator_type get_allocator() const { return data.get_allocator(); }
private:
enum operation_type {INVALID_OP, PUSH_OP, POP_OP, PUSH_RVALUE_OP};
enum operation_status {WAIT = 0, SUCCEEDED, FAILED};
class cpq_operation : public aggregated_operation<cpq_operation> {
public:
operation_type type;
union {
value_type* elem;
size_type sz;
};
cpq_operation( const value_type& value, operation_type t )
: type(t), elem(const_cast<value_type*>(&value)) {}
}; // class cpq_operation
class functor {
concurrent_priority_queue* my_cpq;
public:
functor() : my_cpq(nullptr) {}
functor( concurrent_priority_queue* cpq ) : my_cpq(cpq) {}
void operator()(cpq_operation* op_list) {
__TBB_ASSERT(my_cpq != nullptr, "Invalid functor");
my_cpq->handle_operations(op_list);
}
}; // class functor
void handle_operations( cpq_operation* op_list ) {
call_itt_notify(acquired, this);
cpq_operation* tmp, *pop_list = nullptr;
__TBB_ASSERT(mark == data.size(), nullptr);
// First pass processes all pushes and pops that take constant (amortized, since reallocation may happen) time.
while(op_list) {
// ITT note: &(op_list->status) tag is used to cover accesses to op_list
// node. This thread is going to handle the operation, and so will acquire it
// and perform the associated operation w/o triggering a race condition; the
// thread that created the operation is waiting on the status field, so when
// this thread is done with the operation, it will perform a
// store_with_release to give control back to the waiting thread in
// aggregator::insert_operation.
// TODO: enable
call_itt_notify(acquired, &(op_list->status));
__TBB_ASSERT(op_list->type != INVALID_OP, nullptr);
tmp = op_list;
op_list = op_list->next.load(std::memory_order_relaxed);
if (tmp->type == POP_OP) {
if (mark < data.size() &&
my_compare(data[0], data.back()))
{
// there are newly pushed elems and the last one is higher than top
*(tmp->elem) = std::move(data.back());
my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed);
tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release);
data.pop_back();
__TBB_ASSERT(mark <= data.size(), nullptr);
} else { // no convenient item to pop; postpone
tmp->next.store(pop_list, std::memory_order_relaxed);
pop_list = tmp;
}
} else { // PUSH_OP or PUSH_RVALUE_OP
__TBB_ASSERT(tmp->type == PUSH_OP || tmp->type == PUSH_RVALUE_OP, "Unknown operation");
#if TBB_USE_EXCEPTIONS
try
#endif
{
if (tmp->type == PUSH_OP) {
push_back_helper(*(tmp->elem));
} else {
data.push_back(std::move(*(tmp->elem)));
}
my_size.store(my_size.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release);
}
#if TBB_USE_EXCEPTIONS
catch(...) {
tmp->status.store(uintptr_t(FAILED), std::memory_order_release);
}
#endif
}
}
// Second pass processes pop operations
while(pop_list) {
tmp = pop_list;
pop_list = pop_list->next.load(std::memory_order_relaxed);
__TBB_ASSERT(tmp->type == POP_OP, nullptr);
if (data.empty()) {
tmp->status.store(uintptr_t(FAILED), std::memory_order_release);
} else {
__TBB_ASSERT(mark <= data.size(), nullptr);
if (mark < data.size() &&
my_compare(data[0], data.back()))
{
// there are newly pushed elems and the last one is higher than top
*(tmp->elem) = std::move(data.back());
my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed);
tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release);
data.pop_back();
} else { // extract top and push last element down heap
*(tmp->elem) = std::move(data[0]);
my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed);
tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release);
reheap();
}
}
}
// heapify any leftover pushed elements before doing the next
// batch of operations
if (mark < data.size()) heapify();
__TBB_ASSERT(mark == data.size(), nullptr);
call_itt_notify(releasing, this);
}
// Merge unsorted elements into heap
void heapify() {
if (!mark && data.size() > 0) mark = 1;
for (; mark < data.size(); ++mark) {
// for each unheapified element under size
size_type cur_pos = mark;
value_type to_place = std::move(data[mark]);
do { // push to_place up the heap
size_type parent = (cur_pos - 1) >> 1;
if (!my_compare(data[parent], to_place))
break;
data[cur_pos] = std::move(data[parent]);
cur_pos = parent;
} while(cur_pos);
data[cur_pos] = std::move(to_place);
}
}
// Re-heapify after an extraction
// Re-heapify by pushing last element down the heap from the root.
void reheap() {
size_type cur_pos = 0, child = 1;
while(child < mark) {
size_type target = child;
if (child + 1 < mark && my_compare(data[child], data[child + 1]))
++target;
// target now has the higher priority child
if (my_compare(data[target], data.back()))
break;
data[cur_pos] = std::move(data[target]);
cur_pos = target;
child = (cur_pos << 1) + 1;
}
if (cur_pos != data.size() - 1)
data[cur_pos] = std::move(data.back());
data.pop_back();
if (mark > data.size()) mark = data.size();
}
void push_back_helper( const T& value ) {
push_back_helper_impl(value, std::is_copy_constructible<T>{});
}
void push_back_helper_impl( const T& value, /*is_copy_constructible = */std::true_type ) {
data.push_back(value);
}
void push_back_helper_impl( const T&, /*is_copy_constructible = */std::false_type ) {
__TBB_ASSERT(false, "error: calling tbb::concurrent_priority_queue.push(const value_type&) for move-only type");
}
using aggregator_type = aggregator<functor, cpq_operation>;
aggregator_type my_aggregator;
// Padding added to avoid false sharing
char padding1[max_nfs_size - sizeof(aggregator_type)];
// The point at which unsorted elements begin
size_type mark;
std::atomic<size_type> my_size;
Compare my_compare;
// Padding added to avoid false sharing
char padding2[max_nfs_size - (2*sizeof(size_type)) - sizeof(Compare)];
//! Storage for the heap of elements in queue, plus unheapified elements
/** data has the following structure:
     binary  unheapified
      heap    elements
    ____|_______|____
        |       |       |
        v       v       v
    [_|...|_|_|...|_| |...| ]
     0       ^       ^     ^
             |       |     |__capacity
             |       |__my_size
             |__mark
Thus, data stores the binary heap starting at position 0 through
mark-1 (it may be empty). Then there are 0 or more elements
that have not yet been inserted into the heap, in positions
mark through my_size-1. */
using vector_type = std::vector<value_type, allocator_type>;
vector_type data;
friend bool operator==( const concurrent_priority_queue& lhs,
const concurrent_priority_queue& rhs )
{
return lhs.data == rhs.data;
}
#if !__TBB_CPP20_COMPARISONS_PRESENT
friend bool operator!=( const concurrent_priority_queue& lhs,
const concurrent_priority_queue& rhs )
{
return !(lhs == rhs);
}
#endif
}; // class concurrent_priority_queue
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename It,
typename Comp = std::less<iterator_value_t<It>>,
typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Comp>>>
concurrent_priority_queue( It, It, Comp = Comp(), Alloc = Alloc() )
-> concurrent_priority_queue<iterator_value_t<It>, Comp, Alloc>;
template <typename It, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_priority_queue( It, It, Alloc )
-> concurrent_priority_queue<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>;
template <typename T,
typename Comp = std::less<T>,
typename Alloc = tbb::cache_aligned_allocator<T>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Comp>>>
concurrent_priority_queue( std::initializer_list<T>, Comp = Comp(), Alloc = Alloc() )
-> concurrent_priority_queue<T, Comp, Alloc>;
template <typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_priority_queue( std::initializer_list<T>, Alloc )
-> concurrent_priority_queue<T, std::less<T>, Alloc>;
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename T, typename Compare, typename Allocator>
void swap( concurrent_priority_queue<T, Compare, Allocator>& lhs,
concurrent_priority_queue<T, Compare, Allocator>& rhs )
{
lhs.swap(rhs);
}
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::concurrent_priority_queue;
} // inline namespace v1
} // namespace tbb
#endif // __TBB_concurrent_priority_queue_H

701
third_party/tbb/concurrent_queue.hh vendored Normal file
View file

@@ -0,0 +1,701 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_queue_H
#define __TBB_concurrent_queue_H
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/detail/_concurrent_queue_base.hh"
#include "third_party/tbb/detail/_allocator_traits.hh"
#include "third_party/tbb/detail/_exception.hh"
#include "third_party/tbb/detail/_containers_helpers.hh"
#include "third_party/tbb/cache_aligned_allocator.hh"
namespace tbb {
namespace detail {
namespace d2 {
template <typename QueueRep, typename Allocator>
std::pair<bool, ticket_type> internal_try_pop_impl(void* dst, QueueRep& queue, Allocator& alloc ) {
ticket_type ticket{};
do {
// Basically, we need to read `head_counter` before `tail_counter`. To achieve this, we establish a happens-before relationship on `head_counter`
ticket = queue.head_counter.load(std::memory_order_acquire);
do {
if (static_cast<std::ptrdiff_t>(queue.tail_counter.load(std::memory_order_relaxed) - ticket) <= 0) { // queue is empty
// Queue is empty
return { false, ticket };
}
// Queue had item with ticket k when we looked. Attempt to get that item.
// Another thread snatched the item, retry.
} while (!queue.head_counter.compare_exchange_strong(ticket, ticket + 1));
} while (!queue.choose(ticket).pop(dst, ticket, queue, alloc));
return { true, ticket };
}
// A high-performance thread-safe non-blocking concurrent queue.
// Multiple threads may each push and pop concurrently.
// Assignment construction is not allowed.
template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>>
class concurrent_queue {
using allocator_traits_type = tbb::detail::allocator_traits<Allocator>;
using queue_representation_type = concurrent_queue_rep<T, Allocator>;
using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>;
using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>;
public:
using size_type = std::size_t;
using value_type = T;
using reference = T&;
using const_reference = const T&;
using difference_type = std::ptrdiff_t;
using allocator_type = Allocator;
using pointer = typename allocator_traits_type::pointer;
using const_pointer = typename allocator_traits_type::const_pointer;
using iterator = concurrent_queue_iterator<concurrent_queue, T, Allocator>;
using const_iterator = concurrent_queue_iterator<concurrent_queue, const T, Allocator>;
concurrent_queue() : concurrent_queue(allocator_type()) {}
explicit concurrent_queue(const allocator_type& a) :
my_allocator(a), my_queue_representation(nullptr)
{
my_queue_representation = static_cast<queue_representation_type*>(r1::cache_aligned_allocate(sizeof(queue_representation_type)));
queue_allocator_traits::construct(my_allocator, my_queue_representation);
__TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" );
__TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" );
__TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" );
__TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" );
}
template <typename InputIterator>
concurrent_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) :
concurrent_queue(a)
{
for (; begin != end; ++begin)
push(*begin);
}
concurrent_queue( std::initializer_list<value_type> init, const allocator_type& alloc = allocator_type() ) :
concurrent_queue(init.begin(), init.end(), alloc)
{}
concurrent_queue(const concurrent_queue& src, const allocator_type& a) :
concurrent_queue(a)
{
my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item);
}
concurrent_queue(const concurrent_queue& src) :
concurrent_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator()))
{
my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item);
}
// Move constructors
concurrent_queue(concurrent_queue&& src) :
concurrent_queue(std::move(src.my_allocator))
{
internal_swap(src);
}
concurrent_queue(concurrent_queue&& src, const allocator_type& a) :
concurrent_queue(a)
{
// checking that memory allocated by one instance of allocator can be deallocated
// with another
if (my_allocator == src.my_allocator) {
internal_swap(src);
} else {
// allocators are different => performing per-element move
my_queue_representation->assign(*src.my_queue_representation, my_allocator, move_construct_item);
src.clear();
}
}
// Destroy queue
~concurrent_queue() {
clear();
my_queue_representation->clear(my_allocator);
queue_allocator_traits::destroy(my_allocator, my_queue_representation);
r1::cache_aligned_deallocate(my_queue_representation);
}
concurrent_queue& operator=( const concurrent_queue& other ) {
//TODO: implement support for std::allocator_traits::propagate_on_container_copy_assignment
if (my_queue_representation != other.my_queue_representation) {
clear();
my_allocator = other.my_allocator;
my_queue_representation->assign(*other.my_queue_representation, my_allocator, copy_construct_item);
}
return *this;
}
concurrent_queue& operator=( concurrent_queue&& other ) {
//TODO: implement support for std::allocator_traits::propagate_on_container_move_assignment
if (my_queue_representation != other.my_queue_representation) {
clear();
if (my_allocator == other.my_allocator) {
internal_swap(other);
} else {
my_queue_representation->assign(*other.my_queue_representation, other.my_allocator, move_construct_item);
other.clear();
my_allocator = std::move(other.my_allocator);
}
}
return *this;
}
concurrent_queue& operator=( std::initializer_list<value_type> init ) {
assign(init);
return *this;
}
template <typename InputIterator>
void assign( InputIterator first, InputIterator last ) {
concurrent_queue src(first, last);
clear();
my_queue_representation->assign(*src.my_queue_representation, my_allocator, move_construct_item);
}
void assign( std::initializer_list<value_type> init ) {
assign(init.begin(), init.end());
}
void swap ( concurrent_queue& other ) {
//TODO: implement support for std::allocator_traits::propagate_on_container_swap
__TBB_ASSERT(my_allocator == other.my_allocator, "unequal allocators");
internal_swap(other);
}
// Enqueue an item at tail of queue.
void push(const T& value) {
internal_push(value);
}
void push(T&& value) {
internal_push(std::move(value));
}
template <typename... Args>
void emplace( Args&&... args ) {
internal_push(std::forward<Args>(args)...);
}
// Attempt to dequeue an item from head of queue.
/** Does not wait for item to become available.
Returns true if successful; false otherwise. */
bool try_pop( T& result ) {
return internal_try_pop(&result);
}
// Return the number of items in the queue; thread unsafe
size_type unsafe_size() const {
std::ptrdiff_t size = my_queue_representation->size();
return size < 0 ? 0 : size_type(size);
}
// Equivalent to size()==0.
__TBB_nodiscard bool empty() const {
return my_queue_representation->empty();
}
// Clear the queue; not thread-safe.
void clear() {
my_queue_representation->clear(my_allocator);
}
// Return allocator object
allocator_type get_allocator() const { return my_allocator; }
//------------------------------------------------------------------------
// The iterators are intended only for debugging. They are slow and not thread safe.
//------------------------------------------------------------------------
iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); }
iterator unsafe_end() { return iterator(); }
const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); }
const_iterator unsafe_end() const { return const_iterator(); }
const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); }
const_iterator unsafe_cend() const { return const_iterator(); }
private:
void internal_swap(concurrent_queue& src) {
using std::swap;
swap(my_queue_representation, src.my_queue_representation);
}
template <typename... Args>
void internal_push( Args&&... args ) {
ticket_type k = my_queue_representation->tail_counter++;
my_queue_representation->choose(k).push(k, *my_queue_representation, my_allocator, std::forward<Args>(args)...);
}
bool internal_try_pop( void* dst ) {
return internal_try_pop_impl(dst, *my_queue_representation, my_allocator).first;
}
template <typename Container, typename Value, typename A>
friend class concurrent_queue_iterator;
static void copy_construct_item(T* location, const void* src) {
// TODO: use allocator_traits for copy construction
new (location) value_type(*static_cast<const value_type*>(src));
// queue_allocator_traits::construct(my_allocator, location, *static_cast<const T*>(src));
}
static void move_construct_item(T* location, const void* src) {
// TODO: use allocator_traits for move construction
new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src))));
}
queue_allocator_type my_allocator;
queue_representation_type* my_queue_representation;
friend void swap( concurrent_queue& lhs, concurrent_queue& rhs ) {
lhs.swap(rhs);
}
friend bool operator==( const concurrent_queue& lhs, const concurrent_queue& rhs ) {
return lhs.unsafe_size() == rhs.unsafe_size() && std::equal(lhs.unsafe_begin(), lhs.unsafe_end(), rhs.unsafe_begin());
}
#if !__TBB_CPP20_COMPARISONS_PRESENT
friend bool operator!=( const concurrent_queue& lhs, const concurrent_queue& rhs ) {
return !(lhs == rhs);
}
#endif // __TBB_CPP20_COMPARISONS_PRESENT
}; // class concurrent_queue
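// A minimal usage sketch exercising only the members declared above:
//
//   tbb::concurrent_queue<int> q;
//   q.push(1);                 // thread-safe enqueue at the tail
//   q.emplace(2);              // in-place construction at the tail
//   int v;
//   while (q.try_pop(v)) {     // non-blocking FIFO dequeue; false when empty
//       // consume v
//   }
//   // unsafe_size() and the unsafe_* iterators are not thread-safe; treat
//   // them as debugging aids only.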
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
// Deduction guide for the constructor from two iterators
template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_queue( It, It, Alloc = Alloc() )
-> concurrent_queue<iterator_value_t<It>, Alloc>;
#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
class concurrent_monitor;
// The concurrent monitor tags for concurrent_bounded_queue.
static constexpr std::size_t cbq_slots_avail_tag = 0;
static constexpr std::size_t cbq_items_avail_tag = 1;
} // namespace d2
namespace r1 {
class concurrent_monitor;
TBB_EXPORT std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size );
TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size );
TBB_EXPORT void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors );
TBB_EXPORT void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag
, std::size_t ticket );
TBB_EXPORT void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag,
std::ptrdiff_t target, d1::delegate_base& predicate );
} // namespace r1
namespace d2 {
// A high-performance thread-safe blocking concurrent bounded queue.
// Supports boundedness and blocking semantics.
// Multiple threads may each push and pop concurrently.
// Assignment construction is not allowed.
template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>>
class concurrent_bounded_queue {
using allocator_traits_type = tbb::detail::allocator_traits<Allocator>;
using queue_representation_type = concurrent_queue_rep<T, Allocator>;
using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>;
using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>;
template <typename FuncType>
void internal_wait(r1::concurrent_monitor* monitors, std::size_t monitor_tag, std::ptrdiff_t target, FuncType pred) {
d1::delegated_function<FuncType> func(pred);
r1::wait_bounded_queue_monitor(monitors, monitor_tag, target, func);
}
public:
using size_type = std::ptrdiff_t;
using value_type = T;
using reference = T&;
using const_reference = const T&;
using difference_type = std::ptrdiff_t;
using allocator_type = Allocator;
using pointer = typename allocator_traits_type::pointer;
using const_pointer = typename allocator_traits_type::const_pointer;
using iterator = concurrent_queue_iterator<concurrent_bounded_queue, T, Allocator>;
using const_iterator = concurrent_queue_iterator<concurrent_bounded_queue, const T, Allocator> ;
concurrent_bounded_queue() : concurrent_bounded_queue(allocator_type()) {}
explicit concurrent_bounded_queue( const allocator_type& a ) :
my_allocator(a), my_capacity(0), my_abort_counter(0), my_queue_representation(nullptr)
{
my_queue_representation = reinterpret_cast<queue_representation_type*>(
r1::allocate_bounded_queue_rep(sizeof(queue_representation_type)));
my_monitors = reinterpret_cast<r1::concurrent_monitor*>(my_queue_representation + 1);
queue_allocator_traits::construct(my_allocator, my_queue_representation);
my_capacity = std::size_t(-1) / (queue_representation_type::item_size > 1 ? queue_representation_type::item_size : 2);
__TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" );
__TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" );
__TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" );
__TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" );
}
template <typename InputIterator>
concurrent_bounded_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type() ) :
concurrent_bounded_queue(a)
{
for (; begin != end; ++begin)
push(*begin);
}
concurrent_bounded_queue( std::initializer_list<value_type> init, const allocator_type& alloc = allocator_type() ):
concurrent_bounded_queue(init.begin(), init.end(), alloc)
{}
concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a ) :
concurrent_bounded_queue(a)
{
my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item);
}
concurrent_bounded_queue( const concurrent_bounded_queue& src ) :
concurrent_bounded_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator()))
{
my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item);
}
// Move constructors
concurrent_bounded_queue( concurrent_bounded_queue&& src ) :
concurrent_bounded_queue(std::move(src.my_allocator))
{
internal_swap(src);
}
concurrent_bounded_queue( concurrent_bounded_queue&& src, const allocator_type& a ) :
concurrent_bounded_queue(a)
{
// checking that memory allocated by one instance of allocator can be deallocated
// with another
if (my_allocator == src.my_allocator) {
internal_swap(src);
} else {
// allocators are different => performing per-element move
my_queue_representation->assign(*src.my_queue_representation, my_allocator, move_construct_item);
src.clear();
}
}
// Destroy queue
~concurrent_bounded_queue() {
clear();
my_queue_representation->clear(my_allocator);
queue_allocator_traits::destroy(my_allocator, my_queue_representation);
r1::deallocate_bounded_queue_rep(reinterpret_cast<std::uint8_t*>(my_queue_representation),
sizeof(queue_representation_type));
}
concurrent_bounded_queue& operator=( const concurrent_bounded_queue& other ) {
//TODO: implement support for std::allocator_traits::propagate_on_container_copy_assignment
if (my_queue_representation != other.my_queue_representation) {
clear();
my_allocator = other.my_allocator;
my_queue_representation->assign(*other.my_queue_representation, my_allocator, copy_construct_item);
}
return *this;
}
concurrent_bounded_queue& operator=( concurrent_bounded_queue&& other ) {
//TODO: implement support for std::allocator_traits::propagate_on_container_move_assignment
if (my_queue_representation != other.my_queue_representation) {
clear();
if (my_allocator == other.my_allocator) {
internal_swap(other);
} else {
my_queue_representation->assign(*other.my_queue_representation, other.my_allocator, move_construct_item);
other.clear();
my_allocator = std::move(other.my_allocator);
}
}
return *this;
}
concurrent_bounded_queue& operator=( std::initializer_list<value_type> init ) {
assign(init);
return *this;
}
template <typename InputIterator>
void assign( InputIterator first, InputIterator last ) {
concurrent_bounded_queue src(first, last);
clear();
my_queue_representation->assign(*src.my_queue_representation, my_allocator, move_construct_item);
}
void assign( std::initializer_list<value_type> init ) {
assign(init.begin(), init.end());
}
void swap ( concurrent_bounded_queue& other ) {
//TODO: implement support for std::allocator_traits::propagate_on_container_swap
__TBB_ASSERT(my_allocator == other.my_allocator, "unequal allocators");
internal_swap(other);
}
// Enqueue an item at tail of queue.
void push( const T& value ) {
internal_push(value);
}
void push( T&& value ) {
internal_push(std::move(value));
}
// Enqueue an item at tail of queue if queue is not already full.
// Does not wait for queue to become not full.
// Returns true if item is pushed; false if queue was already full.
bool try_push( const T& value ) {
return internal_push_if_not_full(value);
}
bool try_push( T&& value ) {
return internal_push_if_not_full(std::move(value));
}
template <typename... Args>
void emplace( Args&&... args ) {
internal_push(std::forward<Args>(args)...);
}
template <typename... Args>
bool try_emplace( Args&&... args ) {
return internal_push_if_not_full(std::forward<Args>(args)...);
}
// Dequeue an item from the head of the queue; blocks until an item becomes available.
void pop( T& result ) {
internal_pop(&result);
}
/** Does not wait for item to become available.
Returns true if successful; false otherwise. */
bool try_pop( T& result ) {
return internal_pop_if_present(&result);
}
void abort() {
internal_abort();
}
// Return the number of items in the queue; thread unsafe
std::ptrdiff_t size() const {
return my_queue_representation->size();
}
void set_capacity( size_type new_capacity ) {
std::ptrdiff_t c = new_capacity < 0 ? infinite_capacity : new_capacity;
my_capacity = c;
}
size_type capacity() const {
return my_capacity;
}
// Equivalent to size()==0.
__TBB_nodiscard bool empty() const {
return my_queue_representation->empty();
}
// Clear the queue; not thread-safe.
void clear() {
my_queue_representation->clear(my_allocator);
}
// Return allocator object
allocator_type get_allocator() const { return my_allocator; }
//------------------------------------------------------------------------
// The iterators are intended only for debugging. They are slow and not thread safe.
//------------------------------------------------------------------------
iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); }
iterator unsafe_end() { return iterator(); }
const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); }
const_iterator unsafe_end() const { return const_iterator(); }
const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); }
const_iterator unsafe_cend() const { return const_iterator(); }
private:
void internal_swap( concurrent_bounded_queue& src ) {
std::swap(my_queue_representation, src.my_queue_representation);
std::swap(my_monitors, src.my_monitors);
}
static constexpr std::ptrdiff_t infinite_capacity = std::ptrdiff_t(~size_type(0) / 2);
template <typename... Args>
void internal_push( Args&&... args ) {
unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed);
ticket_type ticket = my_queue_representation->tail_counter++;
std::ptrdiff_t target = ticket - my_capacity;
if (static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target) { // queue is full
auto pred = [&] {
if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) {
throw_exception(exception_id::user_abort);
}
return static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target;
};
try_call( [&] {
internal_wait(my_monitors, cbq_slots_avail_tag, target, pred);
}).on_exception( [&] {
my_queue_representation->choose(ticket).abort_push(ticket, *my_queue_representation, my_allocator);
});
}
__TBB_ASSERT((static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) > target), nullptr);
my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, my_allocator, std::forward<Args>(args)...);
r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket);
}
template <typename... Args>
bool internal_push_if_not_full( Args&&... args ) {
ticket_type ticket = my_queue_representation->tail_counter.load(std::memory_order_relaxed);
do {
if (static_cast<std::ptrdiff_t>(ticket - my_queue_representation->head_counter.load(std::memory_order_relaxed)) >= my_capacity) {
// Queue is full
return false;
}
// Queue had empty slot with ticket k when we looked. Attempt to claim that slot.
// Another thread claimed the slot, so retry.
} while (!my_queue_representation->tail_counter.compare_exchange_strong(ticket, ticket + 1));
my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, my_allocator, std::forward<Args>(args)...);
r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket);
return true;
}
void internal_pop( void* dst ) {
std::ptrdiff_t target;
// This loop is a single pop operation; abort_counter should not be re-read inside
unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed);
do {
target = my_queue_representation->head_counter++;
if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target) {
auto pred = [&] {
if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) {
throw_exception(exception_id::user_abort);
}
return static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target;
};
try_call( [&] {
internal_wait(my_monitors, cbq_items_avail_tag, target, pred);
}).on_exception( [&] {
my_queue_representation->head_counter--;
});
}
__TBB_ASSERT(static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) > target, nullptr);
} while (!my_queue_representation->choose(target).pop(dst, target, *my_queue_representation, my_allocator));
r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, target);
}
bool internal_pop_if_present( void* dst ) {
bool present{};
ticket_type ticket{};
std::tie(present, ticket) = internal_try_pop_impl(dst, *my_queue_representation, my_allocator);
if (present) {
r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, ticket);
}
return present;
}
void internal_abort() {
++my_abort_counter;
r1::abort_bounded_queue_monitors(my_monitors);
}
static void copy_construct_item(T* location, const void* src) {
// TODO: use allocator_traits for copy construction
new (location) value_type(*static_cast<const value_type*>(src));
}
static void move_construct_item(T* location, const void* src) {
// TODO: use allocator_traits for move construction
new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src))));
}
template <typename Container, typename Value, typename A>
friend class concurrent_queue_iterator;
queue_allocator_type my_allocator;
std::ptrdiff_t my_capacity;
std::atomic<unsigned> my_abort_counter;
queue_representation_type* my_queue_representation;
r1::concurrent_monitor* my_monitors;
friend void swap( concurrent_bounded_queue& lhs, concurrent_bounded_queue& rhs ) {
lhs.swap(rhs);
}
friend bool operator==( const concurrent_bounded_queue& lhs, const concurrent_bounded_queue& rhs ) {
return lhs.size() == rhs.size() && std::equal(lhs.unsafe_begin(), lhs.unsafe_end(), rhs.unsafe_begin());
}
#if !__TBB_CPP20_COMPARISONS_PRESENT
friend bool operator!=( const concurrent_bounded_queue& lhs, const concurrent_bounded_queue& rhs ) {
return !(lhs == rhs);
}
#endif // __TBB_CPP20_COMPARISONS_PRESENT
}; // class concurrent_bounded_queue
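// A minimal usage sketch, using only the members declared above:
//
//   tbb::concurrent_bounded_queue<int> bq;
//   bq.set_capacity(2);        // a negative capacity selects "unbounded"
//   bq.push(1);
//   bq.push(2);                // push() blocks while the queue is full
//   if (!bq.try_push(3)) {     // returns false: the capacity is reached
//       // item 3 was not enqueued
//   }
//   int v;
//   bq.pop(v);                 // blocks until an item is available
//   // abort() from another thread wakes blocked push()/pop() callers,
//   // which then throw tbb::user_abort.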
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
// Deduction guide for the constructor from two iterators
template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>>
concurrent_bounded_queue( It, It, Alloc = Alloc() )
-> concurrent_bounded_queue<iterator_value_t<It>, Alloc>;
#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
} //namespace d2
} // namespace detail
inline namespace v1 {
using detail::d2::concurrent_queue;
using detail::d2::concurrent_bounded_queue;
using detail::r1::user_abort;
using detail::r1::bad_last_alloc;
} // inline namespace v1
} // namespace tbb
#endif // __TBB_concurrent_queue_H

268
third_party/tbb/concurrent_set.hh vendored Normal file
View file

@@ -0,0 +1,268 @@
// clang-format off
/*
Copyright (c) 2019-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_set_H
#define __TBB_concurrent_set_H
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/detail/_concurrent_skip_list.hh"
#include "third_party/tbb/tbb_allocator.hh"
#include "third_party/libcxx/functional"
#include "third_party/libcxx/utility"
namespace tbb {
namespace detail {
namespace d2 {
template<typename Key, typename KeyCompare, typename RandomGenerator, typename Allocator, bool AllowMultimapping>
struct set_traits {
static constexpr std::size_t max_level = RandomGenerator::max_level;
using random_level_generator_type = RandomGenerator;
using key_type = Key;
using value_type = key_type;
using compare_type = KeyCompare;
using value_compare = compare_type;
using reference = value_type&;
using const_reference = const value_type&;
using allocator_type = Allocator;
static constexpr bool allow_multimapping = AllowMultimapping;
static const key_type& get_key(const_reference val) {
return val;
}
static value_compare value_comp(compare_type comp) { return comp; }
}; // struct set_traits
template <typename Key, typename Compare, typename Allocator>
class concurrent_multiset;
template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>>
class concurrent_set : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> {
using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>;
public:
using key_type = Key;
using value_type = typename base_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using key_compare = Compare;
using value_compare = typename base_type::value_compare;
using allocator_type = Allocator;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using node_type = typename base_type::node_type;
// Include constructors of base_type
using base_type::base_type;
// Required for implicit deduction guides
concurrent_set() = default;
concurrent_set( const concurrent_set& ) = default;
concurrent_set( const concurrent_set& other, const allocator_type& alloc ) : base_type(other, alloc) {}
concurrent_set( concurrent_set&& ) = default;
concurrent_set( concurrent_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {}
// Required to respect the rule of 5
concurrent_set& operator=( const concurrent_set& ) = default;
concurrent_set& operator=( concurrent_set&& ) = default;
concurrent_set& operator=( std::initializer_list<value_type> il ) {
base_type::operator= (il);
return *this;
}
template<typename OtherCompare>
void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) {
this->internal_merge(source);
}
template<typename OtherCompare>
void merge(concurrent_set<key_type, OtherCompare, Allocator>&& source) {
this->internal_merge(std::move(source));
}
template<typename OtherCompare>
void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) {
this->internal_merge(source);
}
template<typename OtherCompare>
void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) {
this->internal_merge(std::move(source));
}
}; // class concurrent_set
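// A minimal usage sketch; insert() is assumed to be inherited from
// concurrent_skip_list (declared in detail/_concurrent_skip_list.hh), while
// merge() is defined above:
//
//   tbb::concurrent_set<int> s{3, 1, 2};
//   s.insert(4);
//   tbb::concurrent_multiset<int> ms{4, 4, 5};
//   s.merge(ms);               // keys absent from s (here 5) are moved in;
//                              // duplicates of existing keys stay in ms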
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename It,
typename Comp = std::less<iterator_value_t<It>>,
typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Comp>>>
concurrent_set( It, It, Comp = Comp(), Alloc = Alloc() )
-> concurrent_set<iterator_value_t<It>, Comp, Alloc>;
template <typename Key,
typename Comp = std::less<Key>,
typename Alloc = tbb::tbb_allocator<Key>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Comp>>>
concurrent_set( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() )
-> concurrent_set<Key, Comp, Alloc>;
template <typename It, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_set( It, It, Alloc )
-> concurrent_set<iterator_value_t<It>,
std::less<iterator_value_t<It>>, Alloc>;
template <typename Key, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_set( std::initializer_list<Key>, Alloc )
-> concurrent_set<Key, std::less<Key>, Alloc>;
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename Key, typename Compare, typename Allocator>
void swap( concurrent_set<Key, Compare, Allocator>& lhs,
concurrent_set<Key, Compare, Allocator>& rhs )
{
lhs.swap(rhs);
}
template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>>
class concurrent_multiset : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> {
using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>;
public:
using key_type = Key;
using value_type = typename base_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using key_compare = Compare;
using value_compare = typename base_type::value_compare;
using allocator_type = Allocator;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using node_type = typename base_type::node_type;
// Include constructors of base_type;
using base_type::base_type;
// Required for implicit deduction guides
concurrent_multiset() = default;
concurrent_multiset( const concurrent_multiset& ) = default;
concurrent_multiset( const concurrent_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {}
concurrent_multiset( concurrent_multiset&& ) = default;
concurrent_multiset( concurrent_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {}
// Required to respect the rule of 5
concurrent_multiset& operator=( const concurrent_multiset& ) = default;
concurrent_multiset& operator=( concurrent_multiset&& ) = default;
concurrent_multiset& operator=( std::initializer_list<value_type> il ) {
base_type::operator= (il);
return *this;
}
template<typename OtherCompare>
void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) {
this->internal_merge(source);
}
template<typename OtherCompare>
void merge(concurrent_set<key_type, OtherCompare, Allocator>&& source) {
this->internal_merge(std::move(source));
}
template<typename OtherCompare>
void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) {
this->internal_merge(source);
}
template<typename OtherCompare>
void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) {
this->internal_merge(std::move(source));
}
}; // class concurrent_multiset
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename It,
typename Comp = std::less<iterator_value_t<It>>,
typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Comp>>>
concurrent_multiset( It, It, Comp = Comp(), Alloc = Alloc() )
-> concurrent_multiset<iterator_value_t<It>, Comp, Alloc>;
template <typename Key,
typename Comp = std::less<Key>,
typename Alloc = tbb::tbb_allocator<Key>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Comp>>>
concurrent_multiset( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() )
-> concurrent_multiset<Key, Comp, Alloc>;
template <typename It, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_multiset( It, It, Alloc )
-> concurrent_multiset<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>;
template <typename Key, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_multiset( std::initializer_list<Key>, Alloc )
-> concurrent_multiset<Key, std::less<Key>, Alloc>;
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename Key, typename Compare, typename Allocator>
void swap( concurrent_multiset<Key, Compare, Allocator>& lhs,
concurrent_multiset<Key, Compare, Allocator>& rhs )
{
lhs.swap(rhs);
}
} // namespace d2
} // namespace detail
inline namespace v1 {
using detail::d2::concurrent_set;
using detail::d2::concurrent_multiset;
using detail::split;
} // inline namespace v1
} // namespace tbb
#endif // __TBB_concurrent_set_H

415
third_party/tbb/concurrent_unordered_map.hh vendored Normal file

View file

@@ -0,0 +1,415 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_unordered_map_H
#define __TBB_concurrent_unordered_map_H
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/detail/_concurrent_unordered_base.hh"
#include "third_party/tbb/tbb_allocator.hh"
#include "third_party/libcxx/functional"
namespace tbb {
namespace detail {
namespace d1 {
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping>
struct concurrent_unordered_map_traits {
using value_type = std::pair<const Key, T>;
using key_type = Key;
using allocator_type = Allocator;
using hash_compare_type = hash_compare<Key, Hash, KeyEqual>;
static constexpr bool allow_multimapping = AllowMultimapping;
static constexpr const key_type& get_key( const value_type& value ) {
return value.first;
}
}; // struct concurrent_unordered_map_traits
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
class concurrent_unordered_multimap;
template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>,
typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> >
class concurrent_unordered_map
: public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>>
{
using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>;
using base_type = concurrent_unordered_base<traits_type>;
public:
using key_type = typename base_type::key_type;
using mapped_type = T;
using value_type = typename base_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using hasher = typename base_type::hasher;
using key_equal = typename base_type::key_equal;
using allocator_type = typename base_type::allocator_type;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using local_iterator = typename base_type::local_iterator;
using const_local_iterator = typename base_type::const_local_iterator;
using node_type = typename base_type::node_type;
// Include constructors of base type
using base_type::base_type;
// Required for implicit deduction guides
concurrent_unordered_map() = default;
concurrent_unordered_map( const concurrent_unordered_map& ) = default;
concurrent_unordered_map( const concurrent_unordered_map& other, const allocator_type& alloc ) : base_type(other, alloc) {}
concurrent_unordered_map( concurrent_unordered_map&& ) = default;
concurrent_unordered_map( concurrent_unordered_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {}
// Required to respect the rule of 5
concurrent_unordered_map& operator=( const concurrent_unordered_map& ) = default;
concurrent_unordered_map& operator=( concurrent_unordered_map&& ) = default;
concurrent_unordered_map& operator=( std::initializer_list<value_type> il ) {
base_type::operator= (il);
return *this;
}
// Observers
mapped_type& operator[]( const key_type& key ) {
iterator where = this->find(key);
if (where == this->end()) {
where = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first;
}
return where->second;
}
mapped_type& operator[]( key_type&& key ) {
iterator where = this->find(key);
if (where == this->end()) {
where = this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first;
}
return where->second;
}
mapped_type& at( const key_type& key ) {
iterator where = this->find(key);
if (where == this->end()) {
throw_exception(exception_id::invalid_key);
}
return where->second;
}
const mapped_type& at( const key_type& key ) const {
const_iterator where = this->find(key);
if (where == this->end()) {
throw_exception(exception_id::out_of_range);
}
return where->second;
}
using base_type::insert;
template<typename P>
typename std::enable_if<std::is_constructible<value_type, P&&>::value,
std::pair<iterator, bool>>::type insert( P&& value ) {
return this->emplace(std::forward<P>(value));
}
template<typename P>
typename std::enable_if<std::is_constructible<value_type, P&&>::value,
iterator>::type insert( const_iterator hint, P&& value ) {
return this->emplace_hint(hint, std::forward<P>(value));
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) {
this->internal_merge(source);
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) {
this->internal_merge(std::move(source));
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) {
this->internal_merge(source);
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) {
this->internal_merge(std::move(source));
}
}; // class concurrent_unordered_map
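// A minimal usage sketch, limited to the interfaces declared in this header
// (concurrent_unordered_multimap is defined further below):
//
//   tbb::concurrent_unordered_map<std::string, int> m;
//   m["apples"] = 3;           // operator[] default-constructs, then assigns
//   m.insert(std::make_pair(std::string("pears"), 2));
//   int n = m.at("apples");    // at() throws when the key is absent
//   tbb::concurrent_unordered_multimap<std::string, int> mm;
//   mm.merge(m);               // transplants nodes from m into mm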
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename It,
typename Hash = std::hash<iterator_key_t<It>>,
typename KeyEq = std::equal_to<iterator_key_t<It>>,
typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!is_allocator_v<KeyEq>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_map( It, It, std::size_t = {},
Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() )
-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>;
template <typename Key, typename T,
typename Hash = std::hash<std::remove_const_t<Key>>,
typename KeyEq = std::equal_to<std::remove_const_t<Key>>,
typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!is_allocator_v<KeyEq>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t = {},
Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() )
-> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, KeyEq, Alloc>;
template <typename It, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_map( It, It, std::size_t, Alloc )
-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>,
std::hash<iterator_key_t<It>>,
std::equal_to<iterator_key_t<It>>, Alloc>;
// TODO: investigate if a deduction guide for concurrent_unordered_map(It, It, Alloc) is needed
template <typename It, typename Hash, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_map( It, It, std::size_t, Hash, Alloc )
-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>,
Hash, std::equal_to<iterator_key_t<It>>, Alloc>;
template <typename Key, typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc )
-> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>,
std::equal_to<std::remove_const_t<Key>>, Alloc>;
template <typename Key, typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, Alloc )
-> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>,
std::equal_to<std::remove_const_t<Key>>, Alloc>;
template <typename Key, typename T, typename Hash, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc )
-> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash,
std::equal_to<std::remove_const_t<Key>>, Alloc>;
#if __APPLE__ && __TBB_CLANG_VERSION == 100000
// An explicit deduction guide is required for copy/move constructor with allocator for APPLE LLVM 10.0.0
// due to an issue with generating an implicit deduction guide for these constructors under several strange circumstances.
// Currently the issue occurs because the last template parameter for Traits is boolean; it should not affect the deduction guides.
// The issue reproduces only on this version of the compiler
template <typename Key, typename T, typename Hash, typename KeyEq, typename Alloc>
concurrent_unordered_map( concurrent_unordered_map<Key, T, Hash, KeyEq, Alloc>, Alloc )
-> concurrent_unordered_map<Key, T, Hash, KeyEq, Alloc>;
#endif
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
void swap( concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& lhs,
concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& rhs ) {
lhs.swap(rhs);
}
template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>,
typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> >
class concurrent_unordered_multimap
: public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>>
{
using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>;
using base_type = concurrent_unordered_base<traits_type>;
public:
using key_type = typename base_type::key_type;
using mapped_type = T;
using value_type = typename base_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using hasher = typename base_type::hasher;
using key_equal = typename base_type::key_equal;
using allocator_type = typename base_type::allocator_type;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using local_iterator = typename base_type::local_iterator;
using const_local_iterator = typename base_type::const_local_iterator;
using node_type = typename base_type::node_type;
// Include constructors of base type
using base_type::base_type;
using base_type::insert;
// Required for implicit deduction guides
concurrent_unordered_multimap() = default;
concurrent_unordered_multimap( const concurrent_unordered_multimap& ) = default;
concurrent_unordered_multimap( const concurrent_unordered_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {}
concurrent_unordered_multimap( concurrent_unordered_multimap&& ) = default;
concurrent_unordered_multimap( concurrent_unordered_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {}
// Required to respect the rule of 5
concurrent_unordered_multimap& operator=( const concurrent_unordered_multimap& ) = default;
concurrent_unordered_multimap& operator=( concurrent_unordered_multimap&& ) = default;
concurrent_unordered_multimap& operator=( std::initializer_list<value_type> il ) {
base_type::operator= (il);
return *this;
}
template <typename P>
typename std::enable_if<std::is_constructible<value_type, P&&>::value,
std::pair<iterator, bool>>::type insert( P&& value ) {
return this->emplace(std::forward<P>(value));
}
template<typename P>
typename std::enable_if<std::is_constructible<value_type, P&&>::value,
iterator>::type insert( const_iterator hint, P&& value ) {
return this->emplace_hint(hint, std::forward<P&&>(value));
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) {
this->internal_merge(source);
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) {
this->internal_merge(std::move(source));
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) {
this->internal_merge(source);
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) {
this->internal_merge(std::move(source));
}
}; // class concurrent_unordered_multimap
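// A minimal sketch of the multimapping behaviour; count() is assumed to be
// provided by concurrent_unordered_base (detail/_concurrent_unordered_base.hh):
//
//   tbb::concurrent_unordered_multimap<int, char> mm;
//   mm.insert({1, 'a'});
//   mm.insert({1, 'b'});       // duplicate keys are retained
//   // mm.count(1) == 2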
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename It,
typename Hash = std::hash<iterator_key_t<It>>,
typename KeyEq = std::equal_to<iterator_key_t<It>>,
typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!is_allocator_v<KeyEq>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_multimap( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() )
-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>;
template <typename Key, typename T,
typename Hash = std::hash<std::remove_const_t<Key>>,
typename KeyEq = std::equal_to<std::remove_const_t<Key>>,
typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!is_allocator_v<KeyEq>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t = {},
Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() )
-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, KeyEq, Alloc>;
template <typename It, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_multimap( It, It, std::size_t, Alloc )
-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>,
std::hash<iterator_key_t<It>>,
std::equal_to<iterator_key_t<It>>, Alloc>;
template <typename It, typename Hash, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_multimap( It, It, std::size_t, Hash, Alloc )
-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash,
std::equal_to<iterator_key_t<It>>, Alloc>;
template <typename Key, typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc )
-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>,
std::equal_to<std::remove_const_t<Key>>, Alloc>;
template <typename Key, typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, Alloc )
-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>,
std::equal_to<std::remove_const_t<Key>>, Alloc>;
template <typename Key, typename T, typename Hash, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc )
-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash,
std::equal_to<std::remove_const_t<Key>>, Alloc>;
#if __APPLE__ && __TBB_CLANG_VERSION == 100000
// An explicit deduction guide is required for copy/move constructor with allocator for APPLE LLVM 10.0.0
// due to an issue with generating an implicit deduction guide for these constructors under several strange circumstances.
// Currently the issue occurs because the last template parameter for Traits is boolean; it should not affect the deduction guides.
// The issue reproduces only on this version of the compiler
template <typename Key, typename T, typename Hash, typename KeyEq, typename Alloc>
concurrent_unordered_multimap( concurrent_unordered_multimap<Key, T, Hash, KeyEq, Alloc>, Alloc )
-> concurrent_unordered_multimap<Key, T, Hash, KeyEq, Alloc>;
#endif
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
void swap( concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& lhs,
concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& rhs ) {
lhs.swap(rhs);
}
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::concurrent_unordered_map;
using detail::d1::concurrent_unordered_multimap;
using detail::split;
} // inline namespace v1
} // namespace tbb
#endif // __TBB_concurrent_unordered_map_H

334
third_party/tbb/concurrent_unordered_set.hh vendored Normal file

View file

@@ -0,0 +1,334 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_unordered_set_H
#define __TBB_concurrent_unordered_set_H
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/detail/_concurrent_unordered_base.hh"
#include "third_party/tbb/tbb_allocator.hh"
namespace tbb {
namespace detail {
namespace d1 {
template <typename Key, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping>
struct concurrent_unordered_set_traits {
using key_type = Key;
using value_type = key_type;
using allocator_type = Allocator;
using hash_compare_type = hash_compare<key_type, Hash, KeyEqual>;
static constexpr bool allow_multimapping = AllowMultimapping;
static constexpr const key_type& get_key( const value_type& value ) {
return value;
}
}; // class concurrent_unordered_set_traits
template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
class concurrent_unordered_multiset;
template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>,
typename Allocator = tbb::tbb_allocator<Key>>
class concurrent_unordered_set
: public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>>
{
using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>;
using base_type = concurrent_unordered_base<traits_type>;
public:
using key_type = typename base_type::key_type;
using value_type = typename base_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using hasher = typename base_type::hasher;
using key_equal = typename base_type::key_equal;
using allocator_type = typename base_type::allocator_type;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using local_iterator = typename base_type::local_iterator;
using const_local_iterator = typename base_type::const_local_iterator;
using node_type = typename base_type::node_type;
// Include constructors of base_type;
using base_type::base_type;
// Required for implicit deduction guides
concurrent_unordered_set() = default;
concurrent_unordered_set( const concurrent_unordered_set& ) = default;
concurrent_unordered_set( const concurrent_unordered_set& other, const allocator_type& alloc ) : base_type(other, alloc) {}
concurrent_unordered_set( concurrent_unordered_set&& ) = default;
concurrent_unordered_set( concurrent_unordered_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {}
// Required to respect the rule of 5
concurrent_unordered_set& operator=( const concurrent_unordered_set& ) = default;
concurrent_unordered_set& operator=( concurrent_unordered_set&& ) = default;
concurrent_unordered_set& operator=( std::initializer_list<value_type> il ) {
base_type::operator= (il);
return *this;
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) {
this->internal_merge(source);
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) {
this->internal_merge(std::move(source));
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) {
this->internal_merge(source);
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) {
this->internal_merge(std::move(source));
}
}; // class concurrent_unordered_set
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename It,
typename Hash = std::hash<iterator_value_t<It>>,
typename KeyEq = std::equal_to<iterator_value_t<It>>,
typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!is_allocator_v<KeyEq>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_set( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() )
-> concurrent_unordered_set<iterator_value_t<It>, Hash, KeyEq, Alloc>;
template <typename T,
typename Hash = std::hash<T>,
typename KeyEq = std::equal_to<T>,
typename Alloc = tbb::tbb_allocator<T>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!is_allocator_v<KeyEq>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_set( std::initializer_list<T>, std::size_t = {},
Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() )
-> concurrent_unordered_set<T, Hash, KeyEq, Alloc>;
template <typename It, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_set( It, It, std::size_t, Alloc )
-> concurrent_unordered_set<iterator_value_t<It>, std::hash<iterator_value_t<It>>,
std::equal_to<iterator_value_t<It>>, Alloc>;
template <typename It, typename Hash, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_set( It, It, std::size_t, Hash, Alloc )
-> concurrent_unordered_set<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>;
template <typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_set( std::initializer_list<T>, std::size_t, Alloc )
-> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>;
template <typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_set( std::initializer_list<T>, Alloc )
-> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>;
template <typename T, typename Hash, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_set( std::initializer_list<T>, std::size_t, Hash, Alloc )
-> concurrent_unordered_set<T, Hash, std::equal_to<T>, Alloc>;
#if __APPLE__ && __TBB_CLANG_VERSION == 100000
// An explicit deduction guide is required for the copy/move constructor with an allocator for APPLE LLVM 10.0.0
// due to an issue with generating an implicit deduction guide for these constructors under several strange circumstances.
// Currently the issue arises because the last template parameter for Traits is a boolean; it should not affect the deduction guides.
// The issue reproduces only on this version of the compiler.
template <typename T, typename Hash, typename KeyEq, typename Alloc>
concurrent_unordered_set( concurrent_unordered_set<T, Hash, KeyEq, Alloc>, Alloc )
-> concurrent_unordered_set<T, Hash, KeyEq, Alloc>;
#endif
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
void swap( concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& lhs,
concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& rhs ) {
lhs.swap(rhs);
}
template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>,
typename Allocator = tbb::tbb_allocator<Key>>
class concurrent_unordered_multiset
: public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>>
{
using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>;
using base_type = concurrent_unordered_base<traits_type>;
public:
using key_type = typename base_type::key_type;
using value_type = typename base_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using hasher = typename base_type::hasher;
using key_equal = typename base_type::key_equal;
using allocator_type = typename base_type::allocator_type;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using local_iterator = typename base_type::local_iterator;
using const_local_iterator = typename base_type::const_local_iterator;
using node_type = typename base_type::node_type;
// Include constructors of base_type;
using base_type::base_type;
// Required for implicit deduction guides
concurrent_unordered_multiset() = default;
concurrent_unordered_multiset( const concurrent_unordered_multiset& ) = default;
concurrent_unordered_multiset( const concurrent_unordered_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {}
concurrent_unordered_multiset( concurrent_unordered_multiset&& ) = default;
concurrent_unordered_multiset( concurrent_unordered_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {}
// Required to respect the rule of 5
concurrent_unordered_multiset& operator=( const concurrent_unordered_multiset& ) = default;
concurrent_unordered_multiset& operator=( concurrent_unordered_multiset&& ) = default;
concurrent_unordered_multiset& operator=( std::initializer_list<value_type> il ) {
base_type::operator= (il);
return *this;
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) {
this->internal_merge(source);
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) {
this->internal_merge(std::move(source));
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) {
this->internal_merge(source);
}
template <typename OtherHash, typename OtherKeyEqual>
void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) {
this->internal_merge(std::move(source));
}
}; // class concurrent_unordered_multiset
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename It,
typename Hash = std::hash<iterator_value_t<It>>,
typename KeyEq = std::equal_to<iterator_value_t<It>>,
typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!is_allocator_v<KeyEq>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_multiset( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() )
-> concurrent_unordered_multiset<iterator_value_t<It>, Hash, KeyEq, Alloc>;
template <typename T,
typename Hash = std::hash<T>,
typename KeyEq = std::equal_to<T>,
typename Alloc = tbb::tbb_allocator<T>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!is_allocator_v<KeyEq>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_multiset( std::initializer_list<T>, std::size_t = {},
Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() )
-> concurrent_unordered_multiset<T, Hash, KeyEq, Alloc>;
template <typename It, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_multiset( It, It, std::size_t, Alloc )
-> concurrent_unordered_multiset<iterator_value_t<It>, std::hash<iterator_value_t<It>>,
std::equal_to<iterator_value_t<It>>, Alloc>;
template <typename It, typename Hash, typename Alloc,
typename = std::enable_if_t<is_input_iterator_v<It>>,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_multiset( It, It, std::size_t, Hash, Alloc )
-> concurrent_unordered_multiset<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>;
template <typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Alloc )
-> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>;
template <typename T, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>>
concurrent_unordered_multiset( std::initializer_list<T>, Alloc )
-> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>;
template <typename T, typename Hash, typename Alloc,
typename = std::enable_if_t<is_allocator_v<Alloc>>,
typename = std::enable_if_t<!is_allocator_v<Hash>>,
typename = std::enable_if_t<!std::is_integral_v<Hash>>>
concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Hash, Alloc )
-> concurrent_unordered_multiset<T, Hash, std::equal_to<T>, Alloc>;
#if __APPLE__ && __TBB_CLANG_VERSION == 100000
// An explicit deduction guide is required for the copy/move constructor with an allocator for APPLE LLVM 10.0.0
// due to an issue with generating an implicit deduction guide for these constructors under several strange circumstances.
// Currently the issue arises because the last template parameter for Traits is a boolean; it should not affect the deduction guides.
// The issue reproduces only on this version of the compiler.
template <typename T, typename Hash, typename KeyEq, typename Alloc>
concurrent_unordered_multiset( concurrent_unordered_multiset<T, Hash, KeyEq, Alloc>, Alloc )
-> concurrent_unordered_multiset<T, Hash, KeyEq, Alloc>;
#endif
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
void swap( concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& lhs,
concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& rhs ) {
lhs.swap(rhs);
}
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::concurrent_unordered_set;
using detail::d1::concurrent_unordered_multiset;
using detail::split;
} // inline namespace v1
} // namespace tbb
#endif // __TBB_concurrent_unordered_set_H
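A minimal usage sketch for the containers declared above (editorial, not vendored code): it assumes a C++17 toolchain where __TBB_CPP17_DEDUCTION_GUIDES_PRESENT holds and that the header is reachable at the assumed vendored path shown below; only std::thread is used alongside it.
// Usage sketch (editorial, not vendored code).
#include "third_party/tbb/concurrent_unordered_set.hh"   // assumed vendored path
#include <cstdio>
#include <thread>
#include <vector>

int main() {
    tbb::concurrent_unordered_set<int> seen;             // explicit element type

    // insert() is safe to call from several threads at once.
    std::vector<std::thread> workers;
    for (int t = 0; t < 4; ++t)
        workers.emplace_back([&seen, t] {
            for (int i = t; i < 1000; i += 4) seen.insert(i % 97);
        });
    for (auto& w : workers) w.join();

    // C++17 deduction guide declared above: element type deduced as int,
    // hash/equality/allocator fall back to the defaults.
    tbb::concurrent_unordered_set extra{101, 103, 107};

    // merge() splices nodes whose keys are not already present in *this.
    seen.merge(extra);
    std::printf("distinct keys: %zu\n", seen.size());
}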

1130
third_party/tbb/concurrent_vector.hh vendored Normal file

File diff suppressed because it is too large

177
third_party/tbb/detail/_aggregator.hh vendored Normal file
View file

@ -0,0 +1,177 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__aggregator_H
#define __TBB_detail__aggregator_H
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/libcxx/atomic"
#if !__TBBMALLOC_BUILD // TODO: check this macro with TBB Malloc
#include "third_party/tbb/profiling.hh"
#endif
namespace tbb {
namespace detail {
namespace d1 {
// Base class for aggregated operation
template <typename Derived>
class aggregated_operation {
public:
// A zero value means "wait" status; all other values are user-specified and
// are defined in the scope of the class that uses "status"
std::atomic<uintptr_t> status;
std::atomic<Derived*> next;
aggregated_operation() : status{}, next(nullptr) {}
}; // class aggregated_operation
// Aggregator base class
/* An aggregator for collecting operations coming from multiple sources and executing
them serially on a single thread. OperationType must be derived from
aggregated_operation. The parameter HandlerType is a functor that will be passed the
list of operations and is expected to handle each operation appropriately, setting the
status of each operation to non-zero. */
template <typename OperationType>
class aggregator_generic {
public:
aggregator_generic() : pending_operations(nullptr), handler_busy(false) {}
// Execute an operation
/* Places an operation into the waitlist (pending_operations), and either handles the list,
or waits for the operation to complete, or returns.
The long_life_time parameter specifies the life time of the given operation object.
Operations with long_life_time == true may be accessed after execution.
A "short" life time operation (long_life_time == false) can be destroyed
during execution, and so any access to it after it was put into the waitlist,
including status check, is invalid. As a consequence, waiting for completion
of such operation causes undefined behavior. */
template <typename HandlerType>
void execute( OperationType* op, HandlerType& handle_operations, bool long_life_time = true ) {
// op->status should be read before inserting the operation into the
// aggregator waitlist since it can become invalid after executing a
// handler (if the operation has 'short' life time.)
const uintptr_t status = op->status.load(std::memory_order_relaxed);
// ITT note: &(op->status) tag is used to cover accesses to this op node. This
// thread has created the operation, and now releases it so that the handler
// thread may handle the associated operation w/o triggering a race condition;
// thus this tag will be acquired just before the operation is handled in the
// handle_operations functor.
call_itt_notify(releasing, &(op->status));
// insert the operation in the queue.
OperationType* res = pending_operations.load(std::memory_order_relaxed);
do {
op->next.store(res, std::memory_order_relaxed);
} while (!pending_operations.compare_exchange_strong(res, op));
if (!res) { // first in the list; handle the operations
// ITT note: &pending_operations tag covers access to the handler_busy flag,
// which this waiting handler thread will try to set before entering
// handle_operations.
call_itt_notify(acquired, &pending_operations);
start_handle_operations(handle_operations);
// The operation with 'short' life time can already be destroyed
if (long_life_time)
__TBB_ASSERT(op->status.load(std::memory_order_relaxed), nullptr);
}
// Not first; wait for op to be ready
else if (!status) { // operation is blocking here.
__TBB_ASSERT(long_life_time, "Waiting for an operation object that might be destroyed during processing");
call_itt_notify(prepare, &(op->status));
spin_wait_while_eq(op->status, uintptr_t(0));
}
}
private:
// Trigger the handling of operations when the handler is free
template <typename HandlerType>
void start_handle_operations( HandlerType& handle_operations ) {
OperationType* op_list;
// ITT note: &handler_busy tag covers access to pending_operations as it is passed
// between active and waiting handlers. Below, the waiting handler waits until
// the active handler releases, and the waiting handler acquires &handler_busy as
// it becomes the active_handler. The release point is at the end of this
// function, when all operations in pending_operations have been handled by the
// owner of this aggregator.
call_itt_notify(prepare, &handler_busy);
// get the handler_busy:
// only one thread can possibly spin here at a time
spin_wait_until_eq(handler_busy, uintptr_t(0));
call_itt_notify(acquired, &handler_busy);
// acquire fence not necessary here due to causality rule and surrounding atomics
handler_busy.store(1, std::memory_order_relaxed);
// ITT note: &pending_operations tag covers access to the handler_busy flag
// itself. Capturing the state of the pending_operations signifies that
// handler_busy has been set and a new active handler will now process that list's
// operations.
call_itt_notify(releasing, &pending_operations);
// grab pending_operations
op_list = pending_operations.exchange(nullptr);
// handle all the operations
handle_operations(op_list);
// release the handler
handler_busy.store(0, std::memory_order_release);
}
// An atomically updated list (aka mailbox) of pending operations
std::atomic<OperationType*> pending_operations;
// Controls threads access to handle_operations
std::atomic<uintptr_t> handler_busy;
}; // class aggregator_generic
template <typename HandlerType, typename OperationType>
class aggregator : public aggregator_generic<OperationType> {
HandlerType handle_operations;
public:
aggregator() = default;
void initialize_handler( HandlerType h ) { handle_operations = h; }
void execute(OperationType* op) {
aggregator_generic<OperationType>::execute(op, handle_operations);
}
}; // class aggregator
// the most-compatible friend declaration (vs, gcc, icc) is
// template<class U, class V> friend class aggregating_functor;
template <typename AggregatingClass, typename OperationList>
class aggregating_functor {
AggregatingClass* my_object{nullptr};
public:
aggregating_functor() = default;
aggregating_functor( AggregatingClass* object ) : my_object(object) {
__TBB_ASSERT(my_object, nullptr);
}
void operator()( OperationList* op_list ) {
__TBB_ASSERT(my_object, nullptr);
my_object->handle_operations(op_list);
}
}; // class aggregating_functor
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // __TBB_detail__aggregator_H
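A sketch of how the pieces above compose (editorial, not vendored code): every increment below is funneled through one active handler at a time. The operation type, handler, and class names are invented for illustration, and the sketch assumes this internal header can be included directly.
// Aggregator usage sketch (editorial, not vendored code).
#include "third_party/tbb/detail/_aggregator.hh"   // assumed vendored path
#include <atomic>

namespace demo {
using tbb::detail::d1::aggregated_operation;
using tbb::detail::d1::aggregator_generic;

// One request; status == 0 means "not yet handled".
struct increment_op : aggregated_operation<increment_op> {
    long amount = 0;
};

class serial_counter {
    long value_ = 0;                        // touched only by the active handler
    struct handler {
        serial_counter* self;
        void operator()(increment_op* list) {
            while (list) {
                // Read next before publishing status: once status is non-zero
                // the submitting thread may return and reuse its operation.
                increment_op* next = list->next.load(std::memory_order_relaxed);
                self->value_ += list->amount;
                list->status.store(1, std::memory_order_release);
                list = next;
            }
        }
    };
    handler handle_{this};
    aggregator_generic<increment_op> agg_;

public:
    void add(long amount) {
        increment_op op;                    // lives on this thread's stack
        op.amount = amount;
        agg_.execute(&op, handle_);         // returns once op.status != 0
    }
    long get() const { return value_; }     // call after the adders have joined
};
} // namespace demo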

47
third_party/tbb/aligned_space.hh vendored Normal file
View file

@ -0,0 +1,47 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_aligned_space_H
#define __TBB_aligned_space_H
#include "third_party/libcxx/cstddef"
#include "third_party/tbb/detail/_template_helpers.hh"
namespace tbb {
namespace detail {
inline namespace d0 {
//! Block of space aligned sufficiently to construct an array T with N elements.
/** The elements are not constructed or destroyed by this class.
@ingroup memory_allocation */
template<typename T, std::size_t N = 1>
class aligned_space {
alignas(alignof(T)) std::uint8_t aligned_array[N * sizeof(T)];
public:
//! Pointer to beginning of array
T* begin() const { return punned_cast<T*>(&aligned_array); }
//! Pointer to one past last element in array.
T* end() const { return begin() + N; }
};
} // namespace d0
} // namespace detail
} // namespace tbb
#endif /* __TBB_aligned_space_H */
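A short sketch of the intended usage pattern (editorial, not vendored code): the class only reserves suitably aligned raw storage, so construction and destruction stay with the caller. The include path is an assumption; the name is used as the header above exports it.
// aligned_space usage sketch (editorial, not vendored code).
#include "third_party/tbb/aligned_space.hh"   // assumed vendored path
#include <memory>                              // std::destroy_at
#include <new>                                 // placement new
#include <string>

int main() {
    // Aligned raw storage for up to 4 std::string objects; none constructed yet.
    tbb::detail::aligned_space<std::string, 4> slots;

    // The caller constructs elements in place...
    std::string* s = new (slots.begin()) std::string("hello");

    // ...and must destroy them before the storage goes out of scope.
    std::destroy_at(s);
}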

108
third_party/tbb/detail/_allocator_traits.hh vendored Normal file
View file

@ -0,0 +1,108 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__allocator_traits_H
#define __TBB_detail__allocator_traits_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_template_helpers.hh"
#include "third_party/libcxx/memory"
#include "third_party/libcxx/type_traits"
namespace tbb {
namespace detail {
inline namespace d0 {
#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT
// Struct is_always_equal_detector provides the member type "type" which is
// Allocator::is_always_equal if it is present, std::false_type otherwise
template <typename Allocator, typename = void>
struct is_always_equal_detector {
using type = std::false_type;
};
template <typename Allocator>
struct is_always_equal_detector<Allocator, tbb::detail::void_t<typename Allocator::is_always_equal>>
{
using type = typename Allocator::is_always_equal;
};
#endif // !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT
template <typename Allocator>
class allocator_traits : public std::allocator_traits<Allocator>
{
using base_type = std::allocator_traits<Allocator>;
public:
#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT
using is_always_equal = typename is_always_equal_detector<Allocator>::type;
#endif
template <typename T>
using rebind_traits = typename tbb::detail::allocator_traits<typename base_type::template rebind_alloc<T>>;
}; // struct allocator_traits
template <typename Allocator>
void copy_assign_allocators_impl( Allocator& lhs, const Allocator& rhs, /*pocca = */std::true_type ) {
lhs = rhs;
}
template <typename Allocator>
void copy_assign_allocators_impl( Allocator&, const Allocator&, /*pocca = */ std::false_type ) {}
// Copy assigns allocators only if propagate_on_container_copy_assignment is true
template <typename Allocator>
void copy_assign_allocators( Allocator& lhs, const Allocator& rhs ) {
using pocca_type = typename allocator_traits<Allocator>::propagate_on_container_copy_assignment;
copy_assign_allocators_impl(lhs, rhs, pocca_type());
}
template <typename Allocator>
void move_assign_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocma = */ std::true_type ) {
lhs = std::move(rhs);
}
template <typename Allocator>
void move_assign_allocators_impl( Allocator&, Allocator&, /*pocma = */ std::false_type ) {}
// Move assigns allocators only if propagate_on_container_move_assignment is true
template <typename Allocator>
void move_assign_allocators( Allocator& lhs, Allocator& rhs ) {
using pocma_type = typename allocator_traits<Allocator>::propagate_on_container_move_assignment;
move_assign_allocators_impl(lhs, rhs, pocma_type());
}
template <typename Allocator>
void swap_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocs = */ std::true_type ) {
using std::swap;
swap(lhs, rhs);
}
template <typename Allocator>
void swap_allocators_impl( Allocator&, Allocator&, /*pocs = */ std::false_type ) {}
// Swaps allocators only if propagate_on_container_swap is true
template <typename Allocator>
void swap_allocators( Allocator& lhs, Allocator& rhs ) {
using pocs_type = typename allocator_traits<Allocator>::propagate_on_container_swap;
swap_allocators_impl(lhs, rhs, pocs_type());
}
} // inline namespace d0
} // namespace detail
} // namespace tbb
#endif // __TBB_detail__allocator_traits_H
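A sketch of what the propagation helpers above do (editorial, not vendored code): the helpers assign or swap the allocator only when the corresponding propagate_on_container_* trait says so. The toy allocator and its id field are invented for illustration, and the include path is assumed.
// Propagation-helper sketch (editorial, not vendored code).
#include "third_party/tbb/detail/_allocator_traits.hh"   // assumed vendored path
#include <cassert>
#include <memory>
#include <type_traits>

// Toy allocator: carries an id and opts out of propagation on copy assignment.
template <typename T>
struct tagged_allocator : std::allocator<T> {
    int id = 0;
    tagged_allocator() = default;
    explicit tagged_allocator(int i) : id(i) {}
    using propagate_on_container_copy_assignment = std::false_type;  // "pocca"
    using propagate_on_container_move_assignment = std::true_type;   // "pocma"
};

int main() {
    tagged_allocator<int> lhs{1}, rhs{2};

    tbb::detail::copy_assign_allocators(lhs, rhs);  // pocca is false: no-op
    assert(lhs.id == 1);

    tbb::detail::move_assign_allocators(lhs, rhs);  // pocma is true: lhs = std::move(rhs)
    assert(lhs.id == 2);
}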

65
third_party/tbb/detail/_assert.hh vendored Normal file
View file

@ -0,0 +1,65 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__assert_H
#define __TBB_detail__assert_H
#include "third_party/tbb/detail/_config.hh"
#if __TBBMALLOC_BUILD
namespace rml { namespace internal {
#else
namespace tbb {
namespace detail {
namespace r1 {
#endif
//! Process an assertion failure.
/** Normally called from __TBB_ASSERT macro.
If assertion handler is null, print message for assertion failure and abort.
Otherwise call the assertion handler. */
TBB_EXPORT void __TBB_EXPORTED_FUNC assertion_failure(const char* location, int line, const char* expression, const char* comment);
#if __TBBMALLOC_BUILD
}} // namespaces rml::internal
#else
} // namespace r1
} // namespace detail
} // namespace tbb
#endif
#if __TBBMALLOC_BUILD
//! Release version of assertions
#define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : rml::internal::assertion_failure(__func__,__LINE__,#predicate,message))
#else
#define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : tbb::detail::r1::assertion_failure(__func__,__LINE__,#predicate,message))
#endif
#if TBB_USE_ASSERT
//! Assert that predicate is true.
/** If predicate is false, print assertion failure message.
If the comment argument is not nullptr, it is printed as part of the failure message.
The comment argument has no other effect. */
#define __TBB_ASSERT(predicate,message) __TBB_ASSERT_RELEASE(predicate,message)
//! "Extended" version
#define __TBB_ASSERT_EX __TBB_ASSERT
#else
//! No-op version of __TBB_ASSERT.
#define __TBB_ASSERT(predicate,comment) ((void)0)
//! "Extended" version is useful to suppress warnings if a variable is only used with an assert
#define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate)))
#endif // TBB_USE_ASSERT
#endif // __TBB_detail__assert_H
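A small sketch of the difference between the two macros above (editorial, not vendored code): when TBB_USE_ASSERT is 0, __TBB_ASSERT vanishes entirely, while __TBB_ASSERT_EX still evaluates its predicate as (void)(1 && (predicate)), so a variable that exists only for checking stays "used". The include path is assumed; in debug builds the macro references assertion_failure from the TBB runtime.
// Assertion-macro sketch (editorial, not vendored code).
#include "third_party/tbb/detail/_assert.hh"   // assumed vendored path

inline int checked_divide(int numerator, int divisor) {
    __TBB_ASSERT(divisor != 0, "divisor must be non-zero");
    int quotient = numerator / divisor;
    int reconstructed = quotient * divisor + numerator % divisor;
    // reconstructed exists only for this check; __TBB_ASSERT_EX keeps it "used"
    // even in release builds, avoiding an unused-variable warning.
    __TBB_ASSERT_EX(reconstructed == numerator, nullptr);
    return quotient;
}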

33
third_party/tbb/detail/_attach.hh vendored Normal file
View file

@ -0,0 +1,33 @@
// clang-format off
/*
Copyright (c) 2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__attach_H
#define __TBB_detail__attach_H
#include "third_party/tbb/detail/_config.hh"
namespace tbb {
namespace detail {
namespace d1 {
struct attach {};
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // __TBB_detail__attach_H

651
third_party/tbb/detail/_concurrent_queue_base.hh vendored Normal file
View file

@ -0,0 +1,651 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__concurrent_queue_base_H
#define __TBB_detail__concurrent_queue_base_H
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/detail/_exception.hh"
#include "third_party/tbb/detail/_machine.hh"
#include "third_party/tbb/detail/_allocator_traits.hh"
#include "third_party/tbb/profiling.hh"
#include "third_party/tbb/spin_mutex.hh"
#include "third_party/tbb/cache_aligned_allocator.hh"
#include "third_party/libcxx/atomic"
namespace tbb {
namespace detail {
namespace d2 {
using ticket_type = std::size_t;
template <typename Page>
inline bool is_valid_page(const Page p) {
return reinterpret_cast<std::uintptr_t>(p) > 1;
}
template <typename T, typename Allocator>
struct concurrent_queue_rep;
template <typename Container, typename T, typename Allocator>
class micro_queue_pop_finalizer;
#if _MSC_VER && !defined(__INTEL_COMPILER)
// unary minus operator applied to unsigned type, result still unsigned
#pragma warning( push )
#pragma warning( disable: 4146 )
#endif
// A queue using simple locking.
// For efficiency, the defaulted constructor does no work beyond the members'
// empty default initializers, which leave the queue zero-initialized.
template <typename T, typename Allocator>
class micro_queue {
private:
using queue_rep_type = concurrent_queue_rep<T, Allocator>;
using self_type = micro_queue<T, Allocator>;
public:
using size_type = std::size_t;
using value_type = T;
using reference = value_type&;
using const_reference = const value_type&;
using allocator_type = Allocator;
using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>;
using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_rep_type>;
static constexpr size_type item_size = sizeof(T);
static constexpr size_type items_per_page = item_size <= 8 ? 32 :
item_size <= 16 ? 16 :
item_size <= 32 ? 8 :
item_size <= 64 ? 4 :
item_size <= 128 ? 2 : 1;
struct padded_page {
padded_page() {}
~padded_page() {}
reference operator[] (std::size_t index) {
__TBB_ASSERT(index < items_per_page, "Index out of range");
return items[index];
}
const_reference operator[] (std::size_t index) const {
__TBB_ASSERT(index < items_per_page, "Index out of range");
return items[index];
}
padded_page* next{ nullptr };
std::atomic<std::uintptr_t> mask{};
union {
value_type items[items_per_page];
};
}; // struct padded_page
using page_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_page>;
protected:
using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>;
public:
using item_constructor_type = void (*)(value_type* location, const void* src);
micro_queue() = default;
micro_queue( const micro_queue& ) = delete;
micro_queue& operator=( const micro_queue& ) = delete;
size_type prepare_page( ticket_type k, queue_rep_type& base, page_allocator_type page_allocator,
padded_page*& p ) {
__TBB_ASSERT(p == nullptr, "Invalid page argument for prepare_page");
k &= -queue_rep_type::n_queue;
size_type index = modulo_power_of_two(k / queue_rep_type::n_queue, items_per_page);
if (!index) {
try_call( [&] {
p = page_allocator_traits::allocate(page_allocator, 1);
}).on_exception( [&] {
++base.n_invalid_entries;
invalidate_page( k );
});
page_allocator_traits::construct(page_allocator, p);
}
spin_wait_until_my_turn(tail_counter, k, base);
d1::call_itt_notify(d1::acquired, &tail_counter);
if (p) {
spin_mutex::scoped_lock lock( page_mutex );
padded_page* q = tail_page.load(std::memory_order_relaxed);
if (is_valid_page(q)) {
q->next = p;
} else {
head_page.store(p, std::memory_order_relaxed);
}
tail_page.store(p, std::memory_order_relaxed);
} else {
p = tail_page.load(std::memory_order_relaxed);
}
return index;
}
template<typename... Args>
void push( ticket_type k, queue_rep_type& base, queue_allocator_type& allocator, Args&&... args )
{
padded_page* p = nullptr;
page_allocator_type page_allocator(allocator);
size_type index = prepare_page(k, base, page_allocator, p);
__TBB_ASSERT(p != nullptr, "Page was not prepared");
// try_call API is not convenient here due to broken
// variadic capture on GCC 4.8.5
auto value_guard = make_raii_guard([&] {
++base.n_invalid_entries;
d1::call_itt_notify(d1::releasing, &tail_counter);
tail_counter.fetch_add(queue_rep_type::n_queue);
});
page_allocator_traits::construct(page_allocator, &(*p)[index], std::forward<Args>(args)...);
// If no exception was thrown, mark item as present.
p->mask.store(p->mask.load(std::memory_order_relaxed) | uintptr_t(1) << index, std::memory_order_relaxed);
d1::call_itt_notify(d1::releasing, &tail_counter);
value_guard.dismiss();
tail_counter.fetch_add(queue_rep_type::n_queue);
}
void abort_push( ticket_type k, queue_rep_type& base, queue_allocator_type& allocator ) {
padded_page* p = nullptr;
prepare_page(k, base, allocator, p);
++base.n_invalid_entries;
tail_counter.fetch_add(queue_rep_type::n_queue);
}
bool pop( void* dst, ticket_type k, queue_rep_type& base, queue_allocator_type& allocator ) {
k &= -queue_rep_type::n_queue;
spin_wait_until_eq(head_counter, k);
d1::call_itt_notify(d1::acquired, &head_counter);
spin_wait_while_eq(tail_counter, k);
d1::call_itt_notify(d1::acquired, &tail_counter);
padded_page *p = head_page.load(std::memory_order_relaxed);
__TBB_ASSERT( p, nullptr );
size_type index = modulo_power_of_two( k/queue_rep_type::n_queue, items_per_page );
bool success = false;
{
page_allocator_type page_allocator(allocator);
micro_queue_pop_finalizer<self_type, value_type, page_allocator_type> finalizer(*this, page_allocator,
k + queue_rep_type::n_queue, index == items_per_page - 1 ? p : nullptr );
if (p->mask.load(std::memory_order_relaxed) & (std::uintptr_t(1) << index)) {
success = true;
assign_and_destroy_item(dst, *p, index);
} else {
--base.n_invalid_entries;
}
}
return success;
}
micro_queue& assign( const micro_queue& src, queue_allocator_type& allocator,
item_constructor_type construct_item )
{
head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed);
tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed);
const padded_page* srcp = src.head_page.load(std::memory_order_relaxed);
if( is_valid_page(srcp) ) {
ticket_type g_index = head_counter.load(std::memory_order_relaxed);
size_type n_items = (tail_counter.load(std::memory_order_relaxed) - head_counter.load(std::memory_order_relaxed))
/ queue_rep_type::n_queue;
size_type index = modulo_power_of_two(head_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page);
size_type end_in_first_page = (index+n_items < items_per_page) ? (index + n_items) : items_per_page;
try_call( [&] {
head_page.store(make_copy(allocator, srcp, index, end_in_first_page, g_index, construct_item), std::memory_order_relaxed);
}).on_exception( [&] {
head_counter.store(0, std::memory_order_relaxed);
tail_counter.store(0, std::memory_order_relaxed);
});
padded_page* cur_page = head_page.load(std::memory_order_relaxed);
try_call( [&] {
if (srcp != src.tail_page.load(std::memory_order_relaxed)) {
for (srcp = srcp->next; srcp != src.tail_page.load(std::memory_order_relaxed); srcp=srcp->next ) {
cur_page->next = make_copy( allocator, srcp, 0, items_per_page, g_index, construct_item );
cur_page = cur_page->next;
}
__TBB_ASSERT(srcp == src.tail_page.load(std::memory_order_relaxed), nullptr );
size_type last_index = modulo_power_of_two(tail_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page);
if( last_index==0 ) last_index = items_per_page;
cur_page->next = make_copy( allocator, srcp, 0, last_index, g_index, construct_item );
cur_page = cur_page->next;
}
tail_page.store(cur_page, std::memory_order_relaxed);
}).on_exception( [&] {
padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1));
tail_page.store(invalid_page, std::memory_order_relaxed);
});
} else {
head_page.store(nullptr, std::memory_order_relaxed);
tail_page.store(nullptr, std::memory_order_relaxed);
}
return *this;
}
padded_page* make_copy( queue_allocator_type& allocator, const padded_page* src_page, size_type begin_in_page,
size_type end_in_page, ticket_type& g_index, item_constructor_type construct_item )
{
page_allocator_type page_allocator(allocator);
padded_page* new_page = page_allocator_traits::allocate(page_allocator, 1);
new_page->next = nullptr;
new_page->mask.store(src_page->mask.load(std::memory_order_relaxed), std::memory_order_relaxed);
for (; begin_in_page!=end_in_page; ++begin_in_page, ++g_index) {
if (new_page->mask.load(std::memory_order_relaxed) & uintptr_t(1) << begin_in_page) {
copy_item(*new_page, begin_in_page, *src_page, begin_in_page, construct_item);
}
}
return new_page;
}
void invalidate_page( ticket_type k ) {
// Append an invalid page at address 1 so that no more pushes are allowed.
padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1));
{
spin_mutex::scoped_lock lock( page_mutex );
tail_counter.store(k + queue_rep_type::n_queue + 1, std::memory_order_relaxed);
padded_page* q = tail_page.load(std::memory_order_relaxed);
if (is_valid_page(q)) {
q->next = invalid_page;
} else {
head_page.store(invalid_page, std::memory_order_relaxed);
}
tail_page.store(invalid_page, std::memory_order_relaxed);
}
}
padded_page* get_head_page() {
return head_page.load(std::memory_order_relaxed);
}
void clear(queue_allocator_type& allocator, padded_page* new_head = nullptr, padded_page* new_tail = nullptr) {
padded_page* curr_page = get_head_page();
size_type index = (head_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue) % items_per_page;
page_allocator_type page_allocator(allocator);
while (curr_page && is_valid_page(curr_page)) {
while (index != items_per_page) {
if (curr_page->mask.load(std::memory_order_relaxed) & (std::uintptr_t(1) << index)) {
page_allocator_traits::destroy(page_allocator, &curr_page->operator[](index));
}
++index;
}
index = 0;
padded_page* next_page = curr_page->next;
page_allocator_traits::destroy(page_allocator, curr_page);
page_allocator_traits::deallocate(page_allocator, curr_page, 1);
curr_page = next_page;
}
head_counter.store(0, std::memory_order_relaxed);
tail_counter.store(0, std::memory_order_relaxed);
head_page.store(new_head, std::memory_order_relaxed);
tail_page.store(new_tail, std::memory_order_relaxed);
}
void clear_and_invalidate(queue_allocator_type& allocator) {
padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1));
clear(allocator, invalid_page, invalid_page);
}
private:
// template <typename U, typename A>
friend class micro_queue_pop_finalizer<self_type, value_type, page_allocator_type>;
// Class used to ensure exception-safety of method "pop"
class destroyer {
value_type& my_value;
public:
destroyer( reference value ) : my_value(value) {}
destroyer( const destroyer& ) = delete;
destroyer& operator=( const destroyer& ) = delete;
~destroyer() {my_value.~T();}
}; // class destroyer
void copy_item( padded_page& dst, size_type dindex, const padded_page& src, size_type sindex,
item_constructor_type construct_item )
{
auto& src_item = src[sindex];
construct_item( &dst[dindex], static_cast<const void*>(&src_item) );
}
void assign_and_destroy_item( void* dst, padded_page& src, size_type index ) {
auto& from = src[index];
destroyer d(from);
*static_cast<T*>(dst) = std::move(from);
}
void spin_wait_until_my_turn( std::atomic<ticket_type>& counter, ticket_type k, queue_rep_type& rb ) const {
for (atomic_backoff b{};; b.pause()) {
ticket_type c = counter.load(std::memory_order_acquire);
if (c == k) return;
else if (c & 1) {
++rb.n_invalid_entries;
throw_exception( exception_id::bad_last_alloc);
}
}
}
std::atomic<padded_page*> head_page{};
std::atomic<ticket_type> head_counter{};
std::atomic<padded_page*> tail_page{};
std::atomic<ticket_type> tail_counter{};
spin_mutex page_mutex{};
}; // class micro_queue
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning( pop )
#endif // warning 4146 is back
template <typename Container, typename T, typename Allocator>
class micro_queue_pop_finalizer {
public:
using padded_page = typename Container::padded_page;
using allocator_type = Allocator;
using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>;
micro_queue_pop_finalizer( Container& queue, Allocator& alloc, ticket_type k, padded_page* p ) :
my_ticket_type(k), my_queue(queue), my_page(p), allocator(alloc)
{}
micro_queue_pop_finalizer( const micro_queue_pop_finalizer& ) = delete;
micro_queue_pop_finalizer& operator=( const micro_queue_pop_finalizer& ) = delete;
~micro_queue_pop_finalizer() {
padded_page* p = my_page;
if( is_valid_page(p) ) {
spin_mutex::scoped_lock lock( my_queue.page_mutex );
padded_page* q = p->next;
my_queue.head_page.store(q, std::memory_order_relaxed);
if( !is_valid_page(q) ) {
my_queue.tail_page.store(nullptr, std::memory_order_relaxed);
}
}
my_queue.head_counter.store(my_ticket_type, std::memory_order_release);
if ( is_valid_page(p) ) {
allocator_traits_type::destroy(allocator, static_cast<padded_page*>(p));
allocator_traits_type::deallocate(allocator, static_cast<padded_page*>(p), 1);
}
}
private:
ticket_type my_ticket_type;
Container& my_queue;
padded_page* my_page;
Allocator& allocator;
}; // class micro_queue_pop_finalizer
#if _MSC_VER && !defined(__INTEL_COMPILER)
// structure was padded due to alignment specifier
#pragma warning( push )
#pragma warning( disable: 4324 )
#endif
template <typename T, typename Allocator>
struct concurrent_queue_rep {
using self_type = concurrent_queue_rep<T, Allocator>;
using size_type = std::size_t;
using micro_queue_type = micro_queue<T, Allocator>;
using allocator_type = Allocator;
using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>;
using padded_page = typename micro_queue_type::padded_page;
using page_allocator_type = typename micro_queue_type::page_allocator_type;
using item_constructor_type = typename micro_queue_type::item_constructor_type;
private:
using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>;
using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<self_type>;
public:
// must be power of 2
static constexpr size_type n_queue = 8;
// Approximately n_queue/golden ratio
static constexpr size_type phi = 3;
static constexpr size_type item_size = micro_queue_type::item_size;
static constexpr size_type items_per_page = micro_queue_type::items_per_page;
concurrent_queue_rep() {}
concurrent_queue_rep( const concurrent_queue_rep& ) = delete;
concurrent_queue_rep& operator=( const concurrent_queue_rep& ) = delete;
void clear( queue_allocator_type& alloc ) {
for (size_type index = 0; index < n_queue; ++index) {
array[index].clear(alloc);
}
head_counter.store(0, std::memory_order_relaxed);
tail_counter.store(0, std::memory_order_relaxed);
n_invalid_entries.store(0, std::memory_order_relaxed);
}
void assign( const concurrent_queue_rep& src, queue_allocator_type& alloc, item_constructor_type construct_item ) {
head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed);
tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed);
n_invalid_entries.store(src.n_invalid_entries.load(std::memory_order_relaxed), std::memory_order_relaxed);
// copy or move micro_queues
size_type queue_idx = 0;
try_call( [&] {
for (; queue_idx < n_queue; ++queue_idx) {
array[queue_idx].assign(src.array[queue_idx], alloc, construct_item);
}
}).on_exception( [&] {
for (size_type i = 0; i < queue_idx + 1; ++i) {
array[i].clear_and_invalidate(alloc);
}
head_counter.store(0, std::memory_order_relaxed);
tail_counter.store(0, std::memory_order_relaxed);
n_invalid_entries.store(0, std::memory_order_relaxed);
});
__TBB_ASSERT(head_counter.load(std::memory_order_relaxed) == src.head_counter.load(std::memory_order_relaxed) &&
tail_counter.load(std::memory_order_relaxed) == src.tail_counter.load(std::memory_order_relaxed),
"the source concurrent queue should not be concurrently modified." );
}
bool empty() const {
ticket_type tc = tail_counter.load(std::memory_order_acquire);
ticket_type hc = head_counter.load(std::memory_order_relaxed);
// If tc != tail_counter on the second read, the queue was not empty at some point between the two reads.
return tc == tail_counter.load(std::memory_order_relaxed) &&
std::ptrdiff_t(tc - hc - n_invalid_entries.load(std::memory_order_relaxed)) <= 0;
}
std::ptrdiff_t size() const {
__TBB_ASSERT(sizeof(std::ptrdiff_t) <= sizeof(size_type), nullptr);
std::ptrdiff_t hc = head_counter.load(std::memory_order_acquire);
std::ptrdiff_t tc = tail_counter.load(std::memory_order_relaxed);
std::ptrdiff_t nie = n_invalid_entries.load(std::memory_order_relaxed);
return tc - hc - nie;
}
friend class micro_queue<T, Allocator>;
// Map ticket_type to an array index
static size_type index( ticket_type k ) {
return k * phi % n_queue;
}
micro_queue_type& choose( ticket_type k ) {
// The formula here approximates LRU in a cache-oblivious way.
return array[index(k)];
}
alignas(max_nfs_size) micro_queue_type array[n_queue];
alignas(max_nfs_size) std::atomic<ticket_type> head_counter{};
alignas(max_nfs_size) std::atomic<ticket_type> tail_counter{};
alignas(max_nfs_size) std::atomic<size_type> n_invalid_entries{};
}; // class concurrent_queue_rep
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning( pop )
#endif
template <typename Value, typename Allocator>
class concurrent_queue_iterator_base {
using queue_rep_type = concurrent_queue_rep<Value, Allocator>;
using padded_page = typename queue_rep_type::padded_page;
protected:
concurrent_queue_iterator_base() = default;
concurrent_queue_iterator_base( const concurrent_queue_iterator_base& other ) {
assign(other);
}
concurrent_queue_iterator_base( queue_rep_type* queue_rep )
: my_queue_rep(queue_rep),
my_head_counter(my_queue_rep->head_counter.load(std::memory_order_relaxed))
{
for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) {
my_array[i] = my_queue_rep->array[i].get_head_page();
}
if (!get_item(my_item, my_head_counter)) advance();
}
void assign( const concurrent_queue_iterator_base& other ) {
my_item = other.my_item;
my_queue_rep = other.my_queue_rep;
if (my_queue_rep != nullptr) {
my_head_counter = other.my_head_counter;
for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) {
my_array[i] = other.my_array[i];
}
}
}
void advance() {
__TBB_ASSERT(my_item, "Attempt to increment iterator past end of the queue");
std::size_t k = my_head_counter;
#if TBB_USE_ASSERT
Value* tmp;
get_item(tmp, k);
__TBB_ASSERT(my_item == tmp, nullptr);
#endif
std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page);
if (i == my_queue_rep->items_per_page - 1) {
padded_page*& root = my_array[queue_rep_type::index(k)];
root = root->next;
}
// Advance k
my_head_counter = ++k;
if (!get_item(my_item, k)) advance();
}
concurrent_queue_iterator_base& operator=( const concurrent_queue_iterator_base& other ) {
this->assign(other);
return *this;
}
bool get_item( Value*& item, std::size_t k ) {
if (k == my_queue_rep->tail_counter.load(std::memory_order_relaxed)) {
item = nullptr;
return true;
} else {
padded_page* p = my_array[queue_rep_type::index(k)];
__TBB_ASSERT(p, nullptr);
std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page);
item = &(*p)[i];
return (p->mask & uintptr_t(1) << i) != 0;
}
}
Value* my_item{ nullptr };
queue_rep_type* my_queue_rep{ nullptr };
ticket_type my_head_counter{};
padded_page* my_array[queue_rep_type::n_queue]{};
}; // class concurrent_queue_iterator_base
struct concurrent_queue_iterator_provider {
template <typename Iterator, typename Container>
static Iterator get( const Container& container ) {
return Iterator(container);
}
}; // struct concurrent_queue_iterator_provider
template <typename Container, typename Value, typename Allocator>
class concurrent_queue_iterator : public concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator> {
using base_type = concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator>;
public:
using value_type = Value;
using pointer = value_type*;
using reference = value_type&;
using difference_type = std::ptrdiff_t;
using iterator_category = std::forward_iterator_tag;
concurrent_queue_iterator() = default;
/** If Value==Container::value_type, then this routine is the copy constructor.
If Value==const Container::value_type, then this routine is a conversion constructor. */
concurrent_queue_iterator( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other )
: base_type(other) {}
private:
concurrent_queue_iterator( const Container& container )
: base_type(container.my_queue_representation) {}
public:
concurrent_queue_iterator& operator=( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) {
this->assign(other);
return *this;
}
reference operator*() const {
return *static_cast<pointer>(this->my_item);
}
pointer operator->() const { return &operator*(); }
concurrent_queue_iterator& operator++() {
this->advance();
return *this;
}
concurrent_queue_iterator operator++(int) {
concurrent_queue_iterator tmp = *this;
++*this;
return tmp;
}
friend bool operator==( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) {
return lhs.my_item == rhs.my_item;
}
friend bool operator!=( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) {
return lhs.my_item != rhs.my_item;
}
private:
friend struct concurrent_queue_iterator_provider;
}; // class concurrent_queue_iterator
} // namespace d2
} // namespace detail
} // tbb
#endif // __TBB_detail__concurrent_queue_base_H
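A standalone sketch of the ticket-to-sub-queue mapping used above (editorial, not vendored code): n_queue is 8 and phi is 3, which is coprime with 8, so eight consecutive tickets visit all eight micro_queues before the pattern repeats, spreading concurrent pushes and pops across the sub-queues (the header's own comment describes the formula as a cache-oblivious LRU approximation).
// Ticket-mapping sketch (editorial, not vendored code).
#include <cstddef>
#include <cstdio>

int main() {
    constexpr std::size_t n_queue = 8;  // mirrors concurrent_queue_rep::n_queue
    constexpr std::size_t phi     = 3;  // mirrors concurrent_queue_rep::phi

    for (std::size_t ticket = 0; ticket < n_queue; ++ticket)
        std::printf("ticket %zu -> micro_queue %zu\n", ticket, ticket * phi % n_queue);
    // Prints the permutation 0 3 6 1 4 7 2 5: every sub-queue is visited once
    // per n_queue consecutive tickets.
}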

File diff suppressed because it is too large

File diff suppressed because it is too large

530
third_party/tbb/detail/_config.hh vendored Normal file
View file

@ -0,0 +1,530 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__config_H
#define __TBB_detail__config_H
/** This header is supposed to contain macro definitions only.
The macros defined here are intended to control such aspects of TBB build as
- presence of compiler features
- compilation modes
- feature sets
- known compiler/platform issues
**/
/* Check which standard library we use. */
#include "third_party/libcxx/cstddef"
#ifdef __has_include
#if __has_include(<version>)
#include "third_party/libcxx/version"
#endif
#endif
#include "third_party/tbb/detail/_export.hh"
#if _MSC_VER
#define __TBB_EXPORTED_FUNC __cdecl
#define __TBB_EXPORTED_METHOD __thiscall
#else
#define __TBB_EXPORTED_FUNC
#define __TBB_EXPORTED_METHOD
#endif
#if defined(_MSVC_LANG)
#define __TBB_LANG _MSVC_LANG
#else
#define __TBB_LANG __cplusplus
#endif // _MSVC_LANG
#define __TBB_CPP14_PRESENT (__TBB_LANG >= 201402L)
#define __TBB_CPP17_PRESENT (__TBB_LANG >= 201703L)
#define __TBB_CPP20_PRESENT (__TBB_LANG >= 202002L)
#if __INTEL_COMPILER || _MSC_VER
#define __TBB_NOINLINE(decl) __declspec(noinline) decl
#elif __GNUC__
#define __TBB_NOINLINE(decl) decl __attribute__ ((noinline))
#else
#define __TBB_NOINLINE(decl) decl
#endif
#define __TBB_STRING_AUX(x) #x
#define __TBB_STRING(x) __TBB_STRING_AUX(x)
// Note that when ICC or Clang is in use, __TBB_GCC_VERSION might not fully match
// the actual GCC version on the system.
#define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
/* Check which standard library we use. */
// Prior to GCC 7, GNU libstdc++ did not have a convenient version macro.
// Therefore we use different ways to detect its version.
#ifdef TBB_USE_GLIBCXX_VERSION
// The version is explicitly specified in our public TBB_USE_GLIBCXX_VERSION macro.
// Its format should match the __TBB_GCC_VERSION above, e.g. 70301 for libstdc++ coming with GCC 7.3.1.
#define __TBB_GLIBCXX_VERSION TBB_USE_GLIBCXX_VERSION
#elif _GLIBCXX_RELEASE && _GLIBCXX_RELEASE != __GNUC__
// Reported versions of GCC and libstdc++ do not match; trust the latter
#define __TBB_GLIBCXX_VERSION (_GLIBCXX_RELEASE*10000)
#elif __GLIBCPP__ || __GLIBCXX__
// The version macro is not defined or matches the GCC version; use __TBB_GCC_VERSION
#define __TBB_GLIBCXX_VERSION __TBB_GCC_VERSION
#endif
#if __clang__
// according to clang documentation, version can be vendor specific
#define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
#endif
/** Macro helpers **/
#define __TBB_CONCAT_AUX(A,B) A##B
// The additional level of indirection is needed to expand macros A and B (not to get the AB macro).
// See [cpp.subst] and [cpp.concat] for more details.
#define __TBB_CONCAT(A,B) __TBB_CONCAT_AUX(A,B)
// The IGNORED argument and comma are needed to always have 2 arguments (even when A is empty).
#define __TBB_IS_MACRO_EMPTY(A,IGNORED) __TBB_CONCAT_AUX(__TBB_MACRO_EMPTY,A)
#define __TBB_MACRO_EMPTY 1
#if _M_X64 || _M_ARM64
#define __TBB_W(name) name##64
#else
#define __TBB_W(name) name
#endif
/** User controlled TBB features & modes **/
#ifndef TBB_USE_DEBUG
/*
There are four cases that are supported:
1. "_DEBUG is undefined" means "no debug";
2. "_DEBUG defined to something that is evaluated to 0" (including "garbage", as per [cpp.cond]) means "no debug";
3. "_DEBUG defined to something that is evaluated to a non-zero value" means "debug";
4. "_DEBUG defined to nothing (empty)" means "debug".
*/
#ifdef _DEBUG
// Check if _DEBUG is empty.
#define __TBB_IS__DEBUG_EMPTY (__TBB_IS_MACRO_EMPTY(_DEBUG,IGNORED)==__TBB_MACRO_EMPTY)
#if __TBB_IS__DEBUG_EMPTY
#define TBB_USE_DEBUG 1
#else
#define TBB_USE_DEBUG _DEBUG
#endif // __TBB_IS__DEBUG_EMPTY
#else
#define TBB_USE_DEBUG 0
#endif // _DEBUG
#endif // TBB_USE_DEBUG
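// Editorial sketch (not upstream code): how the emptiness check above resolves for
// the two interesting ways _DEBUG can appear on the command line.
//   -D_DEBUG    : _DEBUG expands to nothing, so __TBB_IS_MACRO_EMPTY(_DEBUG,IGNORED)
//                 becomes __TBB_CONCAT_AUX(__TBB_MACRO_EMPTY,), i.e. __TBB_MACRO_EMPTY,
//                 which is 1; the comparison with __TBB_MACRO_EMPTY holds and
//                 TBB_USE_DEBUG is set to 1 ("debug").
//   -D_DEBUG=0  : the paste yields the undefined identifier __TBB_MACRO_EMPTY0, which
//                 evaluates to 0 inside #if, so the comparison fails and TBB_USE_DEBUG
//                 takes the value of _DEBUG itself, i.e. 0 ("no debug").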
#ifndef TBB_USE_ASSERT
#define TBB_USE_ASSERT TBB_USE_DEBUG
#endif // TBB_USE_ASSERT
#ifndef TBB_USE_PROFILING_TOOLS
#if TBB_USE_DEBUG
#define TBB_USE_PROFILING_TOOLS 2
#else // TBB_USE_DEBUG
#define TBB_USE_PROFILING_TOOLS 0
#endif // TBB_USE_DEBUG
#endif // TBB_USE_PROFILING_TOOLS
// Exceptions support cases
#if !(__EXCEPTIONS || defined(_CPPUNWIND) || __SUNPRO_CC)
#if TBB_USE_EXCEPTIONS
#error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0.
#elif !defined(TBB_USE_EXCEPTIONS)
#define TBB_USE_EXCEPTIONS 0
#endif
#elif !defined(TBB_USE_EXCEPTIONS)
#define TBB_USE_EXCEPTIONS 1
#endif
/** Preprocessor symbols to determine HW architecture **/
#if _WIN32 || _WIN64
#if defined(_M_X64) || defined(__x86_64__) // the latter for MinGW support
#define __TBB_x86_64 1
#elif defined(_M_IA64)
#define __TBB_ipf 1
#elif defined(_M_IX86) || defined(__i386__) // the latter for MinGW support
#define __TBB_x86_32 1
#else
#define __TBB_generic_arch 1
#endif
#else /* Assume generic Unix */
#if __x86_64__
#define __TBB_x86_64 1
#elif __ia64__
#define __TBB_ipf 1
#elif __i386__||__i386 // __i386 is for Sun OS
#define __TBB_x86_32 1
#else
#define __TBB_generic_arch 1
#endif
#endif
/** Windows API or POSIX API **/
#if _WIN32 || _WIN64
#define __TBB_USE_WINAPI 1
#else
#define __TBB_USE_POSIX 1
#endif
/** Internal TBB features & modes **/
/** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to load shared libraries at run time **/
#ifndef __TBB_DYNAMIC_LOAD_ENABLED
#define __TBB_DYNAMIC_LOAD_ENABLED 1
#endif
/** __TBB_WIN8UI_SUPPORT enables support of Windows* Store Apps and limits the ability to load
    shared libraries at run time to the application container only **/
#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP
#define __TBB_WIN8UI_SUPPORT 1
#else
#define __TBB_WIN8UI_SUPPORT 0
#endif
/** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/
#ifndef __TBB_WEAK_SYMBOLS_PRESENT
#define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) )
#endif
/** Presence of compiler features **/
#if __clang__ && !__INTEL_COMPILER
#define __TBB_USE_OPTIONAL_RTTI __has_feature(cxx_rtti)
#elif defined(_CPPRTTI)
#define __TBB_USE_OPTIONAL_RTTI 1
#else
#define __TBB_USE_OPTIONAL_RTTI (__GXX_RTTI || __RTTI || __INTEL_RTTI__)
#endif
/** Address sanitizer detection **/
#ifdef __SANITIZE_ADDRESS__
#define __TBB_USE_ADDRESS_SANITIZER 1
#elif defined(__has_feature)
#if __has_feature(address_sanitizer)
#define __TBB_USE_ADDRESS_SANITIZER 1
#endif
#endif
/** Library features presence macros **/
#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__TBB_LANG >= 201402L)
#define __TBB_CPP17_INVOKE_PRESENT (__TBB_LANG >= 201703L)
// TODO: Remove the condition (__INTEL_COMPILER > 2021) from the __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
// macro when this feature starts working correctly on this compiler.
#if __INTEL_COMPILER && (!_MSC_VER || __INTEL_CXX11_MOVE__)
#define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L)
#define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__INTEL_COMPILER > 2021 && __TBB_LANG >= 201703L)
#define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition
#elif __clang__
#define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__has_feature(cxx_variable_templates))
#define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition
#ifdef __cpp_deduction_guides
#define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201611L)
#else
#define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT 0
#endif
#elif __GNUC__
#define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L && __TBB_GCC_VERSION >= 50000)
#define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201606L)
#define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 201709L && __TBB_GCC_VERSION >= 100201)
#elif _MSC_VER
#define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (_MSC_FULL_VER >= 190023918 && (!__INTEL_COMPILER || __INTEL_COMPILER >= 1700))
#define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (_MSC_VER >= 1914 && __TBB_LANG >= 201703L && (!__INTEL_COMPILER || __INTEL_COMPILER > 2021))
#define __TBB_CPP20_CONCEPTS_PRESENT (_MSC_VER >= 1923 && __TBB_LANG >= 202002L) // TODO: INTEL_COMPILER?
#else
#define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L)
#define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__TBB_LANG >= 201703L)
#define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 202002L)
#endif
// GCC4.8 on RHEL7 does not support std::get_new_handler
#define __TBB_CPP11_GET_NEW_HANDLER_PRESENT (_MSC_VER >= 1900 || __TBB_GLIBCXX_VERSION >= 40900 && __GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION)
// GCC4.8 on RHEL7 does not support std::is_trivially_copyable
#define __TBB_CPP11_TYPE_PROPERTIES_PRESENT (_LIBCPP_VERSION || _MSC_VER >= 1700 || (__TBB_GLIBCXX_VERSION >= 50000 && __GXX_EXPERIMENTAL_CXX0X__))
#define __TBB_CPP17_MEMORY_RESOURCE_PRESENT (_MSC_VER >= 1913 && (__TBB_LANG > 201402L) || \
__TBB_GLIBCXX_VERSION >= 90000 && __TBB_LANG >= 201703L)
#define __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT (_MSC_VER >= 1911)
#define __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT (__TBB_LANG >= 201703L)
#define __TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT (__TBB_LANG >= 201703L)
#define __TBB_CPP17_IS_SWAPPABLE_PRESENT (__TBB_LANG >= 201703L)
#if defined(__cpp_impl_three_way_comparison) && defined(__cpp_lib_three_way_comparison)
#define __TBB_CPP20_COMPARISONS_PRESENT ((__cpp_impl_three_way_comparison >= 201907L) && (__cpp_lib_three_way_comparison >= 201907L))
#else
#define __TBB_CPP20_COMPARISONS_PRESENT 0
#endif
#define __TBB_RESUMABLE_TASKS (!__TBB_WIN8UI_SUPPORT && !__ANDROID__ && !__QNXNTO__ && (!__linux__ || __GLIBC__))
/* This macro marks incomplete code or comments describing ideas which are considered for the future.
 * See also plain comments with TODO and FIXME marks for small improvement opportunities.
*/
#define __TBB_TODO 0
/* Check which standard library we use. */
/* __TBB_SYMBOL is defined only while processing exported symbols list where C++ is not allowed. */
#if !defined(__TBB_SYMBOL) && !__TBB_CONFIG_PREPROC_ONLY
#include "third_party/libcxx/cstddef"
#endif
/** Target OS is either iOS* or iOS* simulator **/
#if __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__
#define __TBB_IOS 1
#endif
#if __APPLE__
#if __INTEL_COMPILER && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1099 \
&& __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101000
// ICC does not correctly set the macro if -mmacosx-version-min is not specified
#define __TBB_MACOS_TARGET_VERSION (100000 + 10*(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1000))
#else
#define __TBB_MACOS_TARGET_VERSION __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
#endif
#endif
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
#define __TBB_GCC_WARNING_IGNORED_ATTRIBUTES_PRESENT (__TBB_GCC_VERSION >= 60100)
#endif
#if __GNUC__ && !__INTEL_COMPILER && !__clang__
#define __TBB_GCC_PARAMETER_PACK_IN_LAMBDAS_BROKEN (__TBB_GCC_VERSION <= 40805)
#endif
#define __TBB_CPP17_FALLTHROUGH_PRESENT (__TBB_LANG >= 201703L)
#define __TBB_CPP17_NODISCARD_PRESENT (__TBB_LANG >= 201703L)
#define __TBB_FALLTHROUGH_PRESENT (__TBB_GCC_VERSION >= 70000 && !__INTEL_COMPILER)
#if __TBB_CPP17_FALLTHROUGH_PRESENT
#define __TBB_fallthrough [[fallthrough]]
#elif __TBB_FALLTHROUGH_PRESENT
#define __TBB_fallthrough __attribute__ ((fallthrough))
#else
#define __TBB_fallthrough
#endif
#if __TBB_CPP17_NODISCARD_PRESENT
#define __TBB_nodiscard [[nodiscard]]
#elif __clang__ || __GNUC__
#define __TBB_nodiscard __attribute__((warn_unused_result))
#else
#define __TBB_nodiscard
#endif
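// Editor's illustrative sketch (hypothetical function, not part of TBB): how the two
// portability macros defined above are intended to be used together.
__TBB_nodiscard inline int __tbb_config_example_next_even(int v) {
    switch (v & 1) {
    case 1:
        ++v;               // odd input: bump to the next even number
        __TBB_fallthrough; // annotate the intentional fall-through into the next label
    default:
        return v;
    }
}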
#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \
|| _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200))
#define __TBB_TSX_INTRINSICS_PRESENT (__RTM__ || __INTEL_COMPILER || (_MSC_VER>=1700 && (__TBB_x86_64 || __TBB_x86_32)))
#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 120000) \
&& (_WIN32 || _WIN64 || __unix__ || __APPLE__) && (__TBB_x86_32 || __TBB_x86_64) && !__ANDROID__)
/** Internal TBB features & modes **/
/** __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when
it's necessary to test internal functions not exported from TBB DLLs
**/
#if (_WIN32||_WIN64) && (__TBB_SOURCE_DIRECTLY_INCLUDED || TBB_USE_PREVIEW_BINARY)
#define __TBB_NO_IMPLICIT_LINKAGE 1
#define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1
#endif
#if (__TBB_BUILD || __TBBMALLOC_BUILD || __TBBMALLOCPROXY_BUILD || __TBBBIND_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE)
#define __TBB_NO_IMPLICIT_LINKAGE 1
#endif
#if _MSC_VER
#if !__TBB_NO_IMPLICIT_LINKAGE
#ifdef _DEBUG
#pragma comment(lib, "tbb12_debug.lib")
#else
#pragma comment(lib, "tbb12.lib")
#endif
#endif
#endif
#ifndef __TBB_SCHEDULER_OBSERVER
#define __TBB_SCHEDULER_OBSERVER 1
#endif /* __TBB_SCHEDULER_OBSERVER */
#ifndef __TBB_FP_CONTEXT
#define __TBB_FP_CONTEXT 1
#endif /* __TBB_FP_CONTEXT */
#define __TBB_RECYCLE_TO_ENQUEUE __TBB_BUILD // keep non-official
#ifndef __TBB_ARENA_OBSERVER
#define __TBB_ARENA_OBSERVER __TBB_SCHEDULER_OBSERVER
#endif /* __TBB_ARENA_OBSERVER */
#ifndef __TBB_ARENA_BINDING
#define __TBB_ARENA_BINDING 1
#endif
#ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY
#define __TBB_ENQUEUE_ENFORCED_CONCURRENCY 1
#endif
#if !defined(__TBB_SURVIVE_THREAD_SWITCH) && \
(_WIN32 || _WIN64 || __APPLE__ || (defined(__unix__) && !__ANDROID__))
#define __TBB_SURVIVE_THREAD_SWITCH 1
#endif /* __TBB_SURVIVE_THREAD_SWITCH */
#ifndef TBB_PREVIEW_FLOW_GRAPH_FEATURES
#define TBB_PREVIEW_FLOW_GRAPH_FEATURES __TBB_CPF_BUILD
#endif
#ifndef __TBB_DEFAULT_PARTITIONER
#define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner
#endif
#ifndef __TBB_FLOW_TRACE_CODEPTR
#define __TBB_FLOW_TRACE_CODEPTR __TBB_CPF_BUILD
#endif
// Intel(R) C++ Compiler starts analyzing usages of the deprecated content at the template
// instantiation site, which is too late for suppression of the corresponding messages for internal
// stuff.
#if !defined(__INTEL_COMPILER) && (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0))
#if (__TBB_LANG >= 201402L && (!defined(_MSC_VER) || _MSC_VER >= 1920))
#define __TBB_DEPRECATED [[deprecated]]
#define __TBB_DEPRECATED_MSG(msg) [[deprecated(msg)]]
#elif _MSC_VER
#define __TBB_DEPRECATED __declspec(deprecated)
#define __TBB_DEPRECATED_MSG(msg) __declspec(deprecated(msg))
#elif (__GNUC__ && __TBB_GCC_VERSION >= 40805) || __clang__
#define __TBB_DEPRECATED __attribute__((deprecated))
#define __TBB_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#endif
#endif // !defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)
#if !defined(__TBB_DEPRECATED)
#define __TBB_DEPRECATED
#define __TBB_DEPRECATED_MSG(msg)
#elif !defined(__TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES)
// Suppress deprecated messages from self
#define __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES 1
#endif
#if defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) && (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)
#define __TBB_DEPRECATED_VERBOSE __TBB_DEPRECATED
#define __TBB_DEPRECATED_VERBOSE_MSG(msg) __TBB_DEPRECATED_MSG(msg)
#else
#define __TBB_DEPRECATED_VERBOSE
#define __TBB_DEPRECATED_VERBOSE_MSG(msg)
#endif // (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)
#if (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) && !(__TBB_LANG >= 201103L || _MSC_VER >= 1900)
#pragma message("TBB Warning: Support for C++98/03 is deprecated. Please use the compiler that supports C++11 features at least.")
#endif
#ifdef _VARIADIC_MAX
#define __TBB_VARIADIC_MAX _VARIADIC_MAX
#else
#if _MSC_VER == 1700
#define __TBB_VARIADIC_MAX 5 // VS11 setting, issue resolved in VS12
#elif _MSC_VER == 1600
#define __TBB_VARIADIC_MAX 10 // VS10 setting
#else
#define __TBB_VARIADIC_MAX 15
#endif
#endif
#if __SANITIZE_THREAD__
#define __TBB_USE_THREAD_SANITIZER 1
#elif defined(__has_feature)
#if __has_feature(thread_sanitizer)
#define __TBB_USE_THREAD_SANITIZER 1
#endif
#endif
#ifndef __TBB_USE_SANITIZERS
#define __TBB_USE_SANITIZERS (__TBB_USE_THREAD_SANITIZER || __TBB_USE_ADDRESS_SANITIZER)
#endif
#ifndef __TBB_RESUMABLE_TASKS_USE_THREADS
#define __TBB_RESUMABLE_TASKS_USE_THREADS __TBB_USE_SANITIZERS
#endif
#ifndef __TBB_USE_CONSTRAINTS
#define __TBB_USE_CONSTRAINTS 1
#endif
#ifndef __TBB_STRICT_CONSTRAINTS
#define __TBB_STRICT_CONSTRAINTS 1
#endif
#if __TBB_CPP20_CONCEPTS_PRESENT && __TBB_USE_CONSTRAINTS
#define __TBB_requires(...) requires __VA_ARGS__
#else // __TBB_CPP20_CONCEPTS_PRESENT
#define __TBB_requires(...)
#endif // __TBB_CPP20_CONCEPTS_PRESENT
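// Editor's illustration (hypothetical template, not part of TBB): __TBB_requires attaches
// a C++20 requires-clause only when concepts are available and constraints are enabled;
// otherwise the declaration below degrades to an unconstrained template. The flow graph
// headers later in this commit use it the same way, e.g.
// __TBB_requires(tbb::detail::hash_compare<KHash, K>).
template <typename T>
__TBB_requires((sizeof(T) >= 1))
struct __tbb_config_example_constrained {};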
/** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by
the bugs in compilers, standard or OS specific libraries. They should be
removed as soon as the corresponding bugs are fixed or the buggy OS/compiler
versions go out of the support list.
**/
// Some STL containers do not support allocator traits in old GCC versions
#if __GXX_EXPERIMENTAL_CXX0X__ && __TBB_GLIBCXX_VERSION <= 50301
#define TBB_ALLOCATOR_TRAITS_BROKEN 1
#endif
// GCC 4.8 C++ standard library implements std::this_thread::yield as no-op.
#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900
#define __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN 1
#endif
/** End of __TBB_XXX_BROKEN macro section **/
#if defined(_MSC_VER) && _MSC_VER>=1500 && !defined(__INTEL_COMPILER)
// A macro to suppress erroneous or benign "unreachable code" MSVC warning (4702)
#define __TBB_MSVC_UNREACHABLE_CODE_IGNORED 1
#endif
// Many OS versions (Android 4.0.[0-3] for example) need a workaround for dlopen to avoid a non-recursive loader lock hang
// Setting the workaround for all compile targets ($APP_PLATFORM) below Android 4.4 (android-19)
#if __ANDROID__
// MISSING #include <android/api-level.h>
#endif
#define __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING (TBB_PREVIEW_FLOW_GRAPH_FEATURES)
#ifndef __TBB_PREVIEW_CRITICAL_TASKS
#define __TBB_PREVIEW_CRITICAL_TASKS 1
#endif
#ifndef __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
#define __TBB_PREVIEW_FLOW_GRAPH_NODE_SET (TBB_PREVIEW_FLOW_GRAPH_FEATURES)
#endif
#if TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS
#define __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS 1
#endif
#if TBB_PREVIEW_TASK_GROUP_EXTENSIONS || __TBB_BUILD
#define __TBB_PREVIEW_TASK_GROUP_EXTENSIONS 1
#endif
#endif // __TBB_detail__config_H

View file

@ -0,0 +1,68 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__containers_helpers_H
#define __TBB_detail__containers_helpers_H
#include "third_party/tbb/detail/_template_helpers.hh"
#include "third_party/tbb/detail/_allocator_traits.hh"
#include "third_party/libcxx/type_traits"
#include "third_party/libcxx/memory"
#include "third_party/libcxx/functional"
namespace tbb {
namespace detail {
inline namespace d0 {
template <typename Compare, typename = void>
struct comp_is_transparent : std::false_type {};
template <typename Compare>
struct comp_is_transparent<Compare, tbb::detail::void_t<typename Compare::is_transparent>> : std::true_type {};
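// Editor's illustration (compile-time checks only, not part of TBB; the comparator below is
// hypothetical): any comparator exposing an is_transparent member type is detected, mirroring
// the heterogeneous-lookup convention of the standard library.
struct example_transparent_less {
    using is_transparent = void;
    template <typename A, typename B>
    bool operator()(const A& a, const B& b) const { return a < b; }
};
static_assert(comp_is_transparent<example_transparent_less>::value,
              "comparators exposing is_transparent are detected as transparent");
static_assert(!comp_is_transparent<std::less<int>>::value,
              "std::less<int> has no is_transparent member");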
template <typename Key, typename Hasher, typename KeyEqual, typename = void >
struct has_transparent_key_equal : std::false_type { using type = KeyEqual; };
template <typename Key, typename Hasher, typename KeyEqual>
struct has_transparent_key_equal<Key, Hasher, KeyEqual, tbb::detail::void_t<typename Hasher::transparent_key_equal>> : std::true_type {
using type = typename Hasher::transparent_key_equal;
static_assert(comp_is_transparent<type>::value, "Hash::transparent_key_equal::is_transparent is not valid or does not denote a type.");
static_assert((std::is_same<KeyEqual, std::equal_to<Key>>::value ||
std::is_same<typename Hasher::transparent_key_equal, KeyEqual>::value), "KeyEqual is a different type than equal_to<Key> or Hash::transparent_key_equal.");
};
struct is_iterator_impl {
template <typename T>
using iter_traits_category = typename std::iterator_traits<T>::iterator_category;
template <typename T>
using input_iter_category = typename std::enable_if<std::is_base_of<std::input_iterator_tag, iter_traits_category<T>>::value>::type;
}; // struct is_iterator_impl
template <typename T>
using is_input_iterator = supports<T, is_iterator_impl::iter_traits_category, is_iterator_impl::input_iter_category>;
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename T>
inline constexpr bool is_input_iterator_v = is_input_iterator<T>::value;
#endif
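// Editor's note (compile-time check only, not part of TBB): anything whose iterator_traits
// category derives from std::input_iterator_tag qualifies, including raw pointers.
static_assert(is_input_iterator<int*>::value, "raw pointers satisfy is_input_iterator");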
} // inline namespace d0
} // namespace detail
} // namespace tbb
#endif // __TBB_detail__containers_helpers_H

89
third_party/tbb/detail/_exception.hh vendored Normal file
View file

@ -0,0 +1,89 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__exception_H
#define __TBB__exception_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/libcxx/new" // std::bad_alloc
#include "third_party/libcxx/exception" // std::exception
#include "third_party/libcxx/stdexcept" // std::runtime_error
namespace tbb {
namespace detail {
inline namespace d0 {
enum class exception_id {
bad_alloc = 1,
bad_last_alloc,
user_abort,
nonpositive_step,
out_of_range,
reservation_length_error,
missing_wait,
invalid_load_factor,
invalid_key,
bad_tagged_msg_cast,
unsafe_wait,
last_entry
};
} // namespace d0
#if _MSC_VER
#pragma warning(disable: 4275)
#endif
namespace r1 {
//! Exception for concurrent containers
class TBB_EXPORT bad_last_alloc : public std::bad_alloc {
public:
const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override;
};
//! Exception for user-initiated abort
class TBB_EXPORT user_abort : public std::exception {
public:
const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override;
};
//! Exception for missing wait on structured_task_group
class TBB_EXPORT missing_wait : public std::exception {
public:
const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override;
};
//! Exception for impossible finalization of task_scheduler_handle
class TBB_EXPORT unsafe_wait : public std::runtime_error {
public:
unsafe_wait(const char* msg) : std::runtime_error(msg) {}
};
//! Gathers all throw operators in one place.
/** Its purpose is to minimize code bloat that can be caused by throw operators
scattered in multiple places, especially in templates. **/
TBB_EXPORT void __TBB_EXPORTED_FUNC throw_exception ( exception_id );
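// Editor's note: callers raise library errors through this single entry point, e.g. a
// bounds-checked accessor would call throw_exception(exception_id::out_of_range) rather
// than instantiating its own throw statement at every call site.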
} // namespace r1
inline namespace d0 {
using r1::throw_exception;
} // namespace d0
} // namespace detail
} // namespace tbb
#endif // __TBB__exception_H

47
third_party/tbb/detail/_export.hh vendored Normal file
View file

@ -0,0 +1,47 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__export_H
#define __TBB_detail__export_H
#if defined(__MINGW32__)
#define _EXPORT __declspec(dllexport)
#elif defined(_WIN32) || defined(__unix__) || defined(__APPLE__) // Use .def files for these
#define _EXPORT
#else
#error "Unknown platform/compiler"
#endif
#if __TBB_BUILD
#define TBB_EXPORT _EXPORT
#else
#define TBB_EXPORT
#endif
#if __TBBMALLOC_BUILD
#define TBBMALLOC_EXPORT _EXPORT
#else
#define TBBMALLOC_EXPORT
#endif
#if __TBBBIND_BUILD
#define TBBBIND_EXPORT _EXPORT
#else
#define TBBBIND_EXPORT
#endif
#endif

View file

@ -0,0 +1,386 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_body_impl_H
#define __TBB__flow_graph_body_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::detail::d1 (in flow_graph.h)
typedef std::uint64_t tag_value;
// TODO revamp: find out if there is already helper for has_policy.
template<typename ... Policies> struct Policy {};
template<typename ... Policies> struct has_policy;
template<typename ExpectedPolicy, typename FirstPolicy, typename ...Policies>
struct has_policy<ExpectedPolicy, FirstPolicy, Policies...> :
std::integral_constant<bool, has_policy<ExpectedPolicy, FirstPolicy>::value ||
has_policy<ExpectedPolicy, Policies...>::value> {};
template<typename ExpectedPolicy, typename SinglePolicy>
struct has_policy<ExpectedPolicy, SinglePolicy> :
std::integral_constant<bool, std::is_same<ExpectedPolicy, SinglePolicy>::value> {};
template<typename ExpectedPolicy, typename ...Policies>
struct has_policy<ExpectedPolicy, Policy<Policies...> > : has_policy<ExpectedPolicy, Policies...> {};
namespace graph_policy_namespace {
struct rejecting { };
struct reserving { };
struct queueing { };
struct lightweight { };
// K == type of field used for key-matching. Each tag-matching port will be provided
// a functor that, given an object accepted by the port, will return the
// field of type K being used for matching.
template<typename K, typename KHash=tbb_hash_compare<typename std::decay<K>::type > >
__TBB_requires(tbb::detail::hash_compare<KHash, K>)
struct key_matching {
typedef K key_type;
typedef typename std::decay<K>::type base_key_type;
typedef KHash hash_compare_type;
};
// old tag_matching join's new specifier
typedef key_matching<tag_value> tag_matching;
// Aliases for Policy combinations
typedef Policy<queueing, lightweight> queueing_lightweight;
typedef Policy<rejecting, lightweight> rejecting_lightweight;
} // namespace graph_policy_namespace
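// Editor's illustration (compile-time checks only, not part of TBB): has_policy sees
// through Policy<...> packs, so queries work on the combined aliases defined above.
static_assert(has_policy<graph_policy_namespace::lightweight,
                         graph_policy_namespace::queueing_lightweight>::value,
              "lightweight is part of queueing_lightweight");
static_assert(!has_policy<graph_policy_namespace::reserving,
                          graph_policy_namespace::queueing_lightweight>::value,
              "reserving is not part of queueing_lightweight");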
// -------------- function_body containers ----------------------
//! A functor that takes no input and generates a value of type Output
template< typename Output >
class input_body : no_assign {
public:
virtual ~input_body() {}
virtual Output operator()(flow_control& fc) = 0;
virtual input_body* clone() = 0;
};
//! The leaf for input_body
template< typename Output, typename Body>
class input_body_leaf : public input_body<Output> {
public:
input_body_leaf( const Body &_body ) : body(_body) { }
Output operator()(flow_control& fc) override { return body(fc); }
input_body_leaf* clone() override {
return new input_body_leaf< Output, Body >(body);
}
Body get_body() { return body; }
private:
Body body;
};
//! A functor that takes an Input and generates an Output
template< typename Input, typename Output >
class function_body : no_assign {
public:
virtual ~function_body() {}
virtual Output operator()(const Input &input) = 0;
virtual function_body* clone() = 0;
};
//! the leaf for function_body
template <typename Input, typename Output, typename B>
class function_body_leaf : public function_body< Input, Output > {
public:
function_body_leaf( const B &_body ) : body(_body) { }
Output operator()(const Input &i) override { return tbb::detail::invoke(body,i); }
B get_body() { return body; }
function_body_leaf* clone() override {
return new function_body_leaf< Input, Output, B >(body);
}
private:
B body;
};
//! the leaf for function_body specialized for Input and output of continue_msg
template <typename B>
class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > {
public:
function_body_leaf( const B &_body ) : body(_body) { }
continue_msg operator()( const continue_msg &i ) override {
body(i);
return i;
}
B get_body() { return body; }
function_body_leaf* clone() override {
return new function_body_leaf< continue_msg, continue_msg, B >(body);
}
private:
B body;
};
//! the leaf for function_body specialized for Output of continue_msg
template <typename Input, typename B>
class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > {
public:
function_body_leaf( const B &_body ) : body(_body) { }
continue_msg operator()(const Input &i) override {
body(i);
return continue_msg();
}
B get_body() { return body; }
function_body_leaf* clone() override {
return new function_body_leaf< Input, continue_msg, B >(body);
}
private:
B body;
};
//! the leaf for function_body specialized for Input of continue_msg
template <typename Output, typename B>
class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > {
public:
function_body_leaf( const B &_body ) : body(_body) { }
Output operator()(const continue_msg &i) override {
return body(i);
}
B get_body() { return body; }
function_body_leaf* clone() override {
return new function_body_leaf< continue_msg, Output, B >(body);
}
private:
B body;
};
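// Editor's illustration (hypothetical helper, not part of TBB): the leaf classes above
// type-erase an arbitrary callable behind the function_body interface, so a node can
// store, clone and invoke its body without knowing the callable's concrete type.
inline function_body<int, int>* example_make_square_body() {
    auto square = [](const int& x) { return x * x; };
    // The caller owns the returned pointer, matching the clone() convention above.
    return new function_body_leaf<int, int, decltype(square)>(square);
}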
//! function_body that takes an Input and a set of output ports
template<typename Input, typename OutputSet>
class multifunction_body : no_assign {
public:
virtual ~multifunction_body () {}
virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0;
virtual multifunction_body* clone() = 0;
virtual void* get_body_ptr() = 0;
};
//! leaf for multifunction. OutputSet can be a std::tuple or a vector.
template<typename Input, typename OutputSet, typename B >
class multifunction_body_leaf : public multifunction_body<Input, OutputSet> {
public:
multifunction_body_leaf(const B &_body) : body(_body) { }
void operator()(const Input &input, OutputSet &oset) override {
tbb::detail::invoke(body, input, oset); // body may explicitly put() to one or more of oset.
}
void* get_body_ptr() override { return &body; }
multifunction_body_leaf* clone() override {
return new multifunction_body_leaf<Input, OutputSet,B>(body);
}
private:
B body;
};
// ------ function bodies for hash_buffers and key-matching joins.
template<typename Input, typename Output>
class type_to_key_function_body : no_assign {
public:
virtual ~type_to_key_function_body() {}
virtual Output operator()(const Input &input) = 0; // returns an Output
virtual type_to_key_function_body* clone() = 0;
};
// specialization for ref output
template<typename Input, typename Output>
class type_to_key_function_body<Input,Output&> : no_assign {
public:
virtual ~type_to_key_function_body() {}
virtual const Output & operator()(const Input &input) = 0; // returns a const Output&
virtual type_to_key_function_body* clone() = 0;
};
template <typename Input, typename Output, typename B>
class type_to_key_function_body_leaf : public type_to_key_function_body<Input, Output> {
public:
type_to_key_function_body_leaf( const B &_body ) : body(_body) { }
Output operator()(const Input &i) override { return tbb::detail::invoke(body, i); }
type_to_key_function_body_leaf* clone() override {
return new type_to_key_function_body_leaf< Input, Output, B>(body);
}
private:
B body;
};
template <typename Input, typename Output, typename B>
class type_to_key_function_body_leaf<Input,Output&,B> : public type_to_key_function_body< Input, Output&> {
public:
type_to_key_function_body_leaf( const B &_body ) : body(_body) { }
const Output& operator()(const Input &i) override {
return tbb::detail::invoke(body, i);
}
type_to_key_function_body_leaf* clone() override {
return new type_to_key_function_body_leaf< Input, Output&, B>(body);
}
private:
B body;
};
// --------------------------- end of function_body containers ------------------------
// --------------------------- node task bodies ---------------------------------------
//! A task that calls a node's forward_task function
template< typename NodeType >
class forward_task_bypass : public graph_task {
NodeType &my_node;
public:
forward_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n
, node_priority_t node_priority = no_priority
) : graph_task(g, allocator, node_priority),
my_node(n) {}
task* execute(execution_data& ed) override {
graph_task* next_task = my_node.forward_task();
if (SUCCESSFULLY_ENQUEUED == next_task)
next_task = nullptr;
else if (next_task)
next_task = prioritize_task(my_node.graph_reference(), *next_task);
finalize<forward_task_bypass>(ed);
return next_task;
}
task* cancel(execution_data& ed) override {
finalize<forward_task_bypass>(ed);
return nullptr;
}
};
//! A task that calls a node's apply_body_bypass function, passing in an input of type Input
// return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return nullptr
template< typename NodeType, typename Input >
class apply_body_task_bypass : public graph_task {
NodeType &my_node;
Input my_input;
public:
apply_body_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n, const Input &i
, node_priority_t node_priority = no_priority
) : graph_task(g, allocator, node_priority),
my_node(n), my_input(i) {}
task* execute(execution_data& ed) override {
graph_task* next_task = my_node.apply_body_bypass( my_input );
if (SUCCESSFULLY_ENQUEUED == next_task)
next_task = nullptr;
else if (next_task)
next_task = prioritize_task(my_node.graph_reference(), *next_task);
finalize<apply_body_task_bypass>(ed);
return next_task;
}
task* cancel(execution_data& ed) override {
finalize<apply_body_task_bypass>(ed);
return nullptr;
}
};
//! A task that calls a node's apply_body_bypass function with no input
template< typename NodeType >
class input_node_task_bypass : public graph_task {
NodeType &my_node;
public:
input_node_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n )
: graph_task(g, allocator), my_node(n) {}
task* execute(execution_data& ed) override {
graph_task* next_task = my_node.apply_body_bypass( );
if (SUCCESSFULLY_ENQUEUED == next_task)
next_task = nullptr;
else if (next_task)
next_task = prioritize_task(my_node.graph_reference(), *next_task);
finalize<input_node_task_bypass>(ed);
return next_task;
}
task* cancel(execution_data& ed) override {
finalize<input_node_task_bypass>(ed);
return nullptr;
}
};
// ------------------------ end of node task bodies -----------------------------------
template<typename T, typename DecrementType, typename DummyType = void>
class threshold_regulator;
template<typename T, typename DecrementType>
class threshold_regulator<T, DecrementType,
typename std::enable_if<std::is_integral<DecrementType>::value>::type>
: public receiver<DecrementType>, no_copy
{
T* my_node;
protected:
graph_task* try_put_task( const DecrementType& value ) override {
graph_task* result = my_node->decrement_counter( value );
if( !result )
result = SUCCESSFULLY_ENQUEUED;
return result;
}
graph& graph_reference() const override {
return my_node->my_graph;
}
template<typename U, typename V> friend class limiter_node;
void reset_receiver( reset_flags ) {}
public:
threshold_regulator(T* owner) : my_node(owner) {
// Do not work with the passed pointer here as it may not be fully initialized yet
}
};
template<typename T>
class threshold_regulator<T, continue_msg, void> : public continue_receiver, no_copy {
T *my_node;
graph_task* execute() override {
return my_node->decrement_counter( 1 );
}
protected:
graph& graph_reference() const override {
return my_node->my_graph;
}
public:
typedef continue_msg input_type;
typedef continue_msg output_type;
threshold_regulator(T* owner)
: continue_receiver( /*number_of_predecessors=*/0, no_priority ), my_node(owner)
{
// Do not work with the passed pointer here as it may not be fully initialized yet
}
};
#endif // __TBB__flow_graph_body_impl_H

View file

@ -0,0 +1,435 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_cache_impl_H
#define __TBB__flow_graph_cache_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::detail::d1 (in flow_graph.h)
//! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock.
template< typename T, typename M=spin_mutex >
class node_cache {
public:
typedef size_t size_type;
bool empty() {
typename mutex_type::scoped_lock lock( my_mutex );
return internal_empty();
}
void add( T &n ) {
typename mutex_type::scoped_lock lock( my_mutex );
internal_push(n);
}
void remove( T &n ) {
typename mutex_type::scoped_lock lock( my_mutex );
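// Rotate through the queue at most size() times: every popped element except the
// first one equal to &n is pushed back, so at most one matching entry is dropped.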
for ( size_t i = internal_size(); i != 0; --i ) {
T &s = internal_pop();
if ( &s == &n )
break; // only remove one predecessor per request
internal_push(s);
}
}
void clear() {
while( !my_q.empty()) (void)my_q.pop();
}
protected:
typedef M mutex_type;
mutex_type my_mutex;
std::queue< T * > my_q;
// Assumes lock is held
inline bool internal_empty( ) {
return my_q.empty();
}
// Assumes lock is held
inline size_type internal_size( ) {
return my_q.size();
}
// Assumes lock is held
inline void internal_push( T &n ) {
my_q.push(&n);
}
// Assumes lock is held
inline T &internal_pop() {
T *v = my_q.front();
my_q.pop();
return *v;
}
};
//! A cache of predecessors that only supports try_get
template< typename T, typename M=spin_mutex >
class predecessor_cache : public node_cache< sender<T>, M > {
public:
typedef M mutex_type;
typedef T output_type;
typedef sender<output_type> predecessor_type;
typedef receiver<output_type> successor_type;
predecessor_cache( successor_type* owner ) : my_owner( owner ) {
__TBB_ASSERT( my_owner, "predecessor_cache should have an owner." );
// Do not work with the passed pointer here as it may not be fully initialized yet
}
bool get_item( output_type& v ) {
bool msg = false;
do {
predecessor_type *src;
{
typename mutex_type::scoped_lock lock(this->my_mutex);
if ( this->internal_empty() ) {
break;
}
src = &this->internal_pop();
}
// Try to get from this sender
msg = src->try_get( v );
if (msg == false) {
// Relinquish ownership of the edge
register_successor(*src, *my_owner);
} else {
// Retain ownership of the edge
this->add(*src);
}
} while ( msg == false );
return msg;
}
// If we are removing arcs (rf_clear_edges), call clear() rather than reset().
void reset() {
for(;;) {
predecessor_type *src;
{
if (this->internal_empty()) break;
src = &this->internal_pop();
}
register_successor(*src, *my_owner);
}
}
protected:
successor_type* my_owner;
};
//! A cache of predecessors that supports requests and reservations
template< typename T, typename M=spin_mutex >
class reservable_predecessor_cache : public predecessor_cache< T, M > {
public:
typedef M mutex_type;
typedef T output_type;
typedef sender<T> predecessor_type;
typedef receiver<T> successor_type;
reservable_predecessor_cache( successor_type* owner )
: predecessor_cache<T,M>(owner), reserved_src(nullptr)
{
// Do not work with the passed pointer here as it may not be fully initialized yet
}
bool try_reserve( output_type &v ) {
bool msg = false;
do {
predecessor_type* pred = nullptr;
{
typename mutex_type::scoped_lock lock(this->my_mutex);
if ( reserved_src.load(std::memory_order_relaxed) || this->internal_empty() )
return false;
pred = &this->internal_pop();
reserved_src.store(pred, std::memory_order_relaxed);
}
// Try to get from this sender
msg = pred->try_reserve( v );
if (msg == false) {
typename mutex_type::scoped_lock lock(this->my_mutex);
// Relinquish ownership of the edge
register_successor( *pred, *this->my_owner );
reserved_src.store(nullptr, std::memory_order_relaxed);
} else {
// Retain ownership of the edge
this->add( *pred);
}
} while ( msg == false );
return msg;
}
bool try_release() {
reserved_src.load(std::memory_order_relaxed)->try_release();
reserved_src.store(nullptr, std::memory_order_relaxed);
return true;
}
bool try_consume() {
reserved_src.load(std::memory_order_relaxed)->try_consume();
reserved_src.store(nullptr, std::memory_order_relaxed);
return true;
}
void reset() {
reserved_src.store(nullptr, std::memory_order_relaxed);
predecessor_cache<T, M>::reset();
}
void clear() {
reserved_src.store(nullptr, std::memory_order_relaxed);
predecessor_cache<T, M>::clear();
}
private:
std::atomic<predecessor_type*> reserved_src;
};
//! An abstract cache of successors
template<typename T, typename M=spin_rw_mutex >
class successor_cache : no_copy {
protected:
typedef M mutex_type;
mutex_type my_mutex;
typedef receiver<T> successor_type;
typedef receiver<T>* pointer_type;
typedef sender<T> owner_type;
// TODO revamp: introduce heapified collection of successors for strict priorities
typedef std::list< pointer_type > successors_type;
successors_type my_successors;
owner_type* my_owner;
public:
successor_cache( owner_type* owner ) : my_owner(owner) {
// Do not work with the passed pointer here as it may not be fully initialized yet
}
virtual ~successor_cache() {}
void register_successor( successor_type& r ) {
typename mutex_type::scoped_lock l(my_mutex, true);
if( r.priority() != no_priority )
my_successors.push_front( &r );
else
my_successors.push_back( &r );
}
void remove_successor( successor_type& r ) {
typename mutex_type::scoped_lock l(my_mutex, true);
for ( typename successors_type::iterator i = my_successors.begin();
i != my_successors.end(); ++i ) {
if ( *i == & r ) {
my_successors.erase(i);
break;
}
}
}
bool empty() {
typename mutex_type::scoped_lock l(my_mutex, false);
return my_successors.empty();
}
void clear() {
my_successors.clear();
}
virtual graph_task* try_put_task( const T& t ) = 0;
}; // successor_cache<T>
//! An abstract cache of successors, specialized to continue_msg
template<typename M>
class successor_cache< continue_msg, M > : no_copy {
protected:
typedef M mutex_type;
mutex_type my_mutex;
typedef receiver<continue_msg> successor_type;
typedef receiver<continue_msg>* pointer_type;
typedef sender<continue_msg> owner_type;
typedef std::list< pointer_type > successors_type;
successors_type my_successors;
owner_type* my_owner;
public:
successor_cache( sender<continue_msg>* owner ) : my_owner(owner) {
// Do not work with the passed pointer here as it may not be fully initialized yet
}
virtual ~successor_cache() {}
void register_successor( successor_type& r ) {
typename mutex_type::scoped_lock l(my_mutex, true);
if( r.priority() != no_priority )
my_successors.push_front( &r );
else
my_successors.push_back( &r );
__TBB_ASSERT( my_owner, "Cache of successors must have an owner." );
if ( r.is_continue_receiver() ) {
r.register_predecessor( *my_owner );
}
}
void remove_successor( successor_type& r ) {
typename mutex_type::scoped_lock l(my_mutex, true);
for ( successors_type::iterator i = my_successors.begin(); i != my_successors.end(); ++i ) {
if ( *i == &r ) {
__TBB_ASSERT(my_owner, "Cache of successors must have an owner.");
// TODO: check if we need to test for continue_receiver before removing from r.
r.remove_predecessor( *my_owner );
my_successors.erase(i);
break;
}
}
}
bool empty() {
typename mutex_type::scoped_lock l(my_mutex, false);
return my_successors.empty();
}
void clear() {
my_successors.clear();
}
virtual graph_task* try_put_task( const continue_msg& t ) = 0;
}; // successor_cache< continue_msg >
//! A cache of successors that are broadcast to
template<typename T, typename M=spin_rw_mutex>
class broadcast_cache : public successor_cache<T, M> {
typedef successor_cache<T, M> base_type;
typedef M mutex_type;
typedef typename successor_cache<T,M>::successors_type successors_type;
public:
broadcast_cache( typename base_type::owner_type* owner ): base_type(owner) {
// Do not work with the passed pointer here as it may not be fully initialized yet
}
// as above, but call try_put_task instead, and return the last task we received (if any)
graph_task* try_put_task( const T &t ) override {
graph_task * last_task = nullptr;
typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true);
typename successors_type::iterator i = this->my_successors.begin();
while ( i != this->my_successors.end() ) {
graph_task *new_task = (*i)->try_put_task(t);
// workaround for icc bug
graph& graph_ref = (*i)->graph_reference();
last_task = combine_tasks(graph_ref, last_task, new_task); // enqueue if necessary
if(new_task) {
++i;
}
else { // failed
if ( (*i)->register_predecessor(*this->my_owner) ) {
i = this->my_successors.erase(i);
} else {
++i;
}
}
}
return last_task;
}
// call try_put_task on each successor, collecting the returned tasks in `tasks`; returns whether at least one put succeeded
bool gather_successful_try_puts( const T &t, graph_task_list& tasks ) {
bool is_at_least_one_put_successful = false;
typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true);
typename successors_type::iterator i = this->my_successors.begin();
while ( i != this->my_successors.end() ) {
graph_task * new_task = (*i)->try_put_task(t);
if(new_task) {
++i;
if(new_task != SUCCESSFULLY_ENQUEUED) {
tasks.push_back(*new_task);
}
is_at_least_one_put_successful = true;
}
else { // failed
if ( (*i)->register_predecessor(*this->my_owner) ) {
i = this->my_successors.erase(i);
} else {
++i;
}
}
}
return is_at_least_one_put_successful;
}
};
//! A cache of successors that are put in a round-robin fashion
template<typename T, typename M=spin_rw_mutex >
class round_robin_cache : public successor_cache<T, M> {
typedef successor_cache<T, M> base_type;
typedef size_t size_type;
typedef M mutex_type;
typedef typename successor_cache<T,M>::successors_type successors_type;
public:
round_robin_cache( typename base_type::owner_type* owner ): base_type(owner) {
// Do not work with the passed pointer here as it may not be fully initialized yet
}
size_type size() {
typename mutex_type::scoped_lock l(this->my_mutex, false);
return this->my_successors.size();
}
graph_task* try_put_task( const T &t ) override {
typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true);
typename successors_type::iterator i = this->my_successors.begin();
while ( i != this->my_successors.end() ) {
graph_task* new_task = (*i)->try_put_task(t);
if ( new_task ) {
return new_task;
} else {
if ( (*i)->register_predecessor(*this->my_owner) ) {
i = this->my_successors.erase(i);
}
else {
++i;
}
}
}
return nullptr;
}
};
#endif // __TBB__flow_graph_cache_impl_H

View file

@ -0,0 +1,477 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_flow_graph_impl_H
#define __TBB_flow_graph_impl_H
// // MISSING #include "../config.h"
#include "third_party/tbb/detail/_task.hh"
#include "third_party/tbb/task_group.hh"
#include "third_party/tbb/task_arena.hh"
#include "third_party/tbb/flow_graph_abstractions.hh"
#include "third_party/tbb/concurrent_priority_queue.hh"
#include "third_party/libcxx/list"
namespace tbb {
namespace detail {
namespace d1 {
class graph_task;
static graph_task* const SUCCESSFULLY_ENQUEUED = (graph_task*)-1;
typedef unsigned int node_priority_t;
static const node_priority_t no_priority = node_priority_t(0);
class graph;
class graph_node;
template <typename GraphContainerType, typename GraphNodeType>
class graph_iterator {
friend class graph;
friend class graph_node;
public:
typedef size_t size_type;
typedef GraphNodeType value_type;
typedef GraphNodeType* pointer;
typedef GraphNodeType& reference;
typedef const GraphNodeType& const_reference;
typedef std::forward_iterator_tag iterator_category;
//! Copy constructor
graph_iterator(const graph_iterator& other) :
my_graph(other.my_graph), current_node(other.current_node)
{}
//! Assignment
graph_iterator& operator=(const graph_iterator& other) {
if (this != &other) {
my_graph = other.my_graph;
current_node = other.current_node;
}
return *this;
}
//! Dereference
reference operator*() const;
//! Dereference
pointer operator->() const;
//! Equality
bool operator==(const graph_iterator& other) const {
return ((my_graph == other.my_graph) && (current_node == other.current_node));
}
#if !__TBB_CPP20_COMPARISONS_PRESENT
//! Inequality
bool operator!=(const graph_iterator& other) const { return !(operator==(other)); }
#endif
//! Pre-increment
graph_iterator& operator++() {
internal_forward();
return *this;
}
//! Post-increment
graph_iterator operator++(int) {
graph_iterator result = *this;
operator++();
return result;
}
private:
// the graph over which we are iterating
GraphContainerType *my_graph;
// pointer into my_graph's my_nodes list
pointer current_node;
//! Private initializing constructor for begin() and end() iterators
graph_iterator(GraphContainerType *g, bool begin);
void internal_forward();
}; // class graph_iterator
// flags to modify the behavior of the graph reset(). Can be combined.
enum reset_flags {
rf_reset_protocol = 0,
rf_reset_bodies = 1 << 0, // delete the current node body, reset to a copy of the initial node body.
rf_clear_edges = 1 << 1 // delete edges
};
void activate_graph(graph& g);
void deactivate_graph(graph& g);
bool is_graph_active(graph& g);
graph_task* prioritize_task(graph& g, graph_task& arena_task);
void spawn_in_graph_arena(graph& g, graph_task& arena_task);
void enqueue_in_graph_arena(graph &g, graph_task& arena_task);
class graph;
//! Base class for tasks generated by graph nodes.
class graph_task : public task {
public:
graph_task(graph& g, small_object_allocator& allocator
, node_priority_t node_priority = no_priority
)
: my_graph(g)
, priority(node_priority)
, my_allocator(allocator)
{}
graph& my_graph; // graph instance the task belongs to
// TODO revamp: rename to my_priority
node_priority_t priority;
template <typename DerivedType>
void destruct_and_deallocate(const execution_data& ed);
protected:
template <typename DerivedType>
void finalize(const execution_data& ed);
private:
// To organize task_list
graph_task* my_next{ nullptr };
small_object_allocator my_allocator;
// TODO revamp: elaborate internal interfaces to avoid friends declarations
friend class graph_task_list;
friend graph_task* prioritize_task(graph& g, graph_task& gt);
};
struct graph_task_comparator {
bool operator()(const graph_task* left, const graph_task* right) {
return left->priority < right->priority;
}
};
typedef tbb::concurrent_priority_queue<graph_task*, graph_task_comparator> graph_task_priority_queue_t;
class priority_task_selector : public task {
public:
priority_task_selector(graph_task_priority_queue_t& priority_queue, small_object_allocator& allocator)
: my_priority_queue(priority_queue), my_allocator(allocator), my_task() {}
task* execute(execution_data& ed) override {
next_task();
__TBB_ASSERT(my_task, nullptr);
task* t_next = my_task->execute(ed);
my_allocator.delete_object(this, ed);
return t_next;
}
task* cancel(execution_data& ed) override {
if (!my_task) {
next_task();
}
__TBB_ASSERT(my_task, nullptr);
task* t_next = my_task->cancel(ed);
my_allocator.delete_object(this, ed);
return t_next;
}
private:
void next_task() {
// TODO revamp: hold functors in priority queue instead of real tasks
bool result = my_priority_queue.try_pop(my_task);
__TBB_ASSERT_EX(result, "Number of critical tasks for scheduler and tasks"
" in graph's priority queue mismatched");
__TBB_ASSERT(my_task && my_task != SUCCESSFULLY_ENQUEUED,
"Incorrect task submitted to graph priority queue");
__TBB_ASSERT(my_task->priority != no_priority,
"Tasks from graph's priority queue must have priority");
}
graph_task_priority_queue_t& my_priority_queue;
small_object_allocator my_allocator;
graph_task* my_task;
};
template <typename Receiver, typename Body> class run_and_put_task;
template <typename Body> class run_task;
//********************************************************************************
// graph tasks helpers
//********************************************************************************
//! The list of graph tasks
class graph_task_list : no_copy {
private:
graph_task* my_first;
graph_task** my_next_ptr;
public:
//! Construct empty list
graph_task_list() : my_first(nullptr), my_next_ptr(&my_first) {}
//! True if list is empty; false otherwise.
bool empty() const { return !my_first; }
//! Push task onto back of list.
void push_back(graph_task& task) {
task.my_next = nullptr;
*my_next_ptr = &task;
my_next_ptr = &task.my_next;
}
//! Pop the front task from the list.
graph_task& pop_front() {
__TBB_ASSERT(!empty(), "attempt to pop item from empty task_list");
graph_task* result = my_first;
my_first = result->my_next;
if (!my_first) {
my_next_ptr = &my_first;
}
return *result;
}
};
//! The graph class
/** This class serves as a handle to the graph */
class graph : no_copy, public graph_proxy {
friend class graph_node;
void prepare_task_arena(bool reinit = false) {
if (reinit) {
__TBB_ASSERT(my_task_arena, "task arena is nullptr");
my_task_arena->terminate();
my_task_arena->initialize(task_arena::attach());
}
else {
__TBB_ASSERT(my_task_arena == nullptr, "task arena is not nullptr");
my_task_arena = new task_arena(task_arena::attach());
}
if (!my_task_arena->is_active()) // failed to attach
my_task_arena->initialize(); // create a new, default-initialized arena
__TBB_ASSERT(my_task_arena->is_active(), "task arena is not active");
}
public:
//! Constructs a graph with isolated task_group_context
graph();
//! Constructs a graph with use_this_context as context
explicit graph(task_group_context& use_this_context);
//! Destroys the graph.
/** Calls wait_for_all, then destroys the root task and context. */
~graph();
//! Used to register that an external entity may still interact with the graph.
/** The graph will not return from wait_for_all until a matching number of release_wait calls is
made. */
void reserve_wait() override;
//! Deregisters an external entity that may have interacted with the graph.
/** The graph will not return from wait_for_all until all the number of reserve_wait calls
matches the number of release_wait calls. */
void release_wait() override;
//! Wait until graph is idle and the number of release_wait calls equals to the number of
//! reserve_wait calls.
/** The waiting thread will go off and steal work while it is blocked in the wait_for_all. */
void wait_for_all() {
cancelled = false;
caught_exception = false;
try_call([this] {
my_task_arena->execute([this] {
wait(my_wait_context, *my_context);
});
cancelled = my_context->is_group_execution_cancelled();
}).on_exception([this] {
my_context->reset();
caught_exception = true;
cancelled = true;
});
// TODO: the "if" condition below is just a work-around to support the concurrent wait
// mode. The cancellation and exception mechanisms are still broken in this mode.
// Consider using task group not to re-implement the same functionality.
if (!(my_context->traits() & task_group_context::concurrent_wait)) {
my_context->reset(); // consistent with behavior in catch()
}
}
// TODO revamp: consider adding getter for task_group_context.
// ITERATORS
template<typename C, typename N>
friend class graph_iterator;
// Graph iterator typedefs
typedef graph_iterator<graph, graph_node> iterator;
typedef graph_iterator<const graph, const graph_node> const_iterator;
// Graph iterator constructors
//! start iterator
iterator begin();
//! end iterator
iterator end();
//! start const iterator
const_iterator begin() const;
//! end const iterator
const_iterator end() const;
//! start const iterator
const_iterator cbegin() const;
//! end const iterator
const_iterator cend() const;
// thread-unsafe state reset.
void reset(reset_flags f = rf_reset_protocol);
//! cancels execution of the associated task_group_context
void cancel();
//! return status of graph execution
bool is_cancelled() { return cancelled; }
bool exception_thrown() { return caught_exception; }
private:
wait_context my_wait_context;
task_group_context *my_context;
bool own_context;
bool cancelled;
bool caught_exception;
bool my_is_active;
graph_node *my_nodes, *my_nodes_last;
tbb::spin_mutex nodelist_mutex;
void register_node(graph_node *n);
void remove_node(graph_node *n);
task_arena* my_task_arena;
graph_task_priority_queue_t my_priority_queue;
friend void activate_graph(graph& g);
friend void deactivate_graph(graph& g);
friend bool is_graph_active(graph& g);
friend graph_task* prioritize_task(graph& g, graph_task& arena_task);
friend void spawn_in_graph_arena(graph& g, graph_task& arena_task);
friend void enqueue_in_graph_arena(graph &g, graph_task& arena_task);
friend class task_arena_base;
}; // class graph
template<typename DerivedType>
inline void graph_task::destruct_and_deallocate(const execution_data& ed) {
auto allocator = my_allocator;
// TODO: investigate if direct call of derived destructor gives any benefits.
this->~graph_task();
allocator.deallocate(static_cast<DerivedType*>(this), ed);
}
template<typename DerivedType>
inline void graph_task::finalize(const execution_data& ed) {
graph& g = my_graph;
destruct_and_deallocate<DerivedType>(ed);
g.release_wait();
}
//********************************************************************************
// end of graph tasks helpers
//********************************************************************************
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
class get_graph_helper;
#endif
//! The base of all graph nodes.
class graph_node : no_copy {
friend class graph;
template<typename C, typename N>
friend class graph_iterator;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
friend class get_graph_helper;
#endif
protected:
graph& my_graph;
graph& graph_reference() const {
// TODO revamp: propagate graph_reference() method to all the reference places.
return my_graph;
}
graph_node* next = nullptr;
graph_node* prev = nullptr;
public:
explicit graph_node(graph& g);
virtual ~graph_node();
protected:
// performs the reset on an individual node.
virtual void reset_node(reset_flags f = rf_reset_protocol) = 0;
}; // class graph_node
inline void activate_graph(graph& g) {
g.my_is_active = true;
}
inline void deactivate_graph(graph& g) {
g.my_is_active = false;
}
inline bool is_graph_active(graph& g) {
return g.my_is_active;
}
inline graph_task* prioritize_task(graph& g, graph_task& gt) {
if( no_priority == gt.priority )
return &gt;
//! Non-preemptive priority pattern. The original task is submitted as a work item to the
//! priority queue, and a new critical task is created to take and execute a work item with
//! the highest known priority. The reference counting responsibility is transferred (via
//! allocate_continuation) to the new task.
task* critical_task = gt.my_allocator.new_object<priority_task_selector>(g.my_priority_queue, gt.my_allocator);
__TBB_ASSERT( critical_task, "bad_alloc?" );
g.my_priority_queue.push(&gt);
using tbb::detail::d1::submit;
submit( *critical_task, *g.my_task_arena, *g.my_context, /*as_critical=*/true );
return nullptr;
}
//! Spawns a task inside graph arena
inline void spawn_in_graph_arena(graph& g, graph_task& arena_task) {
if (is_graph_active(g)) {
task* gt = prioritize_task(g, arena_task);
if( !gt )
return;
__TBB_ASSERT(g.my_task_arena && g.my_task_arena->is_active(), nullptr);
submit( *gt, *g.my_task_arena, *g.my_context
#if __TBB_PREVIEW_CRITICAL_TASKS
, /*as_critical=*/false
#endif
);
}
}
// TODO revamp: unify *_in_graph_arena functions
//! Enqueues a task inside graph arena
inline void enqueue_in_graph_arena(graph &g, graph_task& arena_task) {
if (is_graph_active(g)) {
__TBB_ASSERT( g.my_task_arena && g.my_task_arena->is_active(), "Is graph's arena initialized and active?" );
// TODO revamp: decide on the approach that does not postpone critical task
if( task* gt = prioritize_task(g, arena_task) )
submit( *gt, *g.my_task_arena, *g.my_context, /*as_critical=*/false);
}
}
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // __TBB_flow_graph_impl_H
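For orientation before the node implementations that follow, a hedged sketch of the user-level code these helpers ultimately serve; the vendored public header is assumed to live at third_party/tbb/flow_graph.hh in this tree. Each try_put ends up spawning graph tasks in the graph's arena (spawn_in_graph_arena above), and wait_for_all() returns once every reserve_wait() has been balanced by a release_wait().
#include <iostream>
#include "third_party/tbb/flow_graph.hh"     // assumed vendored location of the public header
using namespace tbb::flow;
int main() {
    graph g;                                               // owns the task arena and wait context
    function_node<int, int> doubler(g, unlimited, [](int v) { return 2 * v; });
    function_node<int, int> printer(g, serial, [](int v) {
        std::cout << v << '\n';
        return v;
    });
    make_edge(doubler, printer);
    for (int i = 0; i < 5; ++i)
        doubler.try_put(i);                                // work is executed by graph tasks
    g.wait_for_all();                                      // block until the graph drains
    return 0;
}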

View file

@@ -0,0 +1,352 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_indexer_impl_H
#define __TBB__flow_graph_indexer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::detail::d1
#include "third_party/tbb/detail/_flow_graph_types_impl.hh"
// Output of the indexer_node is a tbb::flow::tagged_msg, and will be of
// the form tagged_msg<tag, result>
// where the value of tag will indicate which result was put to the
// successor.
template<typename IndexerNodeBaseType, typename T, size_t K>
graph_task* do_try_put(const T &v, void *p) {
typename IndexerNodeBaseType::output_type o(K, v);
return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o);
}
template<typename TupleTypes,int N>
struct indexer_helper {
template<typename IndexerNodeBaseType, typename PortTuple>
static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) {
typedef typename std::tuple_element<N-1, TupleTypes>::type T;
graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, N-1>;
std::get<N-1>(my_input).set_up(p, indexer_node_put_task, g);
indexer_helper<TupleTypes,N-1>::template set_indexer_node_pointer<IndexerNodeBaseType,PortTuple>(my_input, p, g);
}
};
template<typename TupleTypes>
struct indexer_helper<TupleTypes,1> {
template<typename IndexerNodeBaseType, typename PortTuple>
static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) {
typedef typename std::tuple_element<0, TupleTypes>::type T;
graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, 0>;
std::get<0>(my_input).set_up(p, indexer_node_put_task, g);
}
};
template<typename T>
class indexer_input_port : public receiver<T> {
private:
void* my_indexer_ptr;
typedef graph_task* (* forward_function_ptr)(T const &, void* );
forward_function_ptr my_try_put_task;
graph* my_graph;
public:
void set_up(void* p, forward_function_ptr f, graph& g) {
my_indexer_ptr = p;
my_try_put_task = f;
my_graph = &g;
}
protected:
template< typename R, typename B > friend class run_and_put_task;
template<typename X, typename Y> friend class broadcast_cache;
template<typename X, typename Y> friend class round_robin_cache;
graph_task* try_put_task(const T &v) override {
return my_try_put_task(v, my_indexer_ptr);
}
graph& graph_reference() const override {
return *my_graph;
}
};
template<typename InputTuple, typename OutputType, typename StructTypes>
class indexer_node_FE {
public:
static const int N = std::tuple_size<InputTuple>::value;
typedef OutputType output_type;
typedef InputTuple input_type;
// Some versions of Intel(R) C++ Compiler fail to generate an implicit constructor for the class which has std::tuple as a member.
indexer_node_FE() : my_inputs() {}
input_type &input_ports() { return my_inputs; }
protected:
input_type my_inputs;
};
//! indexer_node_base
template<typename InputTuple, typename OutputType, typename StructTypes>
class indexer_node_base : public graph_node, public indexer_node_FE<InputTuple, OutputType,StructTypes>,
public sender<OutputType> {
protected:
using graph_node::my_graph;
public:
static const size_t N = std::tuple_size<InputTuple>::value;
typedef OutputType output_type;
typedef StructTypes tuple_types;
typedef typename sender<output_type>::successor_type successor_type;
typedef indexer_node_FE<InputTuple, output_type,StructTypes> input_ports_type;
private:
// ----------- Aggregator ------------
enum op_type { reg_succ, rem_succ, try__put_task };
typedef indexer_node_base<InputTuple,output_type,StructTypes> class_type;
class indexer_node_base_operation : public aggregated_operation<indexer_node_base_operation> {
public:
char type;
union {
output_type const *my_arg;
successor_type *my_succ;
graph_task* bypass_t;
};
indexer_node_base_operation(const output_type* e, op_type t) :
type(char(t)), my_arg(e) {}
indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)),
my_succ(const_cast<successor_type *>(&s)) {}
};
typedef aggregating_functor<class_type, indexer_node_base_operation> handler_type;
friend class aggregating_functor<class_type, indexer_node_base_operation>;
aggregator<handler_type, indexer_node_base_operation> my_aggregator;
void handle_operations(indexer_node_base_operation* op_list) {
indexer_node_base_operation *current;
while(op_list) {
current = op_list;
op_list = op_list->next;
switch(current->type) {
case reg_succ:
my_successors.register_successor(*(current->my_succ));
current->status.store( SUCCEEDED, std::memory_order_release);
break;
case rem_succ:
my_successors.remove_successor(*(current->my_succ));
current->status.store( SUCCEEDED, std::memory_order_release);
break;
case try__put_task: {
current->bypass_t = my_successors.try_put_task(*(current->my_arg));
current->status.store( SUCCEEDED, std::memory_order_release); // the actual return value of try_put_task is passed back via bypass_t
}
break;
}
}
}
// ---------- end aggregator -----------
public:
indexer_node_base(graph& g) : graph_node(g), input_ports_type(), my_successors(this) {
indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, g);
my_aggregator.initialize_handler(handler_type(this));
}
indexer_node_base(const indexer_node_base& other)
: graph_node(other.my_graph), input_ports_type(), sender<output_type>(), my_successors(this)
{
indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, other.my_graph);
my_aggregator.initialize_handler(handler_type(this));
}
bool register_successor(successor_type &r) override {
indexer_node_base_operation op_data(r, reg_succ);
my_aggregator.execute(&op_data);
return op_data.status == SUCCEEDED;
}
bool remove_successor( successor_type &r) override {
indexer_node_base_operation op_data(r, rem_succ);
my_aggregator.execute(&op_data);
return op_data.status == SUCCEEDED;
}
graph_task* try_put_task(output_type const *v) { // not a virtual method in this class
indexer_node_base_operation op_data(v, try__put_task);
my_aggregator.execute(&op_data);
return op_data.bypass_t;
}
protected:
void reset_node(reset_flags f) override {
if(f & rf_clear_edges) {
my_successors.clear();
}
}
private:
broadcast_cache<output_type, null_rw_mutex> my_successors;
}; //indexer_node_base
template<int N, typename InputTuple> struct input_types;
template<typename InputTuple>
struct input_types<1, InputTuple> {
typedef typename std::tuple_element<0, InputTuple>::type first_type;
typedef tagged_msg<size_t, first_type > type;
};
template<typename InputTuple>
struct input_types<2, InputTuple> {
typedef typename std::tuple_element<0, InputTuple>::type first_type;
typedef typename std::tuple_element<1, InputTuple>::type second_type;
typedef tagged_msg<size_t, first_type, second_type> type;
};
template<typename InputTuple>
struct input_types<3, InputTuple> {
typedef typename std::tuple_element<0, InputTuple>::type first_type;
typedef typename std::tuple_element<1, InputTuple>::type second_type;
typedef typename std::tuple_element<2, InputTuple>::type third_type;
typedef tagged_msg<size_t, first_type, second_type, third_type> type;
};
template<typename InputTuple>
struct input_types<4, InputTuple> {
typedef typename std::tuple_element<0, InputTuple>::type first_type;
typedef typename std::tuple_element<1, InputTuple>::type second_type;
typedef typename std::tuple_element<2, InputTuple>::type third_type;
typedef typename std::tuple_element<3, InputTuple>::type fourth_type;
typedef tagged_msg<size_t, first_type, second_type, third_type,
fourth_type> type;
};
template<typename InputTuple>
struct input_types<5, InputTuple> {
typedef typename std::tuple_element<0, InputTuple>::type first_type;
typedef typename std::tuple_element<1, InputTuple>::type second_type;
typedef typename std::tuple_element<2, InputTuple>::type third_type;
typedef typename std::tuple_element<3, InputTuple>::type fourth_type;
typedef typename std::tuple_element<4, InputTuple>::type fifth_type;
typedef tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type> type;
};
template<typename InputTuple>
struct input_types<6, InputTuple> {
typedef typename std::tuple_element<0, InputTuple>::type first_type;
typedef typename std::tuple_element<1, InputTuple>::type second_type;
typedef typename std::tuple_element<2, InputTuple>::type third_type;
typedef typename std::tuple_element<3, InputTuple>::type fourth_type;
typedef typename std::tuple_element<4, InputTuple>::type fifth_type;
typedef typename std::tuple_element<5, InputTuple>::type sixth_type;
typedef tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type> type;
};
template<typename InputTuple>
struct input_types<7, InputTuple> {
typedef typename std::tuple_element<0, InputTuple>::type first_type;
typedef typename std::tuple_element<1, InputTuple>::type second_type;
typedef typename std::tuple_element<2, InputTuple>::type third_type;
typedef typename std::tuple_element<3, InputTuple>::type fourth_type;
typedef typename std::tuple_element<4, InputTuple>::type fifth_type;
typedef typename std::tuple_element<5, InputTuple>::type sixth_type;
typedef typename std::tuple_element<6, InputTuple>::type seventh_type;
typedef tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type> type;
};
template<typename InputTuple>
struct input_types<8, InputTuple> {
typedef typename std::tuple_element<0, InputTuple>::type first_type;
typedef typename std::tuple_element<1, InputTuple>::type second_type;
typedef typename std::tuple_element<2, InputTuple>::type third_type;
typedef typename std::tuple_element<3, InputTuple>::type fourth_type;
typedef typename std::tuple_element<4, InputTuple>::type fifth_type;
typedef typename std::tuple_element<5, InputTuple>::type sixth_type;
typedef typename std::tuple_element<6, InputTuple>::type seventh_type;
typedef typename std::tuple_element<7, InputTuple>::type eighth_type;
typedef tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type> type;
};
template<typename InputTuple>
struct input_types<9, InputTuple> {
typedef typename std::tuple_element<0, InputTuple>::type first_type;
typedef typename std::tuple_element<1, InputTuple>::type second_type;
typedef typename std::tuple_element<2, InputTuple>::type third_type;
typedef typename std::tuple_element<3, InputTuple>::type fourth_type;
typedef typename std::tuple_element<4, InputTuple>::type fifth_type;
typedef typename std::tuple_element<5, InputTuple>::type sixth_type;
typedef typename std::tuple_element<6, InputTuple>::type seventh_type;
typedef typename std::tuple_element<7, InputTuple>::type eighth_type;
typedef typename std::tuple_element<8, InputTuple>::type nineth_type;
typedef tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type, nineth_type> type;
};
template<typename InputTuple>
struct input_types<10, InputTuple> {
typedef typename std::tuple_element<0, InputTuple>::type first_type;
typedef typename std::tuple_element<1, InputTuple>::type second_type;
typedef typename std::tuple_element<2, InputTuple>::type third_type;
typedef typename std::tuple_element<3, InputTuple>::type fourth_type;
typedef typename std::tuple_element<4, InputTuple>::type fifth_type;
typedef typename std::tuple_element<5, InputTuple>::type sixth_type;
typedef typename std::tuple_element<6, InputTuple>::type seventh_type;
typedef typename std::tuple_element<7, InputTuple>::type eighth_type;
typedef typename std::tuple_element<8, InputTuple>::type nineth_type;
typedef typename std::tuple_element<9, InputTuple>::type tenth_type;
typedef tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type, nineth_type,
tenth_type> type;
};
// type generators
template<typename OutputTuple>
struct indexer_types : public input_types<std::tuple_size<OutputTuple>::value, OutputTuple> {
static const int N = std::tuple_size<OutputTuple>::value;
typedef typename input_types<N, OutputTuple>::type output_type;
typedef typename wrap_tuple_elements<N,indexer_input_port,OutputTuple>::type input_ports_type;
typedef indexer_node_FE<input_ports_type,output_type,OutputTuple> indexer_FE_type;
typedef indexer_node_base<input_ports_type, output_type, OutputTuple> indexer_base_type;
};
template<class OutputTuple>
class unfolded_indexer_node : public indexer_types<OutputTuple>::indexer_base_type {
public:
typedef typename indexer_types<OutputTuple>::input_ports_type input_ports_type;
typedef OutputTuple tuple_types;
typedef typename indexer_types<OutputTuple>::output_type output_type;
private:
typedef typename indexer_types<OutputTuple>::indexer_base_type base_type;
public:
unfolded_indexer_node(graph& g) : base_type(g) {}
unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {}
};
#endif /* __TBB__flow_graph_indexer_impl_H */
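A hedged usage sketch of what the indexer machinery above looks like through the public indexer_node API (the include path is again an assumption about this tree): each input port K wraps its message in a tagged_msg whose tag() is K, so a single successor can demultiplex heterogeneous inputs.
#include <iostream>
#include "third_party/tbb/flow_graph.hh"     // assumed vendored location of the public header
using namespace tbb::flow;
int main() {
    graph g;
    typedef indexer_node<int, float> indexer_t;            // port 0 carries int, port 1 carries float
    indexer_t idx(g);
    function_node<indexer_t::output_type> sink(g, serial,
        [](const indexer_t::output_type& msg) {            // msg is a tagged_msg<size_t, int, float>
            if (msg.tag() == 0)
                std::cout << "int: " << cast_to<int>(msg) << '\n';
            else
                std::cout << "float: " << cast_to<float>(msg) << '\n';
        });
    make_edge(idx, sink);
    input_port<0>(idx).try_put(7);                          // delivered with tag 0
    input_port<1>(idx).try_put(2.5f);                       // delivered with tag 1
    g.wait_for_all();
    return 0;
}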

View file

@@ -0,0 +1,280 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_item_buffer_impl_H
#define __TBB__flow_graph_item_buffer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "third_party/tbb/detail/_aligned_space.hh"
// in namespace tbb::flow::interfaceX (included in _flow_graph_node_impl.h)
//! Expandable buffer of items. The possible operations are push, pop,
//* tests for empty and so forth. No mutual exclusion is built in.
//* objects are constructed into the buffer in place and explicitly destroyed. get_my_item gives
// a read-only reference to the item in the buffer. set_my_item may be called
// with either an empty or occupied slot.
template <typename T, typename A=cache_aligned_allocator<T> >
class item_buffer {
public:
typedef T item_type;
enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 };
protected:
typedef size_t size_type;
typedef std::pair<item_type, buffer_item_state> aligned_space_item;
typedef aligned_space<aligned_space_item> buffer_item_type;
typedef typename allocator_traits<A>::template rebind_alloc<buffer_item_type> allocator_type;
buffer_item_type *my_array;
size_type my_array_size;
static const size_type initial_buffer_size = 4;
size_type my_head;
size_type my_tail;
bool buffer_empty() const { return my_head == my_tail; }
aligned_space_item &item(size_type i) {
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value), nullptr);
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), nullptr);
return *my_array[i & (my_array_size - 1) ].begin();
}
const aligned_space_item &item(size_type i) const {
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value), nullptr);
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), nullptr);
return *my_array[i & (my_array_size-1)].begin();
}
bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); }
#if TBB_USE_ASSERT
bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; }
#endif
// object management in buffer
const item_type &get_my_item(size_t i) const {
__TBB_ASSERT(my_item_valid(i),"attempt to get invalid item");
item_type* itm = const_cast<item_type*>(reinterpret_cast<const item_type*>(&item(i).first));
return *itm;
}
// may be called with an empty slot or a slot that has already been constructed into.
void set_my_item(size_t i, const item_type &o) {
if(item(i).second != no_item) {
destroy_item(i);
}
new(&(item(i).first)) item_type(o);
item(i).second = has_item;
}
// destructively-fetch an object from the buffer
void fetch_item(size_t i, item_type &o) {
__TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot");
o = get_my_item(i); // could have std::move assign semantics
destroy_item(i);
}
// move an existing item from one slot to another. The moved-to slot must be unoccupied,
// the moved-from slot must exist and not be reserved. Afterwards, from will be empty and
// to will be occupied but not reserved.
void move_item(size_t to, size_t from) {
__TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot");
__TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot");
set_my_item(to, get_my_item(from)); // could have std::move semantics
destroy_item(from);
}
// put an item in an empty slot. Return true if successful, else false
bool place_item(size_t here, const item_type &me) {
#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES
if(my_item_valid(here)) return false;
#endif
set_my_item(here, me);
return true;
}
// could be implemented with std::move semantics
void swap_items(size_t i, size_t j) {
__TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)");
item_type temp = get_my_item(i);
set_my_item(i, get_my_item(j));
set_my_item(j, temp);
}
void destroy_item(size_type i) {
__TBB_ASSERT(my_item_valid(i), "destruction of invalid item");
item(i).first.~item_type();
item(i).second = no_item;
}
// returns the front element
const item_type& front() const
{
__TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item");
return get_my_item(my_head);
}
// returns the back element
const item_type& back() const
{
__TBB_ASSERT(my_item_valid(my_tail - 1), "attempt to fetch head non-item");
return get_my_item(my_tail - 1);
}
// following methods are for reservation of the front of a buffer.
void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; }
void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; }
void destroy_front() { destroy_item(my_head); ++my_head; }
void destroy_back() { destroy_item(my_tail-1); --my_tail; }
// we have to be able to test against a new tail value without changing my_tail
// grow_array doesn't work if we change my_tail when the old array is too small
size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; }
size_type capacity() { return my_array_size; }
// sequencer_node does not use this method, so we don't
// need a version that passes in the new_tail value.
bool buffer_full() { return size() >= capacity(); }
//! Grows the internal array.
void grow_my_array( size_t minimum_size ) {
// test that we haven't made the structure inconsistent.
__TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity");
size_type new_size = my_array_size ? 2*my_array_size : initial_buffer_size;
while( new_size<minimum_size )
new_size*=2;
buffer_item_type* new_array = allocator_type().allocate(new_size);
// initialize validity to "no"
for( size_type i=0; i<new_size; ++i ) { new_array[i].begin()->second = no_item; }
for( size_type i=my_head; i<my_tail; ++i) {
if(my_item_valid(i)) { // sequencer_node may have empty slots
// placement-new copy-construct; could be std::move
char *new_space = (char *)&(new_array[i&(new_size-1)].begin()->first);
(void)new(new_space) item_type(get_my_item(i));
new_array[i&(new_size-1)].begin()->second = item(i).second;
}
}
clean_up_buffer(/*reset_pointers*/false);
my_array = new_array;
my_array_size = new_size;
}
bool push_back(item_type &v) {
if(buffer_full()) {
grow_my_array(size() + 1);
}
set_my_item(my_tail, v);
++my_tail;
return true;
}
bool pop_back(item_type &v) {
if (!my_item_valid(my_tail-1)) {
return false;
}
v = this->back();
destroy_back();
return true;
}
bool pop_front(item_type &v) {
if(!my_item_valid(my_head)) {
return false;
}
v = this->front();
destroy_front();
return true;
}
// This is used both for reset and for grow_my_array. In the case of grow_my_array
// we want to retain the values of the head and tail.
void clean_up_buffer(bool reset_pointers) {
if (my_array) {
for( size_type i=my_head; i<my_tail; ++i ) {
if(my_item_valid(i))
destroy_item(i);
}
allocator_type().deallocate(my_array,my_array_size);
}
my_array = nullptr;
if(reset_pointers) {
my_head = my_tail = my_array_size = 0;
}
}
public:
//! Constructor
item_buffer( ) : my_array(nullptr), my_array_size(0),
my_head(0), my_tail(0) {
grow_my_array(initial_buffer_size);
}
~item_buffer() {
clean_up_buffer(/*reset_pointers*/true);
}
void reset() { clean_up_buffer(/*reset_pointers*/true); grow_my_array(initial_buffer_size); }
};
//! item_buffer with reservable front-end. NOTE: if reserving, do not
//* complete operation with pop_front(); use consume_front().
//* No synchronization built-in.
template<typename T, typename A=cache_aligned_allocator<T> >
class reservable_item_buffer : public item_buffer<T, A> {
protected:
using item_buffer<T, A>::my_item_valid;
using item_buffer<T, A>::my_head;
public:
reservable_item_buffer() : item_buffer<T, A>(), my_reserved(false) {}
void reset() {my_reserved = false; item_buffer<T,A>::reset(); }
protected:
bool reserve_front(T &v) {
if(my_reserved || !my_item_valid(this->my_head)) return false;
my_reserved = true;
// reserving the head
v = this->front();
this->reserve_item(this->my_head);
return true;
}
void consume_front() {
__TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item");
this->destroy_front();
my_reserved = false;
}
void release_front() {
__TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item");
this->release_item(this->my_head);
my_reserved = false;
}
bool my_reserved;
};
#endif // __TBB__flow_graph_item_buffer_impl_H
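The slot addressing above only works because the capacity is always a power of two: item(i) masks with my_array_size - 1, and grow_my_array() doubles the capacity and re-places live items under the new mask. A standalone sketch of that idea (invented names, not TBB code), reduced to ints:
#include <cassert>
#include <cstddef>
#include <vector>
// Unbounded FIFO over a power-of-two array: index i lives in slot i & (capacity - 1),
// so head and tail are monotonically increasing counters that never wrap explicitly.
class ring_buffer {
    std::vector<int> slots_ = std::vector<int>(4);          // capacity stays a power of two
    std::size_t head_ = 0, tail_ = 0;
    int& slot(std::size_t i) { return slots_[i & (slots_.size() - 1)]; }
    void grow() {
        std::vector<int> bigger(slots_.size() * 2);
        for (std::size_t i = head_; i < tail_; ++i)
            bigger[i & (bigger.size() - 1)] = slot(i);      // re-place live items under the new mask
        slots_.swap(bigger);
    }
public:
    void push_back(int v) {
        if (tail_ - head_ == slots_.size()) grow();         // full: double, like grow_my_array()
        slot(tail_++) = v;
    }
    int pop_front() {
        assert(head_ != tail_ && "pop from empty buffer");
        return slot(head_++);
    }
};
int main() {
    ring_buffer b;
    for (int i = 0; i < 10; ++i) b.push_back(i);            // forces growth from 4 to 8, then to 16
    assert(b.pop_front() == 0);
    assert(b.pop_front() == 1);
    return 0;
}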

File diff suppressed because it is too large

View file

@@ -0,0 +1,775 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_node_impl_H
#define __TBB__flow_graph_node_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "third_party/tbb/detail/_flow_graph_item_buffer_impl.hh"
template< typename T, typename A >
class function_input_queue : public item_buffer<T,A> {
public:
bool empty() const {
return this->buffer_empty();
}
const T& front() const {
return this->item_buffer<T, A>::front();
}
void pop() {
this->destroy_front();
}
bool push( T& t ) {
return this->push_back( t );
}
};
//! Input and scheduling for a function node that takes a type Input as input
// The only up-ref is apply_body_impl, which should implement the function
// call and any handling of the result.
template< typename Input, typename Policy, typename A, typename ImplType >
class function_input_base : public receiver<Input>, no_assign {
enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency };
typedef function_input_base<Input, Policy, A, ImplType> class_type;
public:
//! The input type of this receiver
typedef Input input_type;
typedef typename receiver<input_type>::predecessor_type predecessor_type;
typedef predecessor_cache<input_type, null_mutex > predecessor_cache_type;
typedef function_input_queue<input_type, A> input_queue_type;
typedef typename allocator_traits<A>::template rebind_alloc<input_queue_type> allocator_type;
static_assert(!has_policy<queueing, Policy>::value || !has_policy<rejecting, Policy>::value, "");
//! Constructor for function_input_base
function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority, bool is_no_throw )
: my_graph_ref(g), my_max_concurrency(max_concurrency)
, my_concurrency(0), my_priority(a_priority), my_is_no_throw(is_no_throw)
, my_queue(!has_policy<rejecting, Policy>::value ? new input_queue_type() : nullptr)
, my_predecessors(this)
, forwarder_busy(false)
{
my_aggregator.initialize_handler(handler_type(this));
}
//! Copy constructor
function_input_base( const function_input_base& src )
: function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority, src.my_is_no_throw) {}
//! Destructor
// The queue is allocated by the constructor for {multi}function_node.
// TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead.
// This would be an interface-breaking change.
virtual ~function_input_base() {
delete my_queue;
my_queue = nullptr;
}
graph_task* try_put_task( const input_type& t) override {
if ( my_is_no_throw )
return try_put_task_impl(t, has_policy<lightweight, Policy>());
else
return try_put_task_impl(t, std::false_type());
}
//! Adds src to the list of cached predecessors.
bool register_predecessor( predecessor_type &src ) override {
operation_type op_data(reg_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
return true;
}
//! Removes src from the list of cached predecessors.
bool remove_predecessor( predecessor_type &src ) override {
operation_type op_data(rem_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
return true;
}
protected:
void reset_function_input_base( reset_flags f) {
my_concurrency = 0;
if(my_queue) {
my_queue->reset();
}
reset_receiver(f);
forwarder_busy = false;
}
graph& my_graph_ref;
const size_t my_max_concurrency;
size_t my_concurrency;
node_priority_t my_priority;
const bool my_is_no_throw;
input_queue_type *my_queue;
predecessor_cache<input_type, null_mutex > my_predecessors;
void reset_receiver( reset_flags f) {
if( f & rf_clear_edges) my_predecessors.clear();
else
my_predecessors.reset();
__TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed");
}
graph& graph_reference() const override {
return my_graph_ref;
}
graph_task* try_get_postponed_task(const input_type& i) {
operation_type op_data(i, app_body_bypass); // tries to pop an item or get_item
my_aggregator.execute(&op_data);
return op_data.bypass_t;
}
private:
friend class apply_body_task_bypass< class_type, input_type >;
friend class forward_task_bypass< class_type >;
class operation_type : public aggregated_operation< operation_type > {
public:
char type;
union {
input_type *elem;
predecessor_type *r;
};
graph_task* bypass_t;
operation_type(const input_type& e, op_type t) :
type(char(t)), elem(const_cast<input_type*>(&e)), bypass_t(nullptr) {}
operation_type(op_type t) : type(char(t)), r(nullptr), bypass_t(nullptr) {}
};
bool forwarder_busy;
typedef aggregating_functor<class_type, operation_type> handler_type;
friend class aggregating_functor<class_type, operation_type>;
aggregator< handler_type, operation_type > my_aggregator;
graph_task* perform_queued_requests() {
graph_task* new_task = nullptr;
if(my_queue) {
if(!my_queue->empty()) {
++my_concurrency;
new_task = create_body_task(my_queue->front());
my_queue->pop();
}
}
else {
input_type i;
if(my_predecessors.get_item(i)) {
++my_concurrency;
new_task = create_body_task(i);
}
}
return new_task;
}
void handle_operations(operation_type *op_list) {
operation_type* tmp;
while (op_list) {
tmp = op_list;
op_list = op_list->next;
switch (tmp->type) {
case reg_pred:
my_predecessors.add(*(tmp->r));
tmp->status.store(SUCCEEDED, std::memory_order_release);
if (!forwarder_busy) {
forwarder_busy = true;
spawn_forward_task();
}
break;
case rem_pred:
my_predecessors.remove(*(tmp->r));
tmp->status.store(SUCCEEDED, std::memory_order_release);
break;
case app_body_bypass: {
tmp->bypass_t = nullptr;
__TBB_ASSERT(my_max_concurrency != 0, nullptr);
--my_concurrency;
if(my_concurrency<my_max_concurrency)
tmp->bypass_t = perform_queued_requests();
tmp->status.store(SUCCEEDED, std::memory_order_release);
}
break;
case tryput_bypass: internal_try_put_task(tmp); break;
case try_fwd: internal_forward(tmp); break;
case occupy_concurrency:
if (my_concurrency < my_max_concurrency) {
++my_concurrency;
tmp->status.store(SUCCEEDED, std::memory_order_release);
} else {
tmp->status.store(FAILED, std::memory_order_release);
}
break;
}
}
}
//! Put to the node, but return the task instead of enqueueing it
void internal_try_put_task(operation_type *op) {
__TBB_ASSERT(my_max_concurrency != 0, nullptr);
if (my_concurrency < my_max_concurrency) {
++my_concurrency;
graph_task * new_task = create_body_task(*(op->elem));
op->bypass_t = new_task;
op->status.store(SUCCEEDED, std::memory_order_release);
} else if ( my_queue && my_queue->push(*(op->elem)) ) {
op->bypass_t = SUCCESSFULLY_ENQUEUED;
op->status.store(SUCCEEDED, std::memory_order_release);
} else {
op->bypass_t = nullptr;
op->status.store(FAILED, std::memory_order_release);
}
}
//! Creates tasks for postponed messages if available and if concurrency allows
void internal_forward(operation_type *op) {
op->bypass_t = nullptr;
if (my_concurrency < my_max_concurrency)
op->bypass_t = perform_queued_requests();
if(op->bypass_t)
op->status.store(SUCCEEDED, std::memory_order_release);
else {
forwarder_busy = false;
op->status.store(FAILED, std::memory_order_release);
}
}
graph_task* internal_try_put_bypass( const input_type& t ) {
operation_type op_data(t, tryput_bypass);
my_aggregator.execute(&op_data);
if( op_data.status == SUCCEEDED ) {
return op_data.bypass_t;
}
return nullptr;
}
graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type ) {
if( my_max_concurrency == 0 ) {
return apply_body_bypass(t);
} else {
operation_type check_op(t, occupy_concurrency);
my_aggregator.execute(&check_op);
if( check_op.status == SUCCEEDED ) {
return apply_body_bypass(t);
}
return internal_try_put_bypass(t);
}
}
graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type ) {
if( my_max_concurrency == 0 ) {
return create_body_task(t);
} else {
return internal_try_put_bypass(t);
}
}
//! Applies the body to the provided input
// then decides if more work is available
graph_task* apply_body_bypass( const input_type &i ) {
return static_cast<ImplType *>(this)->apply_body_impl_bypass(i);
}
//! allocates a task to apply a body
graph_task* create_body_task( const input_type &input ) {
if (!is_graph_active(my_graph_ref)) {
return nullptr;
}
// TODO revamp: extract helper for common graph task allocation part
small_object_allocator allocator{};
typedef apply_body_task_bypass<class_type, input_type> task_type;
graph_task* t = allocator.new_object<task_type>( my_graph_ref, allocator, *this, input, my_priority );
graph_reference().reserve_wait();
return t;
}
//! This is executed by an enqueued task, the "forwarder"
graph_task* forward_task() {
operation_type op_data(try_fwd);
graph_task* rval = nullptr;
do {
op_data.status = WAIT;
my_aggregator.execute(&op_data);
if(op_data.status == SUCCEEDED) {
graph_task* ttask = op_data.bypass_t;
__TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, nullptr);
rval = combine_tasks(my_graph_ref, rval, ttask);
}
} while (op_data.status == SUCCEEDED);
return rval;
}
inline graph_task* create_forward_task() {
if (!is_graph_active(my_graph_ref)) {
return nullptr;
}
small_object_allocator allocator{};
typedef forward_task_bypass<class_type> task_type;
graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, my_priority );
graph_reference().reserve_wait();
return t;
}
//! Spawns a task that calls forward()
inline void spawn_forward_task() {
graph_task* tp = create_forward_task();
if(tp) {
spawn_in_graph_arena(graph_reference(), *tp);
}
}
node_priority_t priority() const override { return my_priority; }
}; // function_input_base
//! Implements methods for a function node that takes a type Input as input and sends
// a type Output to its successors.
template< typename Input, typename Output, typename Policy, typename A>
class function_input : public function_input_base<Input, Policy, A, function_input<Input,Output,Policy,A> > {
public:
typedef Input input_type;
typedef Output output_type;
typedef function_body<input_type, output_type> function_body_type;
typedef function_input<Input, Output, Policy,A> my_class;
typedef function_input_base<Input, Policy, A, my_class> base_type;
typedef function_input_queue<input_type, A> input_queue_type;
// constructor
template<typename Body>
function_input(
graph &g, size_t max_concurrency, Body& body, node_priority_t a_priority )
: base_type(g, max_concurrency, a_priority, noexcept(tbb::detail::invoke(body, input_type())))
, my_body( new function_body_leaf< input_type, output_type, Body>(body) )
, my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) {
}
//! Copy constructor
function_input( const function_input& src ) :
base_type(src),
my_body( src.my_init_body->clone() ),
my_init_body(src.my_init_body->clone() ) {
}
#if __INTEL_COMPILER <= 2021
// Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited
// class while the parent class has the virtual keyword for the destructor.
virtual
#endif
~function_input() {
delete my_body;
delete my_init_body;
}
template< typename Body >
Body copy_function_object() {
function_body_type &body_ref = *this->my_body;
return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
}
output_type apply_body_impl( const input_type& i) {
// There is an extra copy needed to capture the
// body execution without the try_put
fgt_begin_body( my_body );
output_type v = tbb::detail::invoke(*my_body, i);
fgt_end_body( my_body );
return v;
}
//TODO: consider moving into the base class
graph_task* apply_body_impl_bypass( const input_type &i) {
output_type v = apply_body_impl(i);
graph_task* postponed_task = nullptr;
if( base_type::my_max_concurrency != 0 ) {
postponed_task = base_type::try_get_postponed_task(i);
__TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, nullptr);
}
if( postponed_task ) {
// make the task available for other workers since we do not know successors'
// execution policy
spawn_in_graph_arena(base_type::graph_reference(), *postponed_task);
}
graph_task* successor_task = successors().try_put_task(v);
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (push)
#pragma warning (disable: 4127) /* suppress conditional expression is constant */
#endif
if(has_policy<lightweight, Policy>::value) {
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (pop)
#endif
if(!successor_task) {
// Return confirmative status since current
// node's body has been executed anyway
successor_task = SUCCESSFULLY_ENQUEUED;
}
}
return successor_task;
}
protected:
void reset_function_input(reset_flags f) {
base_type::reset_function_input_base(f);
if(f & rf_reset_bodies) {
function_body_type *tmp = my_init_body->clone();
delete my_body;
my_body = tmp;
}
}
function_body_type *my_body;
function_body_type *my_init_body;
virtual broadcast_cache<output_type > &successors() = 0;
}; // function_input
// helper templates to clear the successor edges of the output ports of a multifunction_node
template<int N> struct clear_element {
template<typename P> static void clear_this(P &p) {
(void)std::get<N-1>(p).successors().clear();
clear_element<N-1>::clear_this(p);
}
#if TBB_USE_ASSERT
template<typename P> static bool this_empty(P &p) {
if(std::get<N-1>(p).successors().empty())
return clear_element<N-1>::this_empty(p);
return false;
}
#endif
};
template<> struct clear_element<1> {
template<typename P> static void clear_this(P &p) {
(void)std::get<0>(p).successors().clear();
}
#if TBB_USE_ASSERT
template<typename P> static bool this_empty(P &p) {
return std::get<0>(p).successors().empty();
}
#endif
};
template <typename OutputTuple>
struct init_output_ports {
template <typename... Args>
static OutputTuple call(graph& g, const std::tuple<Args...>&) {
return OutputTuple(Args(g)...);
}
}; // struct init_output_ports
//! Implements methods for a function node that takes a type Input as input
// and has a tuple of output ports specified.
template< typename Input, typename OutputPortSet, typename Policy, typename A>
class multifunction_input : public function_input_base<Input, Policy, A, multifunction_input<Input,OutputPortSet,Policy,A> > {
public:
static const int N = std::tuple_size<OutputPortSet>::value;
typedef Input input_type;
typedef OutputPortSet output_ports_type;
typedef multifunction_body<input_type, output_ports_type> multifunction_body_type;
typedef multifunction_input<Input, OutputPortSet, Policy, A> my_class;
typedef function_input_base<Input, Policy, A, my_class> base_type;
typedef function_input_queue<input_type, A> input_queue_type;
// constructor
template<typename Body>
multifunction_input(graph &g, size_t max_concurrency,Body& body, node_priority_t a_priority )
: base_type(g, max_concurrency, a_priority, noexcept(tbb::detail::invoke(body, input_type(), my_output_ports)))
, my_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) )
, my_init_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) )
, my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)){
}
//! Copy constructor
multifunction_input( const multifunction_input& src ) :
base_type(src),
my_body( src.my_init_body->clone() ),
my_init_body(src.my_init_body->clone() ),
my_output_ports( init_output_ports<output_ports_type>::call(src.my_graph_ref, my_output_ports) ) {
}
~multifunction_input() {
delete my_body;
delete my_init_body;
}
template< typename Body >
Body copy_function_object() {
multifunction_body_type &body_ref = *this->my_body;
return *static_cast<Body*>(dynamic_cast< multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body_ptr());
}
// for multifunction nodes we do not have a single successor as such. So we just tell
// the task we were successful.
//TODO: consider moving common parts with implementation in function_input into separate function
graph_task* apply_body_impl_bypass( const input_type &i ) {
fgt_begin_body( my_body );
(*my_body)(i, my_output_ports);
fgt_end_body( my_body );
graph_task* ttask = nullptr;
if(base_type::my_max_concurrency != 0) {
ttask = base_type::try_get_postponed_task(i);
}
return ttask ? ttask : SUCCESSFULLY_ENQUEUED;
}
output_ports_type &output_ports(){ return my_output_ports; }
protected:
void reset(reset_flags f) {
base_type::reset_function_input_base(f);
if(f & rf_clear_edges)clear_element<N>::clear_this(my_output_ports);
if(f & rf_reset_bodies) {
multifunction_body_type* tmp = my_init_body->clone();
delete my_body;
my_body = tmp;
}
__TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "multifunction_node reset failed");
}
multifunction_body_type *my_body;
multifunction_body_type *my_init_body;
output_ports_type my_output_ports;
}; // multifunction_input
// template to refer to an output port of a multifunction_node
template<size_t N, typename MOP>
typename std::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) {
return std::get<N>(op.output_ports());
}
inline void check_task_and_spawn(graph& g, graph_task* t) {
if (t && t != SUCCESSFULLY_ENQUEUED) {
spawn_in_graph_arena(g, *t);
}
}
// helper structs for split_node
template<int N>
struct emit_element {
template<typename T, typename P>
static graph_task* emit_this(graph& g, const T &t, P &p) {
// TODO: consider to collect all the tasks in task_list and spawn them all at once
graph_task* last_task = std::get<N-1>(p).try_put_task(std::get<N-1>(t));
check_task_and_spawn(g, last_task);
return emit_element<N-1>::emit_this(g,t,p);
}
};
template<>
struct emit_element<1> {
template<typename T, typename P>
static graph_task* emit_this(graph& g, const T &t, P &p) {
graph_task* last_task = std::get<0>(p).try_put_task(std::get<0>(t));
check_task_and_spawn(g, last_task);
return SUCCESSFULLY_ENQUEUED;
}
};
//! Implements methods for an executable node that takes continue_msg as input
template< typename Output, typename Policy>
class continue_input : public continue_receiver {
public:
//! The input type of this receiver
typedef continue_msg input_type;
//! The output type of this receiver
typedef Output output_type;
typedef function_body<input_type, output_type> function_body_type;
typedef continue_input<output_type, Policy> class_type;
template< typename Body >
continue_input( graph &g, Body& body, node_priority_t a_priority )
: continue_receiver(/*number_of_predecessors=*/0, a_priority)
, my_graph_ref(g)
, my_body( new function_body_leaf< input_type, output_type, Body>(body) )
, my_init_body( new function_body_leaf< input_type, output_type, Body>(body) )
{ }
template< typename Body >
continue_input( graph &g, int number_of_predecessors,
Body& body, node_priority_t a_priority )
: continue_receiver( number_of_predecessors, a_priority )
, my_graph_ref(g)
, my_body( new function_body_leaf< input_type, output_type, Body>(body) )
, my_init_body( new function_body_leaf< input_type, output_type, Body>(body) )
{ }
continue_input( const continue_input& src ) : continue_receiver(src),
my_graph_ref(src.my_graph_ref),
my_body( src.my_init_body->clone() ),
my_init_body( src.my_init_body->clone() ) {}
~continue_input() {
delete my_body;
delete my_init_body;
}
template< typename Body >
Body copy_function_object() {
function_body_type &body_ref = *my_body;
return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
}
void reset_receiver( reset_flags f) override {
continue_receiver::reset_receiver(f);
if(f & rf_reset_bodies) {
function_body_type *tmp = my_init_body->clone();
delete my_body;
my_body = tmp;
}
}
protected:
graph& my_graph_ref;
function_body_type *my_body;
function_body_type *my_init_body;
virtual broadcast_cache<output_type > &successors() = 0;
friend class apply_body_task_bypass< class_type, continue_msg >;
//! Applies the body to the provided input
graph_task* apply_body_bypass( input_type ) {
// There is an extra copy needed to capture the
// body execution without the try_put
fgt_begin_body( my_body );
output_type v = (*my_body)( continue_msg() );
fgt_end_body( my_body );
return successors().try_put_task( v );
}
graph_task* execute() override {
if(!is_graph_active(my_graph_ref)) {
return nullptr;
}
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (push)
#pragma warning (disable: 4127) /* suppress conditional expression is constant */
#endif
if(has_policy<lightweight, Policy>::value) {
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (pop)
#endif
return apply_body_bypass( continue_msg() );
}
else {
small_object_allocator allocator{};
typedef apply_body_task_bypass<class_type, continue_msg> task_type;
graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority );
graph_reference().reserve_wait();
return t;
}
}
graph& graph_reference() const override {
return my_graph_ref;
}
}; // continue_input
//! Implements methods for both executable and function nodes that puts Output to its successors
template< typename Output >
class function_output : public sender<Output> {
public:
template<int N> friend struct clear_element;
typedef Output output_type;
typedef typename sender<output_type>::successor_type successor_type;
typedef broadcast_cache<output_type> broadcast_cache_type;
function_output(graph& g) : my_successors(this), my_graph_ref(g) {}
function_output(const function_output& other) = delete;
//! Adds a new successor to this node
bool register_successor( successor_type &r ) override {
successors().register_successor( r );
return true;
}
//! Removes a successor from this node
bool remove_successor( successor_type &r ) override {
successors().remove_successor( r );
return true;
}
broadcast_cache_type &successors() { return my_successors; }
graph& graph_reference() const { return my_graph_ref; }
protected:
broadcast_cache_type my_successors;
graph& my_graph_ref;
}; // function_output
template< typename Output >
class multifunction_output : public function_output<Output> {
public:
typedef Output output_type;
typedef function_output<output_type> base_type;
using base_type::my_successors;
multifunction_output(graph& g) : base_type(g) {}
multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {}
bool try_put(const output_type &i) {
graph_task *res = try_put_task(i);
if( !res ) return false;
if( res != SUCCESSFULLY_ENQUEUED ) {
// wrapping in task_arena::execute() is not needed since the method is called from
// inside task::execute()
spawn_in_graph_arena(graph_reference(), *res);
}
return true;
}
using base_type::graph_reference;
protected:
graph_task* try_put_task(const output_type &i) {
return my_successors.try_put_task(i);
}
template <int N> friend struct emit_element;
}; // multifunction_output
//composite_node
template<typename CompositeType>
void add_nodes_impl(CompositeType*, bool) {}
template< typename CompositeType, typename NodeType1, typename... NodeTypes >
void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) {
void *addr = const_cast<NodeType1 *>(&n1);
fgt_alias_port(c_node, addr, visible);
add_nodes_impl(c_node, visible, n...);
}
#endif // __TBB__flow_graph_node_impl_H
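A hedged sketch of the user-facing shape of multifunction_input/multifunction_output above (include path assumed as before): a multifunction_node body receives the whole output_ports_type tuple and pushes to whichever ports it chooses, and output_port<N>() from this file selects a port for make_edge.
#include <iostream>
#include <tuple>
#include "third_party/tbb/flow_graph.hh"     // assumed vendored location of the public header
using namespace tbb::flow;
int main() {
    graph g;
    // Input type int, two output ports: evens go to port 0, odds to port 1.
    typedef multifunction_node<int, std::tuple<int, int>> splitter_t;
    splitter_t splitter(g, unlimited,
        [](const int& v, splitter_t::output_ports_type& ports) {
            if (v % 2 == 0) std::get<0>(ports).try_put(v);
            else            std::get<1>(ports).try_put(v);
        });
    function_node<int, int> evens(g, serial, [](int v) { std::cout << "even " << v << '\n'; return v; });
    function_node<int, int> odds (g, serial, [](int v) { std::cout << "odd "  << v << '\n'; return v; });
    make_edge(output_port<0>(splitter), evens);
    make_edge(output_port<1>(splitter), odds);
    for (int i = 0; i < 6; ++i) splitter.try_put(i);
    g.wait_for_all();
    return 0;
}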

View file

@@ -0,0 +1,266 @@
// clang-format off
/*
Copyright (c) 2020-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_flow_graph_node_set_impl_H
#define __TBB_flow_graph_node_set_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// Included in namespace tbb::detail::d1 (in flow_graph.h)
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
// Visual Studio 2019 reports an error while calling predecessor_selector::get and successor_selector::get
// Seems like the well-formed expression in trailing decltype is treated as ill-formed
// TODO: investigate problems with decltype in trailing return types or find the cross-platform solution
#define __TBB_MSVC_DISABLE_TRAILING_DECLTYPE (_MSC_VER >= 1900)
namespace order {
struct undefined {};
struct following {};
struct preceding {};
}
class get_graph_helper {
public:
// TODO: consider making graph_reference() public and consistent interface to get a reference to the graph
// and remove get_graph_helper
template <typename T>
static graph& get(const T& object) {
return get_impl(object, std::is_base_of<graph_node, T>());
}
private:
// Get graph from the object of type derived from graph_node
template <typename T>
static graph& get_impl(const T& object, std::true_type) {
return static_cast<const graph_node*>(&object)->my_graph;
}
template <typename T>
static graph& get_impl(const T& object, std::false_type) {
return object.graph_reference();
}
};
template<typename Order, typename... Nodes>
struct node_set {
typedef Order order_type;
std::tuple<Nodes&...> nodes;
node_set(Nodes&... ns) : nodes(ns...) {}
template <typename... Nodes2>
node_set(const node_set<order::undefined, Nodes2...>& set) : nodes(set.nodes) {}
graph& graph_reference() const {
return get_graph_helper::get(std::get<0>(nodes));
}
};
namespace alias_helpers {
template <typename T> using output_type = typename T::output_type;
template <typename T> using output_ports_type = typename T::output_ports_type;
template <typename T> using input_type = typename T::input_type;
template <typename T> using input_ports_type = typename T::input_ports_type;
} // namespace alias_helpers
template <typename T>
using has_output_type = supports<T, alias_helpers::output_type>;
template <typename T>
using has_input_type = supports<T, alias_helpers::input_type>;
template <typename T>
using has_input_ports_type = supports<T, alias_helpers::input_ports_type>;
template <typename T>
using has_output_ports_type = supports<T, alias_helpers::output_ports_type>;
template<typename T>
struct is_sender : std::is_base_of<sender<typename T::output_type>, T> {};
template<typename T>
struct is_receiver : std::is_base_of<receiver<typename T::input_type>, T> {};
template <typename Node>
struct is_async_node : std::false_type {};
template <typename... Args>
struct is_async_node<async_node<Args...>> : std::true_type {};
template<typename FirstPredecessor, typename... Predecessors>
node_set<order::following, FirstPredecessor, Predecessors...>
follows(FirstPredecessor& first_predecessor, Predecessors&... predecessors) {
static_assert((conjunction<has_output_type<FirstPredecessor>,
has_output_type<Predecessors>...>::value),
"Not all node's predecessors has output_type typedef");
static_assert((conjunction<is_sender<FirstPredecessor>, is_sender<Predecessors>...>::value),
"Not all node's predecessors are senders");
return node_set<order::following, FirstPredecessor, Predecessors...>(first_predecessor, predecessors...);
}
template<typename... Predecessors>
node_set<order::following, Predecessors...>
follows(node_set<order::undefined, Predecessors...>& predecessors_set) {
static_assert((conjunction<has_output_type<Predecessors>...>::value),
"Not all nodes in the set has output_type typedef");
static_assert((conjunction<is_sender<Predecessors>...>::value),
"Not all nodes in the set are senders");
return node_set<order::following, Predecessors...>(predecessors_set);
}
template<typename FirstSuccessor, typename... Successors>
node_set<order::preceding, FirstSuccessor, Successors...>
precedes(FirstSuccessor& first_successor, Successors&... successors) {
static_assert((conjunction<has_input_type<FirstSuccessor>,
has_input_type<Successors>...>::value),
"Not all node's successors has input_type typedef");
static_assert((conjunction<is_receiver<FirstSuccessor>, is_receiver<Successors>...>::value),
"Not all node's successors are receivers");
return node_set<order::preceding, FirstSuccessor, Successors...>(first_successor, successors...);
}
template<typename... Successors>
node_set<order::preceding, Successors...>
precedes(node_set<order::undefined, Successors...>& successors_set) {
static_assert((conjunction<has_input_type<Successors>...>::value),
"Not all nodes in the set has input_type typedef");
static_assert((conjunction<is_receiver<Successors>...>::value),
"Not all nodes in the set are receivers");
return node_set<order::preceding, Successors...>(successors_set);
}
template <typename Node, typename... Nodes>
node_set<order::undefined, Node, Nodes...>
make_node_set(Node& first_node, Nodes&... nodes) {
return node_set<order::undefined, Node, Nodes...>(first_node, nodes...);
}
template<size_t I>
class successor_selector {
template <typename NodeType>
static auto get_impl(NodeType& node, std::true_type) -> decltype(input_port<I>(node)) {
return input_port<I>(node);
}
template <typename NodeType>
static NodeType& get_impl(NodeType& node, std::false_type) { return node; }
public:
template <typename NodeType>
#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE
static auto& get(NodeType& node)
#else
static auto get(NodeType& node) -> decltype(get_impl(node, has_input_ports_type<NodeType>()))
#endif
{
return get_impl(node, has_input_ports_type<NodeType>());
}
};
template<size_t I>
class predecessor_selector {
template <typename NodeType>
static auto internal_get(NodeType& node, std::true_type) -> decltype(output_port<I>(node)) {
return output_port<I>(node);
}
template <typename NodeType>
static NodeType& internal_get(NodeType& node, std::false_type) { return node;}
template <typename NodeType>
#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE
static auto& get_impl(NodeType& node, std::false_type)
#else
static auto get_impl(NodeType& node, std::false_type) -> decltype(internal_get(node, has_output_ports_type<NodeType>()))
#endif
{
return internal_get(node, has_output_ports_type<NodeType>());
}
template <typename AsyncNode>
static AsyncNode& get_impl(AsyncNode& node, std::true_type) { return node; }
public:
template <typename NodeType>
#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE
static auto& get(NodeType& node)
#else
static auto get(NodeType& node) -> decltype(get_impl(node, is_async_node<NodeType>()))
#endif
{
return get_impl(node, is_async_node<NodeType>());
}
};
template<size_t I>
class make_edges_helper {
public:
template<typename PredecessorsTuple, typename NodeType>
static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) {
make_edge(std::get<I>(predecessors), successor_selector<I>::get(node));
make_edges_helper<I - 1>::connect_predecessors(predecessors, node);
}
template<typename SuccessorsTuple, typename NodeType>
static void connect_successors(NodeType& node, SuccessorsTuple& successors) {
make_edge(predecessor_selector<I>::get(node), std::get<I>(successors));
make_edges_helper<I - 1>::connect_successors(node, successors);
}
};
template<>
struct make_edges_helper<0> {
template<typename PredecessorsTuple, typename NodeType>
static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) {
make_edge(std::get<0>(predecessors), successor_selector<0>::get(node));
}
template<typename SuccessorsTuple, typename NodeType>
static void connect_successors(NodeType& node, SuccessorsTuple& successors) {
make_edge(predecessor_selector<0>::get(node), std::get<0>(successors));
}
};
// TODO: consider adding an overload for making edges between node sets
template<typename NodeType, typename OrderFlagType, typename... Args>
void make_edges(const node_set<OrderFlagType, Args...>& s, NodeType& node) {
const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value;
make_edges_helper<SetSize - 1>::connect_predecessors(s.nodes, node);
}
template <typename NodeType, typename OrderFlagType, typename... Args>
void make_edges(NodeType& node, const node_set<OrderFlagType, Args...>& s) {
const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value;
make_edges_helper<SetSize - 1>::connect_successors(node, s.nodes);
}
template <typename NodeType, typename... Nodes>
void make_edges_in_order(const node_set<order::following, Nodes...>& ns, NodeType& node) {
make_edges(ns, node);
}
template <typename NodeType, typename... Nodes>
void make_edges_in_order(const node_set<order::preceding, Nodes...>& ns, NodeType& node) {
make_edges(node, ns);
}
#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
#endif // __TBB_flow_graph_node_set_impl_H
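A hedged sketch of what follows()/precedes() buy the user once the preview is enabled; it assumes the usual TBB_PREVIEW_FLOW_GRAPH_FEATURES switch turns on __TBB_PREVIEW_FLOW_GRAPH_NODE_SET in this tree, and the include path is an assumption as before. Passing a node_set in place of the graph argument wires the edges through make_edges_in_order() instead of explicit make_edge() calls.
#define TBB_PREVIEW_FLOW_GRAPH_FEATURES 1
#include <iostream>
#include "third_party/tbb/flow_graph.hh"     // assumed vendored location of the public header
using namespace tbb::flow;
int main() {
    graph g;
    broadcast_node<int> b1(g), b2(g);
    // Constructed from follows(b1, b2): edges b1 -> f and b2 -> f are made automatically,
    // and the graph is recovered from the set via get_graph_helper.
    function_node<int, int> f(follows(b1, b2), unlimited, [](int v) { return v + 1; });
    function_node<int, int> sink(g, serial, [](int v) { std::cout << v << '\n'; return v; });
    make_edge(f, sink);
    b1.try_put(10);                                        // prints 11
    b2.try_put(20);                                        // prints 21
    g.wait_for_all();
    return 0;
}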

View file

@@ -0,0 +1,278 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_flow_graph_nodes_deduction_H
#define __TBB_flow_graph_nodes_deduction_H
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
namespace tbb {
namespace detail {
namespace d1 {
template <typename Input, typename Output>
struct declare_body_types {
using input_type = Input;
using output_type = Output;
};
struct NoInputBody {};
template <typename Output>
struct declare_body_types<NoInputBody, Output> {
using output_type = Output;
};
template <typename T> struct body_types;
template <typename T, typename Input, typename Output>
struct body_types<Output (T::*)(const Input&) const> : declare_body_types<Input, Output> {};
template <typename T, typename Input, typename Output>
struct body_types<Output (T::*)(const Input&)> : declare_body_types<Input, Output> {};
template <typename T, typename Input, typename Output>
struct body_types<Output (T::*)(Input&) const> : declare_body_types<Input, Output> {};
template <typename T, typename Input, typename Output>
struct body_types<Output (T::*)(Input&)> : declare_body_types<Input, Output> {};
template <typename T, typename Output>
struct body_types<Output (T::*)(flow_control&) const> : declare_body_types<NoInputBody, Output> {};
template <typename T, typename Output>
struct body_types<Output (T::*)(flow_control&)> : declare_body_types<NoInputBody, Output> {};
template <typename Input, typename Output>
struct body_types<Output (*)(Input&)> : declare_body_types<Input, Output> {};
template <typename Input, typename Output>
struct body_types<Output (*)(const Input&)> : declare_body_types<Input, Output> {};
template <typename Output>
struct body_types<Output (*)(flow_control&)> : declare_body_types<NoInputBody, Output> {};
template <typename Body>
using input_t = typename body_types<Body>::input_type;
template <typename Body>
using output_t = typename body_types<Body>::output_type;
template <typename T, typename Input, typename Output>
auto decide_on_operator_overload(Output (T::*name)(const Input&) const)->decltype(name);
template <typename T, typename Input, typename Output>
auto decide_on_operator_overload(Output (T::*name)(const Input&))->decltype(name);
template <typename T, typename Input, typename Output>
auto decide_on_operator_overload(Output (T::*name)(Input&) const)->decltype(name);
template <typename T, typename Input, typename Output>
auto decide_on_operator_overload(Output (T::*name)(Input&))->decltype(name);
template <typename Input, typename Output>
auto decide_on_operator_overload(Output (*name)(const Input&))->decltype(name);
template <typename Input, typename Output>
auto decide_on_operator_overload(Output (*name)(Input&))->decltype(name);
template <typename Body>
decltype(decide_on_operator_overload(&Body::operator())) decide_on_callable_type(int);
template <typename Body>
decltype(decide_on_operator_overload(std::declval<Body>())) decide_on_callable_type(...);
// Deduction guides for Flow Graph nodes
template <typename GraphOrSet, typename Body>
input_node(GraphOrSet&&, Body)
->input_node<output_t<decltype(decide_on_callable_type<Body>(0))>>;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename NodeSet>
struct decide_on_set;
template <typename Node, typename... Nodes>
struct decide_on_set<node_set<order::following, Node, Nodes...>> {
using type = typename Node::output_type;
};
template <typename Node, typename... Nodes>
struct decide_on_set<node_set<order::preceding, Node, Nodes...>> {
using type = typename Node::input_type;
};
template <typename NodeSet>
using decide_on_set_t = typename decide_on_set<std::decay_t<NodeSet>>::type;
template <typename NodeSet>
broadcast_node(const NodeSet&)
->broadcast_node<decide_on_set_t<NodeSet>>;
template <typename NodeSet>
buffer_node(const NodeSet&)
->buffer_node<decide_on_set_t<NodeSet>>;
template <typename NodeSet>
queue_node(const NodeSet&)
->queue_node<decide_on_set_t<NodeSet>>;
#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename GraphOrProxy, typename Sequencer>
sequencer_node(GraphOrProxy&&, Sequencer)
->sequencer_node<input_t<decltype(decide_on_callable_type<Sequencer>(0))>>;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename NodeSet, typename Compare>
priority_queue_node(const NodeSet&, const Compare&)
->priority_queue_node<decide_on_set_t<NodeSet>, Compare>;
template <typename NodeSet>
priority_queue_node(const NodeSet&)
->priority_queue_node<decide_on_set_t<NodeSet>, std::less<decide_on_set_t<NodeSet>>>;
#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename Key>
struct join_key {
using type = Key;
};
template <typename T>
struct join_key<const T&> {
using type = T&;
};
template <typename Key>
using join_key_t = typename join_key<Key>::type;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename Policy, typename... Predecessors>
join_node(const node_set<order::following, Predecessors...>&, Policy)
->join_node<std::tuple<typename Predecessors::output_type...>,
Policy>;
template <typename Policy, typename Successor, typename... Successors>
join_node(const node_set<order::preceding, Successor, Successors...>&, Policy)
->join_node<typename Successor::input_type, Policy>;
template <typename... Predecessors>
join_node(const node_set<order::following, Predecessors...>)
->join_node<std::tuple<typename Predecessors::output_type...>,
queueing>;
template <typename Successor, typename... Successors>
join_node(const node_set<order::preceding, Successor, Successors...>)
->join_node<typename Successor::input_type, queueing>;
#endif
template <typename GraphOrProxy, typename Body, typename... Bodies>
join_node(GraphOrProxy&&, Body, Bodies...)
->join_node<std::tuple<input_t<decltype(decide_on_callable_type<Body>(0))>,
input_t<decltype(decide_on_callable_type<Bodies>(0))>...>,
key_matching<join_key_t<output_t<decltype(decide_on_callable_type<Body>(0))>>>>;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename... Predecessors>
indexer_node(const node_set<order::following, Predecessors...>&)
->indexer_node<typename Predecessors::output_type...>;
#endif
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename NodeSet>
limiter_node(const NodeSet&, size_t)
->limiter_node<decide_on_set_t<NodeSet>>;
template <typename Predecessor, typename... Predecessors>
split_node(const node_set<order::following, Predecessor, Predecessors...>&)
->split_node<typename Predecessor::output_type>;
template <typename... Successors>
split_node(const node_set<order::preceding, Successors...>&)
->split_node<std::tuple<typename Successors::input_type...>>;
#endif
template <typename GraphOrSet, typename Body, typename Policy>
function_node(GraphOrSet&&,
size_t, Body,
Policy, node_priority_t = no_priority)
->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>,
output_t<decltype(decide_on_callable_type<Body>(0))>,
Policy>;
template <typename GraphOrSet, typename Body>
function_node(GraphOrSet&&, size_t,
Body, node_priority_t = no_priority)
->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>,
output_t<decltype(decide_on_callable_type<Body>(0))>,
queueing>;
template <typename Output>
struct continue_output {
using type = Output;
};
template <>
struct continue_output<void> {
using type = continue_msg;
};
template <typename T>
using continue_output_t = typename continue_output<T>::type;
template <typename GraphOrSet, typename Body, typename Policy>
continue_node(GraphOrSet&&, Body,
Policy, node_priority_t = no_priority)
->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>,
Policy>;
template <typename GraphOrSet, typename Body, typename Policy>
continue_node(GraphOrSet&&,
int, Body,
Policy, node_priority_t = no_priority)
->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>,
Policy>;
template <typename GraphOrSet, typename Body>
continue_node(GraphOrSet&&,
Body, node_priority_t = no_priority)
->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, Policy<void>>;
template <typename GraphOrSet, typename Body>
continue_node(GraphOrSet&&, int,
Body, node_priority_t = no_priority)
->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>,
Policy<void>>;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename NodeSet>
overwrite_node(const NodeSet&)
->overwrite_node<decide_on_set_t<NodeSet>>;
template <typename NodeSet>
write_once_node(const NodeSet&)
->write_once_node<decide_on_set_t<NodeSet>>;
#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
#endif // __TBB_flow_graph_nodes_deduction_H
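The practical effect of the guides above is that, in a C++17 build where __TBB_CPP17_DEDUCTION_GUIDES_PRESENT holds, class template argument deduction can infer a node's Input/Output parameters from its body's call operator. A minimal sketch under that assumption (upstream public header name; variable names are illustrative):

#include "oneapi/tbb/flow_graph.h"   // upstream name; adjust the include path for this tree

int main() {
    tbb::flow::graph g;

    // body_types/decide_on_callable_type inspect the lambda's operator(), so the
    // guide deduces function_node<int, double, queueing> here.
    tbb::flow::function_node fn(g, tbb::flow::unlimited,
                                [](const int& v) -> double { return v * 0.5; });

    // A body taking flow_control& maps to NoInputBody, so only the Output
    // parameter needs deducing: this is an input_node<int>.
    tbb::flow::input_node src(g, [](tbb::flow_control& fc) -> int {
        static int i = 0;
        if (i < 3) return i++;
        fc.stop();
        return 0;
    });

    tbb::flow::make_edge(src, fn);
    src.activate();
    g.wait_for_all();
    return 0;
}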

View file

@@ -0,0 +1,258 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// a hash table buffer that can expand, and can support as many deletions as
// additions, list-based, with elements of list held in array (for destruction
// management), multiplicative hashing (like ets). No synchronization built-in.
//
#ifndef __TBB__flow_graph_hash_buffer_impl_H
#define __TBB__flow_graph_hash_buffer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::flow::interfaceX::internal
// elements in the table are a simple list; we need pointer to next element to
// traverse the chain
template<typename ValueType>
struct buffer_element_type {
// the second parameter below is void * because we can't forward-declare the type
// itself, so we just reinterpret_cast below.
typedef typename aligned_pair<ValueType, void *>::type type;
};
template
<
typename Key, // type of key within ValueType
typename ValueType,
typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType
typename HashCompare, // has hash and equal
typename Allocator=tbb::cache_aligned_allocator< typename aligned_pair<ValueType, void *>::type >
>
class hash_buffer : public HashCompare {
public:
static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table
typedef ValueType value_type;
typedef typename buffer_element_type< value_type >::type element_type;
typedef value_type *pointer_type;
typedef element_type *list_array_type; // array we manage manually
typedef list_array_type *pointer_array_type;
typedef typename std::allocator_traits<Allocator>::template rebind_alloc<list_array_type> pointer_array_allocator_type;
typedef typename std::allocator_traits<Allocator>::template rebind_alloc<element_type> elements_array_allocator;
typedef typename std::decay<Key>::type Knoref;
private:
ValueToKey *my_key;
size_t my_size;
size_t nelements;
pointer_array_type pointer_array; // pointer_array[my_size]
list_array_type elements_array; // elements_array[my_size / 2]
element_type* free_list;
size_t mask() { return my_size - 1; }
void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) {
for(size_t i=0; i < sz - 1; ++i ) { // construct free list
la[i].second = &(la[i+1]);
}
la[sz-1].second = nullptr;
*p_free_list = (element_type *)&(la[0]);
}
// cleanup for exceptions
struct DoCleanup {
pointer_array_type *my_pa;
list_array_type *my_elements;
size_t my_size;
DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) :
my_pa(&pa), my_elements(&my_els), my_size(sz) { }
~DoCleanup() {
if(my_pa) {
size_t dont_care = 0;
internal_free_buffer(*my_pa, *my_elements, my_size, dont_care);
}
}
};
// exception-safety requires we do all the potentially-throwing operations first
void grow_array() {
size_t new_size = my_size*2;
size_t new_nelements = nelements; // internal_free_buffer zeroes this
list_array_type new_elements_array = nullptr;
pointer_array_type new_pointer_array = nullptr;
list_array_type new_free_list = nullptr;
{
DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size);
new_elements_array = elements_array_allocator().allocate(my_size);
new_pointer_array = pointer_array_allocator_type().allocate(new_size);
for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = nullptr;
set_up_free_list(&new_free_list, new_elements_array, my_size );
for(size_t i=0; i < my_size; ++i) {
for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second)) {
value_type *ov = reinterpret_cast<value_type *>(&(op->first));
// could have std::move semantics
internal_insert_with_key(new_pointer_array, new_size, new_free_list, *ov);
}
}
my_cleanup.my_pa = nullptr;
my_cleanup.my_elements = nullptr;
}
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
free_list = new_free_list;
pointer_array = new_pointer_array;
elements_array = new_elements_array;
my_size = new_size;
nelements = new_nelements;
}
// v should have perfect forwarding if std::move implemented.
// we use this method to move elements in grow_array, so can't use class fields
void internal_insert_with_key( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list,
const value_type &v) {
size_t l_mask = p_sz-1;
__TBB_ASSERT(my_key, "Error: value-to-key functor not provided");
size_t h = this->hash(tbb::detail::invoke(*my_key, v)) & l_mask;
__TBB_ASSERT(p_free_list, "Error: free list not set up.");
element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second);
(void) new(&(my_elem->first)) value_type(v);
my_elem->second = p_pointer_array[h];
p_pointer_array[h] = my_elem;
}
void internal_initialize_buffer() {
pointer_array = pointer_array_allocator_type().allocate(my_size);
for(size_t i = 0; i < my_size; ++i) pointer_array[i] = nullptr;
elements_array = elements_array_allocator().allocate(my_size / 2);
set_up_free_list(&free_list, elements_array, my_size / 2);
}
// made static so an enclosed class can use it to properly dispose of the internals
static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) {
if(pa) {
for(size_t i = 0; i < sz; ++i ) {
element_type *p_next;
for( element_type *p = pa[i]; p; p = p_next) {
p_next = (element_type *)p->second;
// TODO revamp: make sure type casting is correct.
void* ptr = (void*)(p->first);
#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER
suppress_unused_warning(ptr);
#endif
((value_type*)ptr)->~value_type();
}
}
pointer_array_allocator_type().deallocate(pa, sz);
pa = nullptr;
}
// Separate test (if allocation of pa throws, el may be allocated,
// but no elements will be constructed.)
if(el) {
elements_array_allocator().deallocate(el, sz / 2);
el = nullptr;
}
sz = INITIAL_SIZE;
ne = 0;
}
public:
hash_buffer() : my_key(nullptr), my_size(INITIAL_SIZE), nelements(0) {
internal_initialize_buffer();
}
~hash_buffer() {
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
delete my_key;
my_key = nullptr;
}
hash_buffer(const hash_buffer&) = delete;
hash_buffer& operator=(const hash_buffer&) = delete;
void reset() {
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
internal_initialize_buffer();
}
// Take ownership of func object allocated with new.
// This method is only used internally, so can't be misused by user.
void set_key_func(ValueToKey *vtk) { my_key = vtk; }
// pointer is used to clone()
ValueToKey* get_key_func() { return my_key; }
bool insert_with_key(const value_type &v) {
pointer_type p = nullptr;
__TBB_ASSERT(my_key, "Error: value-to-key functor not provided");
if(find_ref_with_key(tbb::detail::invoke(*my_key, v), p)) {
p->~value_type();
(void) new(p) value_type(v); // copy-construct into the space
return false;
}
++nelements;
if(nelements*2 > my_size) grow_array();
internal_insert_with_key(pointer_array, my_size, free_list, v);
return true;
}
// returns true and sets v to array element if found, else returns false.
bool find_ref_with_key(const Knoref& k, pointer_type &v) {
size_t i = this->hash(k) & mask();
for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second)) {
pointer_type pv = reinterpret_cast<pointer_type>(&(p->first));
__TBB_ASSERT(my_key, "Error: value-to-key functor not provided");
if(this->equal(tbb::detail::invoke(*my_key, *pv), k)) {
v = pv;
return true;
}
}
return false;
}
bool find_with_key( const Knoref& k, value_type &v) {
value_type *p;
if(find_ref_with_key(k, p)) {
v = *p;
return true;
}
else
return false;
}
void delete_with_key(const Knoref& k) {
size_t h = this->hash(k) & mask();
element_type* prev = nullptr;
for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second)) {
value_type *vp = reinterpret_cast<value_type *>(&(p->first));
__TBB_ASSERT(my_key, "Error: value-to-key functor not provided");
if(this->equal(tbb::detail::invoke(*my_key, *vp), k)) {
vp->~value_type();
if(prev) prev->second = p->second;
else pointer_array[h] = (element_type *)(p->second);
p->second = free_list;
free_list = p;
--nelements;
return;
}
}
__TBB_ASSERT(false, "key not found for delete");
}
};
#endif // __TBB__flow_graph_hash_buffer_impl_H
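hash_buffer is an internal helper (it backs the key_matching join ports), so the sketch below only illustrates the calling convention visible above; the IntHashCompare and KeyOfValue types are hypothetical, and the internal namespace qualification of hash_buffer is omitted.

#include <cstddef>
#include <string>
#include <utility>

struct IntHashCompare {               // provides the hash()/equal() the buffer inherits
    std::size_t hash(int k) const { return static_cast<std::size_t>(k) * 2654435761u; }
    bool equal(int a, int b) const { return a == b; }
};
struct KeyOfValue {                   // the ValueToKey functor handed over via set_key_func()
    int operator()(const std::pair<int, std::string>& v) const { return v.first; }
};

void hash_buffer_sketch() {
    using value_t = std::pair<int, std::string>;
    hash_buffer<int, value_t, KeyOfValue, IntHashCompare> buf;
    buf.set_key_func(new KeyOfValue);            // buffer takes ownership; deleted in ~hash_buffer()
    buf.insert_with_key(value_t(42, "answer"));  // true: first occupant of key 42
    value_t out;
    if (buf.find_with_key(42, out)) {
        // out.second == "answer"
    }
    buf.delete_with_key(42);                     // asserts if the key is absent
}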

View file

@@ -0,0 +1,365 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _FGT_GRAPH_TRACE_IMPL_H
#define _FGT_GRAPH_TRACE_IMPL_H
#include "third_party/tbb/profiling.hh"
#if (_MSC_VER >= 1900)
// MISSING #include <intrin.h>
#endif
namespace tbb {
namespace detail {
namespace d1 {
template< typename T > class sender;
template< typename T > class receiver;
#if TBB_USE_PROFILING_TOOLS
#if __TBB_FLOW_TRACE_CODEPTR
#if (_MSC_VER >= 1900)
#define CODEPTR() (_ReturnAddress())
#elif __TBB_GCC_VERSION >= 40800
#define CODEPTR() ( __builtin_return_address(0))
#else
#define CODEPTR() nullptr
#endif
#else
#define CODEPTR() nullptr
#endif /* __TBB_FLOW_TRACE_CODEPTR */
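// In other words, CODEPTR() evaluates to the return address of the node constructor
// that expands it (_ReturnAddress on MSVC 19.0+, __builtin_return_address on GCC 4.8+),
// letting profiling tools attribute a flow graph node to the source location that
// created it; on other compilers, or with code-pointer tracing disabled, it degrades
// to nullptr and the register_node_addr() calls below are skipped.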
static inline void fgt_alias_port(void *node, void *p, bool visible) {
if(visible)
itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_NODE );
else
itt_relation_add( ITT_DOMAIN_FLOW, p, FLOW_NODE, __itt_relation_is_child_of, node, FLOW_NODE );
}
static inline void fgt_composite ( void* codeptr, void *node, void *graph ) {
itt_make_task_group( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_COMPOSITE_NODE );
suppress_unused_warning( codeptr );
#if __TBB_FLOW_TRACE_CODEPTR
if (codeptr != nullptr) {
register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr);
}
#endif
}
static inline void fgt_internal_alias_input_port( void *node, void *p, string_resource_index name_index ) {
itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index );
itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_INPUT_PORT );
}
static inline void fgt_internal_alias_output_port( void *node, void *p, string_resource_index name_index ) {
itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index );
itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_OUTPUT_PORT );
}
template<typename InputType>
void alias_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) {
// TODO: Make fgt_internal_alias_input_port a function template?
fgt_internal_alias_input_port( node, port, name_index);
}
template < typename PortsTuple, int N >
struct fgt_internal_input_alias_helper {
static void alias_port( void *node, PortsTuple &ports ) {
alias_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) );
fgt_internal_input_alias_helper<PortsTuple, N-1>::alias_port( node, ports );
}
};
template < typename PortsTuple >
struct fgt_internal_input_alias_helper<PortsTuple, 0> {
static void alias_port( void * /* node */, PortsTuple & /* ports */ ) { }
};
template<typename OutputType>
void alias_output_port(void *node, sender<OutputType>* port, string_resource_index name_index) {
// TODO: Make fgt_internal_alias_output_port a function template?
fgt_internal_alias_output_port( node, static_cast<void *>(port), name_index);
}
template < typename PortsTuple, int N >
struct fgt_internal_output_alias_helper {
static void alias_port( void *node, PortsTuple &ports ) {
alias_output_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) );
fgt_internal_output_alias_helper<PortsTuple, N-1>::alias_port( node, ports );
}
};
template < typename PortsTuple >
struct fgt_internal_output_alias_helper<PortsTuple, 0> {
static void alias_port( void * /*node*/, PortsTuple &/*ports*/ ) {
}
};
static inline void fgt_internal_create_input_port( void *node, void *p, string_resource_index name_index ) {
itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index );
}
static inline void fgt_internal_create_output_port( void* codeptr, void *node, void *p, string_resource_index name_index ) {
itt_make_task_group(ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index);
suppress_unused_warning( codeptr );
#if __TBB_FLOW_TRACE_CODEPTR
if (codeptr != nullptr) {
register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr);
}
#endif
}
template<typename InputType>
void register_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) {
// TODO: Make fgt_internal_create_input_port a function template?
fgt_internal_create_input_port(node, static_cast<void*>(port), name_index);
}
template < typename PortsTuple, int N >
struct fgt_internal_input_helper {
static void register_port( void *node, PortsTuple &ports ) {
register_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) );
fgt_internal_input_helper<PortsTuple, N-1>::register_port( node, ports );
}
};
template < typename PortsTuple >
struct fgt_internal_input_helper<PortsTuple, 1> {
static void register_port( void *node, PortsTuple &ports ) {
register_input_port( node, &(std::get<0>(ports)), FLOW_INPUT_PORT_0 );
}
};
template<typename OutputType>
void register_output_port(void* codeptr, void *node, sender<OutputType>* port, string_resource_index name_index) {
// TODO: Make fgt_internal_create_output_port a function template?
fgt_internal_create_output_port( codeptr, node, static_cast<void *>(port), name_index);
}
template < typename PortsTuple, int N >
struct fgt_internal_output_helper {
static void register_port( void* codeptr, void *node, PortsTuple &ports ) {
register_output_port( codeptr, node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) );
fgt_internal_output_helper<PortsTuple, N-1>::register_port( codeptr, node, ports );
}
};
template < typename PortsTuple >
struct fgt_internal_output_helper<PortsTuple,1> {
static void register_port( void* codeptr, void *node, PortsTuple &ports ) {
register_output_port( codeptr, node, &(std::get<0>(ports)), FLOW_OUTPUT_PORT_0 );
}
};
template< typename NodeType >
void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) {
void *addr = (void *)( static_cast< receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) );
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc );
}
template< typename NodeType >
void fgt_multiinput_multioutput_node_desc( const NodeType *node, const char *desc ) {
void *addr = const_cast<NodeType *>(node);
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc );
}
template< typename NodeType >
static inline void fgt_node_desc( const NodeType *node, const char *desc ) {
void *addr = (void *)( static_cast< sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) );
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc );
}
static inline void fgt_graph_desc( const void *g, const char *desc ) {
void *addr = const_cast< void *>(g);
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_GRAPH, FLOW_OBJECT_NAME, desc );
}
static inline void fgt_body( void *node, void *body ) {
itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE );
}
template< int N, typename PortsTuple >
static inline void fgt_multioutput_node(void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports ) {
itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 );
fgt_internal_output_helper<PortsTuple, N>::register_port(codeptr, input_port, ports );
}
template< int N, typename PortsTuple >
static inline void fgt_multioutput_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports, void *body ) {
itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 );
fgt_internal_output_helper<PortsTuple, N>::register_port( codeptr, input_port, ports );
fgt_body( input_port, body );
}
template< int N, typename PortsTuple >
static inline void fgt_multiinput_node( void* codeptr, string_resource_index t, void *g, PortsTuple &ports, void *output_port) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 );
fgt_internal_input_helper<PortsTuple, N>::register_port( output_port, ports );
}
static inline void fgt_multiinput_multioutput_node( void* codeptr, string_resource_index t, void *n, void *g ) {
itt_make_task_group( ITT_DOMAIN_FLOW, n, FLOW_NODE, g, FLOW_GRAPH, t );
suppress_unused_warning( codeptr );
#if __TBB_FLOW_TRACE_CODEPTR
if (codeptr != nullptr) {
register_node_addr(ITT_DOMAIN_FLOW, n, FLOW_NODE, CODE_ADDRESS, &codeptr);
}
#endif
}
static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *output_port ) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 );
}
static void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *output_port, void *body ) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port(codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 );
fgt_body( output_port, body );
}
static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port ) {
fgt_node( codeptr, t, g, output_port );
fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 );
}
static inline void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port, void *body ) {
fgt_node_with_body( codeptr, t, g, output_port, body );
fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 );
}
static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *decrement_port, void *output_port ) {
fgt_node( codeptr, t, g, input_port, output_port );
fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 );
}
static inline void fgt_make_edge( void *output_port, void *input_port ) {
itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT);
}
static inline void fgt_remove_edge( void *output_port, void *input_port ) {
itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT);
}
static inline void fgt_graph( void *g ) {
itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, nullptr, FLOW_NULL, FLOW_GRAPH );
}
static inline void fgt_begin_body( void *body ) {
itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, nullptr, FLOW_NULL, FLOW_BODY );
}
static inline void fgt_end_body( void * ) {
itt_task_end( ITT_DOMAIN_FLOW );
}
static inline void fgt_async_try_put_begin( void *node, void *port ) {
itt_task_begin( ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT );
}
static inline void fgt_async_try_put_end( void *, void * ) {
itt_task_end( ITT_DOMAIN_FLOW );
}
static inline void fgt_async_reserve( void *node, void *graph ) {
itt_region_begin( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL );
}
static inline void fgt_async_commit( void *node, void * /*graph*/) {
itt_region_end( ITT_DOMAIN_FLOW, node, FLOW_NODE );
}
static inline void fgt_reserve_wait( void *graph ) {
itt_region_begin( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH, nullptr, FLOW_NULL, FLOW_NULL );
}
static inline void fgt_release_wait( void *graph ) {
itt_region_end( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH );
}
#else // TBB_USE_PROFILING_TOOLS
#define CODEPTR() nullptr
static inline void fgt_alias_port(void * /*node*/, void * /*p*/, bool /*visible*/ ) { }
static inline void fgt_composite ( void* /*codeptr*/, void * /*node*/, void * /*graph*/ ) { }
static inline void fgt_graph( void * /*g*/ ) { }
template< typename NodeType >
static inline void fgt_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { }
template< typename NodeType >
static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { }
static inline void fgt_graph_desc( const void * /*g*/, const char * /*desc*/ ) { }
template< int N, typename PortsTuple >
static inline void fgt_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { }
template< int N, typename PortsTuple >
static inline void fgt_multioutput_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { }
template< int N, typename PortsTuple >
static inline void fgt_multiinput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { }
static inline void fgt_multiinput_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*node*/, void * /*graph*/ ) { }
static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { }
static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { }
static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { }
static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { }
static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { }
static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { }
static inline void fgt_begin_body( void * /*body*/ ) { }
static inline void fgt_end_body( void * /*body*/) { }
static inline void fgt_async_try_put_begin( void * /*node*/, void * /*port*/ ) { }
static inline void fgt_async_try_put_end( void * /*node*/ , void * /*port*/ ) { }
static inline void fgt_async_reserve( void * /*node*/, void * /*graph*/ ) { }
static inline void fgt_async_commit( void * /*node*/, void * /*graph*/ ) { }
static inline void fgt_reserve_wait( void * /*graph*/ ) { }
static inline void fgt_release_wait( void * /*graph*/ ) { }
template< typename NodeType >
void fgt_multiinput_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { }
template < typename PortsTuple, int N >
struct fgt_internal_input_alias_helper {
static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { }
};
template < typename PortsTuple, int N >
struct fgt_internal_output_alias_helper {
static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { }
};
#endif // TBB_USE_PROFILING_TOOLS
} // d1
} // namespace detail
} // namespace tbb
#endif // _FGT_GRAPH_TRACE_IMPL_H

View file

@@ -0,0 +1,408 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_types_impl_H
#define __TBB__flow_graph_types_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::detail::d1
// the change to key_matching (adding a K and KHash template parameter, making it a class)
// means we have to pass this data to the key_matching_port. All the ports have only one
// template parameter, so we have to wrap the following types in a trait:
//
// . K == key_type
// . KHash == hash and compare for Key
// . TtoK == function_body that given an object of T, returns its K
// . T == type accepted by port, and stored in the hash table
//
// The port will have an additional parameter on node construction, which is a function_body
// that accepts a const T& and returns a K which is the field in T which is its K.
template<typename Kp, typename KHashp, typename Tp>
struct KeyTrait {
typedef Kp K;
typedef Tp T;
typedef type_to_key_function_body<T,K> TtoK;
typedef KHashp KHash;
};
// wrap each element of a tuple in a template, and make a tuple of the result.
template<int N, template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements;
// A wrapper that generates the traits needed for each port of a key-matching join,
// and the type of the tuple of input ports.
template<int N, template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements;
template<int N, template<class> class PT, typename... Args>
struct wrap_tuple_elements<N, PT, std::tuple<Args...> >{
typedef typename std::tuple<PT<Args>... > type;
};
template<int N, template<class> class PT, typename KeyTraits, typename... Args>
struct wrap_key_tuple_elements<N, PT, KeyTraits, std::tuple<Args...> > {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef typename std::tuple<PT<KeyTrait<K, KHash, Args> >... > type;
};
template< int... S > class sequence {};
template< int N, int... S >
struct make_sequence : make_sequence < N - 1, N - 1, S... > {};
template< int... S >
struct make_sequence < 0, S... > {
typedef sequence<S...> type;
};
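// For example, make_sequence<3> inherits make_sequence<2, 2>, then
// make_sequence<1, 1, 2>, then make_sequence<0, 0, 1, 2>, so its nested ::type is
// sequence<0, 1, 2> -- a pre-C++14 stand-in for std::index_sequence.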
//! type mimicking std::pair but with trailing fill to ensure each element of an array
//* will have the correct alignment
template<typename T1, typename T2, size_t REM>
struct type_plus_align {
char first[sizeof(T1)];
T2 second;
char fill1[REM];
};
template<typename T1, typename T2>
struct type_plus_align<T1,T2,0> {
char first[sizeof(T1)];
T2 second;
};
template<class U> struct alignment_of {
typedef struct { char t; U padded; } test_alignment;
static const size_t value = sizeof(test_alignment) - sizeof(U);
};
// T1, T2 are actual types stored. The space defined for T1 in the type returned
// is a char array of the correct size. Type T2 should be trivially-constructible,
// T1 must be explicitly managed.
template<typename T1, typename T2>
struct aligned_pair {
static const size_t t1_align = alignment_of<T1>::value;
static const size_t t2_align = alignment_of<T2>::value;
typedef type_plus_align<T1, T2, 0 > just_pair;
static const size_t max_align = t1_align < t2_align ? t2_align : t1_align;
static const size_t extra_bytes = sizeof(just_pair) % max_align;
static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0;
public:
typedef type_plus_align<T1,T2,remainder> type;
}; // aligned_pair
// support for variant type
// type we use when we're not storing a value
struct default_constructed { };
// type which contains another type, tests for what type is contained, and references to it.
// Wrapper<T>
// void CopyTo( void *newSpace) : builds a Wrapper<T> copy of itself in newSpace
// struct to allow us to copy and test the type of objects
struct WrapperBase {
virtual ~WrapperBase() {}
virtual void CopyTo(void* /*newSpace*/) const = 0;
};
// Wrapper<T> contains a T, with the ability to test what T is. The Wrapper<T> can be
// constructed from a T, can be copy-constructed from another Wrapper<T>, and can be
// examined via value(), but not modified.
template<typename T>
struct Wrapper: public WrapperBase {
typedef T value_type;
typedef T* pointer_type;
private:
T value_space;
public:
const value_type &value() const { return value_space; }
private:
Wrapper();
// on exception will ensure the Wrapper will contain only a trivially-constructed object
struct _unwind_space {
pointer_type space;
_unwind_space(pointer_type p) : space(p) {}
~_unwind_space() {
if(space) (void) new (space) Wrapper<default_constructed>(default_constructed());
}
};
public:
explicit Wrapper( const T& other ) : value_space(other) { }
explicit Wrapper(const Wrapper& other) = delete;
void CopyTo(void* newSpace) const override {
_unwind_space guard((pointer_type)newSpace);
(void) new(newSpace) Wrapper(value_space);
guard.space = nullptr;
}
~Wrapper() { }
};
// specialization for array objects
template<typename T, size_t N>
struct Wrapper<T[N]> : public WrapperBase {
typedef T value_type;
typedef T* pointer_type;
// space must be untyped.
typedef T ArrayType[N];
private:
// The space is not of type T[N] because when copy-constructing, it would be
// default-initialized and then copied to in some fashion, resulting in two
// constructions and one destruction per element. If the type is char[ ], we
// placement new into each element, resulting in one construction per element.
static const size_t space_size = sizeof(ArrayType);
char value_space[space_size];
// on exception will ensure the already-built objects will be destructed
// (the value_space is a char array, so it is already trivially-destructible.)
struct _unwind_class {
pointer_type space;
int already_built;
_unwind_class(pointer_type p) : space(p), already_built(0) {}
~_unwind_class() {
if(space) {
for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type();
(void) new(space) Wrapper<default_constructed>(default_constructed());
}
}
};
public:
const ArrayType &value() const {
char *vp = const_cast<char *>(value_space);
return reinterpret_cast<ArrayType &>(*vp);
}
private:
Wrapper();
public:
// have to explicitly construct because other decays to a const value_type*
explicit Wrapper(const ArrayType& other) {
_unwind_class guard((pointer_type)value_space);
pointer_type vp = reinterpret_cast<pointer_type>(&value_space);
for(size_t i = 0; i < N; ++i ) {
(void) new(vp++) value_type(other[i]);
++(guard.already_built);
}
guard.space = nullptr;
}
explicit Wrapper(const Wrapper& other) : WrapperBase() {
// we have to do the heavy lifting to copy contents
_unwind_class guard((pointer_type)value_space);
pointer_type dp = reinterpret_cast<pointer_type>(value_space);
pointer_type sp = reinterpret_cast<pointer_type>(const_cast<char *>(other.value_space));
for(size_t i = 0; i < N; ++i, ++dp, ++sp) {
(void) new(dp) value_type(*sp);
++(guard.already_built);
}
guard.space = nullptr;
}
void CopyTo(void* newSpace) const override {
(void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor
}
~Wrapper() {
// have to destroy explicitly in reverse order
pointer_type vp = reinterpret_cast<pointer_type>(&value_space);
for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type();
}
};
// given a tuple, return the type of the element that has the maximum alignment requirement.
// Given a tuple and that type, return the number of elements of the object with the max
// alignment requirement that is at least as big as the largest object in the tuple.
template<bool, class T1, class T2> struct pick_one;
template<class T1, class T2> struct pick_one<true , T1, T2> { typedef T1 type; };
template<class T1, class T2> struct pick_one<false, T1, T2> { typedef T2 type; };
template< template<class> class Selector, typename T1, typename T2 >
struct pick_max {
typedef typename pick_one< (Selector<T1>::value > Selector<T2>::value), T1, T2 >::type type;
};
template<typename T> struct size_of { static const int value = sizeof(T); };
template< size_t N, class Tuple, template<class> class Selector > struct pick_tuple_max {
typedef typename pick_tuple_max<N-1, Tuple, Selector>::type LeftMaxType;
typedef typename std::tuple_element<N-1, Tuple>::type ThisType;
typedef typename pick_max<Selector, LeftMaxType, ThisType>::type type;
};
template< class Tuple, template<class> class Selector > struct pick_tuple_max<0, Tuple, Selector> {
typedef typename std::tuple_element<0, Tuple>::type type;
};
// is the specified type included in a tuple?
template<class Q, size_t N, class Tuple>
struct is_element_of {
typedef typename std::tuple_element<N-1, Tuple>::type T_i;
static const bool value = std::is_same<Q,T_i>::value || is_element_of<Q,N-1,Tuple>::value;
};
template<class Q, class Tuple>
struct is_element_of<Q,0,Tuple> {
typedef typename std::tuple_element<0, Tuple>::type T_i;
static const bool value = std::is_same<Q,T_i>::value;
};
// allow the construction of types that are listed in the tuple. If a disallowed type
// construction is written, a method involving this type is created. The
// type has no definition, so a syntax error is generated.
template<typename T> struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple;
template<typename T, bool BUILD_IT> struct do_if;
template<typename T>
struct do_if<T, true> {
static void construct(void *mySpace, const T& x) {
(void) new(mySpace) Wrapper<T>(x);
}
};
template<typename T>
struct do_if<T, false> {
static void construct(void * /*mySpace*/, const T& x) {
// This method is instantiated when the type T does not match any of the
// element types in the Tuple in variant<Tuple>.
ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple<T>::bad_type(x);
}
};
// Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in
// Wrapper, and how big Wrapper is.
//
// the object can only be tested for type, and a read-only reference can be fetched by cast_to<T>().
using tbb::detail::punned_cast;
struct tagged_null_type {};
template<typename TagType, typename T0, typename T1=tagged_null_type, typename T2=tagged_null_type, typename T3=tagged_null_type,
typename T4=tagged_null_type, typename T5=tagged_null_type, typename T6=tagged_null_type,
typename T7=tagged_null_type, typename T8=tagged_null_type, typename T9=tagged_null_type>
class tagged_msg {
typedef std::tuple<T0, T1, T2, T3, T4
//TODO: Should we reject lists longer than a tuple can hold?
#if __TBB_VARIADIC_MAX >= 6
, T5
#endif
#if __TBB_VARIADIC_MAX >= 7
, T6
#endif
#if __TBB_VARIADIC_MAX >= 8
, T7
#endif
#if __TBB_VARIADIC_MAX >= 9
, T8
#endif
#if __TBB_VARIADIC_MAX >= 10
, T9
#endif
> Tuple;
private:
class variant {
static const size_t N = std::tuple_size<Tuple>::value;
typedef typename pick_tuple_max<N, Tuple, alignment_of>::type AlignType;
typedef typename pick_tuple_max<N, Tuple, size_of>::type MaxSizeType;
static const size_t MaxNBytes = (sizeof(Wrapper<MaxSizeType>)+sizeof(AlignType)-1);
static const size_t MaxNElements = MaxNBytes/sizeof(AlignType);
typedef aligned_space<AlignType, MaxNElements> SpaceType;
SpaceType my_space;
static const size_t MaxSize = sizeof(SpaceType);
public:
variant() { (void) new(&my_space) Wrapper<default_constructed>(default_constructed()); }
template<typename T>
variant( const T& x ) {
do_if<T, is_element_of<T, N, Tuple>::value>::construct(&my_space,x);
}
variant(const variant& other) {
const WrapperBase * h = punned_cast<const WrapperBase *>(&(other.my_space));
h->CopyTo(&my_space);
}
// assignment must destroy and re-create the Wrapper type, as there is no way
// to create a Wrapper-to-Wrapper assign even if we find they agree in type.
void operator=( const variant& rhs ) {
if(&rhs != this) {
WrapperBase *h = punned_cast<WrapperBase *>(&my_space);
h->~WrapperBase();
const WrapperBase *ch = punned_cast<const WrapperBase *>(&(rhs.my_space));
ch->CopyTo(&my_space);
}
}
template<typename U>
const U& variant_cast_to() const {
const Wrapper<U> *h = dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space));
if(!h) {
throw_exception(exception_id::bad_tagged_msg_cast);
}
return h->value();
}
template<typename U>
bool variant_is_a() const { return dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)) != nullptr; }
bool variant_is_default_constructed() const {return variant_is_a<default_constructed>();}
~variant() {
WrapperBase *h = punned_cast<WrapperBase *>(&my_space);
h->~WrapperBase();
}
}; //class variant
TagType my_tag;
variant my_msg;
public:
tagged_msg(): my_tag(TagType(~0)), my_msg(){}
template<typename T, typename R>
tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {}
template<typename T, typename R, size_t N>
tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {}
void set_tag(TagType const &index) {my_tag = index;}
TagType tag() const {return my_tag;}
template<typename V>
const V& cast_to() const {return my_msg.template variant_cast_to<V>();}
template<typename V>
bool is_a() const {return my_msg.template variant_is_a<V>();}
bool is_default_constructed() const {return my_msg.variant_is_default_constructed();}
}; //class tagged_msg
// template to simplify cast and test for tagged_msg in template contexts
template<typename V, typename T>
const V& cast_to(T const &t) { return t.template cast_to<V>(); }
template<typename V, typename T>
bool is_a(T const &t) { return t.template is_a<V>(); }
enum op_stat { WAIT = 0, SUCCEEDED, FAILED };
#endif /* __TBB__flow_graph_types_impl_H */
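tagged_msg is the value type that multi-output constructs such as indexer_node emit: a tag plus a variant restricted to the listed types. A minimal sketch of the accessors defined above (the internal namespace qualification is omitted; names are illustrative):

#include <cstddef>

void tagged_msg_sketch() {
    using msg_t = tagged_msg<std::size_t, int, double>;

    msg_t m(std::size_t(0), 42);     // tag 0, variant holds an int
    if (m.is_a<int>()) {
        int v = m.cast_to<int>();    // 42 (cast_to<double>() would throw bad_tagged_msg_cast)
        (void)v;
    }
    // A default-constructed message carries tag TagType(~0) and no value.
    bool empty = msg_t().is_default_constructed();
    (void)empty;
}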

148
third_party/tbb/detail/_hash_compare.hh vendored Normal file
View file

@@ -0,0 +1,148 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__hash_compare_H
#define __TBB_detail__hash_compare_H
#include "third_party/libcxx/functional"
#include "third_party/tbb/detail/_containers_helpers.hh"
namespace tbb {
namespace detail {
namespace d1 {
template <typename Key, typename Hash, typename KeyEqual>
class hash_compare {
using is_transparent_hash = has_transparent_key_equal<Key, Hash, KeyEqual>;
public:
using hasher = Hash;
using key_equal = typename is_transparent_hash::type;
hash_compare() = default;
hash_compare( hasher hash, key_equal equal ) : my_hasher(hash), my_equal(equal) {}
std::size_t operator()( const Key& key ) const {
return std::size_t(my_hasher(key));
}
bool operator()( const Key& key1, const Key& key2 ) const {
return my_equal(key1, key2);
}
template <typename K, typename = typename std::enable_if<is_transparent_hash::value, K>::type>
std::size_t operator()( const K& key ) const {
return std::size_t(my_hasher(key));
}
template <typename K1, typename K2, typename = typename std::enable_if<is_transparent_hash::value, K1>::type>
bool operator()( const K1& key1, const K2& key2 ) const {
return my_equal(key1, key2);
}
hasher hash_function() const {
return my_hasher;
}
key_equal key_eq() const {
return my_equal;
}
private:
hasher my_hasher;
key_equal my_equal;
}; // class hash_compare
//! hash_compare that is default argument for concurrent_hash_map
template <typename Key>
class tbb_hash_compare {
public:
std::size_t hash( const Key& a ) const { return my_hash_func(a); }
#if defined(_MSC_VER) && _MSC_VER <= 1900
#pragma warning (push)
// MSVC 2015 throws a strange warning: 'std::size_t': forcing value to bool 'true' or 'false'
#pragma warning (disable: 4800)
#endif
bool equal( const Key& a, const Key& b ) const { return my_key_equal(a, b); }
#if defined(_MSC_VER) && _MSC_VER <= 1900
#pragma warning (pop)
#endif
private:
std::hash<Key> my_hash_func;
std::equal_to<Key> my_key_equal;
};
} // namespace d1
#if __TBB_CPP20_CONCEPTS_PRESENT
inline namespace d0 {
template <typename HashCompare, typename Key>
concept hash_compare = std::copy_constructible<HashCompare> &&
requires( const std::remove_reference_t<HashCompare>& hc, const Key& key1, const Key& key2 ) {
{ hc.hash(key1) } -> std::same_as<std::size_t>;
{ hc.equal(key1, key2) } -> std::convertible_to<bool>;
};
} // namespace d0
#endif // __TBB_CPP20_CONCEPTS_PRESENT
} // namespace detail
} // namespace tbb
#if TBB_DEFINE_STD_HASH_SPECIALIZATIONS
namespace std {
template <typename T, typename U>
struct hash<std::pair<T, U>> {
public:
std::size_t operator()( const std::pair<T, U>& p ) const {
return first_hash(p.first) ^ second_hash(p.second);
}
private:
std::hash<T> first_hash;
std::hash<U> second_hash;
}; // struct hash<std::pair>
// Apple clang and MSVC define their own specializations for std::hash<std::basic_string<T, Traits, Alloc>>
#if !(_LIBCPP_VERSION) && !(_CPPLIB_VER)
template <typename CharT, typename Traits, typename Allocator>
struct hash<std::basic_string<CharT, Traits, Allocator>> {
public:
std::size_t operator()( const std::basic_string<CharT, Traits, Allocator>& s ) const {
std::size_t h = 0;
for ( const CharT* c = s.c_str(); *c; ++c ) {
h = h * hash_multiplier ^ char_hash(*c);
}
return h;
}
private:
static constexpr std::size_t hash_multiplier = tbb::detail::select_size_t_constant<2654435769U, 11400714819323198485ULL>::value;
std::hash<CharT> char_hash;
}; // struct hash<std::basic_string>
#endif // !(_LIBCPP_VERSION || _CPPLIB_VER)
} // namespace std
#endif // TBB_DEFINE_STD_HASH_SPECIALIZATIONS
#endif // __TBB_detail__hash_compare_H
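A minimal sketch of the two adaptors above, assuming this header is included as third_party/tbb/detail/_hash_compare.hh; tbb::detail::d1 is an internal namespace, so production code would normally reach these types only through the public containers:

#include <cstddef>
#include <functional>
#include <string>

void hash_compare_sketch() {
    // hash_compare packs a hasher and an equality predicate behind two operator()
    // overloads, as consumed by the unordered containers.
    tbb::detail::d1::hash_compare<std::string, std::hash<std::string>,
                                  std::equal_to<std::string>> hc;
    std::size_t h = hc(std::string("key"));            // hashing overload
    bool eq = hc(std::string("a"), std::string("a"));  // equality overload -> true

    // tbb_hash_compare is the default HashCompare of concurrent_hash_map and
    // exposes named hash()/equal() members instead.
    tbb::detail::d1::tbb_hash_compare<int> thc;
    bool same = thc.equal(1, 1) && thc.hash(7) == std::hash<int>{}(7);

    (void)h; (void)eq; (void)same;
}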

View file

@@ -0,0 +1,42 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_detail__intrusive_list_node_H
#define _TBB_detail__intrusive_list_node_H
namespace tbb {
namespace detail {
namespace d1 {
//! Data structure to be inherited by the types that can form intrusive lists.
/** Intrusive list is formed by means of the member_intrusive_list<T> template class.
Note that type T must derive from intrusive_list_node either publicly or
declare instantiation member_intrusive_list<T> as a friend.
This class implements a limited subset of std::list interface. **/
struct intrusive_list_node {
intrusive_list_node* my_prev_node{};
intrusive_list_node* my_next_node{};
#if TBB_USE_ASSERT
intrusive_list_node() { my_prev_node = my_next_node = this; }
#endif /* TBB_USE_ASSERT */
};
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // _TBB_detail__intrusive_list_node_H
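A minimal sketch of the intended use; member_intrusive_list<T> itself lives in a separate internal header, so only the node side is shown and the type name is made up:

// The links are inherited, so enqueuing an element never allocates: the payload
// lives entirely in the derived type.  Under TBB_USE_ASSERT the base constructor
// self-links the node, which lets the list code assert against double insertion.
struct my_list_item : tbb::detail::d1::intrusive_list_node {
    int payload = 0;
};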

397
third_party/tbb/detail/_machine.hh vendored Normal file
View file

@@ -0,0 +1,397 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__machine_H
#define __TBB_detail__machine_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/climits"
#include "third_party/libcxx/cstdint"
#include "third_party/libcxx/cstddef"
#ifdef _WIN32
// MISSING #include <intrin.h>
#ifdef __TBBMALLOC_BUILD
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include "libc/nt/accounting.h"
#include "libc/nt/automation.h"
#include "libc/nt/console.h"
#include "libc/nt/debug.h"
#include "libc/nt/dll.h"
#include "libc/nt/enum/keyaccess.h"
#include "libc/nt/enum/regtype.h"
#include "libc/nt/errors.h"
#include "libc/nt/events.h"
#include "libc/nt/files.h"
#include "libc/nt/ipc.h"
#include "libc/nt/memory.h"
#include "libc/nt/paint.h"
#include "libc/nt/process.h"
#include "libc/nt/registry.h"
#include "libc/nt/synchronization.h"
#include "libc/nt/thread.h"
#include "libc/nt/windows.h"
#include "libc/nt/winsock.h" // SwitchToThread()
#endif
#ifdef _MSC_VER
#if __TBB_x86_64 || __TBB_x86_32
#pragma intrinsic(__rdtsc)
#endif
#endif
#endif
#if __TBB_x86_64 || __TBB_x86_32
#include "third_party/intel/immintrin.internal.h" // _mm_pause
#endif
#if (_WIN32)
#include "libc/math.h"
#include "libc/runtime/fenv.h" // _control87
#endif
#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN
#include "libc/calls/calls.h"
#include "libc/calls/struct/cpuset.h"
#include "libc/calls/struct/sched_param.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/sched.h" // sched_yield
#else
#include "third_party/libcxx/thread" // std::this_thread::yield()
#endif
namespace tbb {
namespace detail {
inline namespace d0 {
//--------------------------------------------------------------------------------------------------
// Yield implementation
//--------------------------------------------------------------------------------------------------
#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN
static inline void yield() {
int err = sched_yield();
__TBB_ASSERT_EX(err == 0, "sched_yield has failed");
}
#elif __TBBMALLOC_BUILD && _WIN32
// Use Windows API for yield in tbbmalloc to avoid dependency on C++ runtime with some implementations.
static inline void yield() {
SwitchToThread();
}
#else
using std::this_thread::yield;
#endif
//--------------------------------------------------------------------------------------------------
// atomic_fence_seq_cst implementation
//--------------------------------------------------------------------------------------------------
static inline void atomic_fence_seq_cst() {
#if (__TBB_x86_64 || __TBB_x86_32) && defined(__GNUC__) && __GNUC__ < 11
unsigned char dummy = 0u;
__asm__ __volatile__ ("lock; notb %0" : "+m" (dummy) :: "memory");
#else
std::atomic_thread_fence(std::memory_order_seq_cst);
#endif
}
//--------------------------------------------------------------------------------------------------
// Pause implementation
//--------------------------------------------------------------------------------------------------
static inline void machine_pause(int32_t delay) {
#if __TBB_x86_64 || __TBB_x86_32
while (delay-- > 0) { _mm_pause(); }
#elif __ARM_ARCH_7A__ || __aarch64__
while (delay-- > 0) { __asm__ __volatile__("yield" ::: "memory"); }
#else /* Generic */
(void)delay; // suppress without including _template_helpers.h
yield();
#endif
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// tbb::detail::log2() implementation
////////////////////////////////////////////////////////////////////////////////////////////////////
// TODO: Use log2p1() function that will be available in C++20 standard
#if defined(__GNUC__) || defined(__clang__)
namespace gnu_builtins {
inline uintptr_t clz(unsigned int x) { return static_cast<uintptr_t>(__builtin_clz(x)); }
inline uintptr_t clz(unsigned long int x) { return static_cast<uintptr_t>(__builtin_clzl(x)); }
inline uintptr_t clz(unsigned long long int x) { return static_cast<uintptr_t>(__builtin_clzll(x)); }
}
#elif defined(_MSC_VER)
#pragma intrinsic(__TBB_W(_BitScanReverse))
namespace msvc_intrinsics {
static inline uintptr_t bit_scan_reverse(uintptr_t i) {
unsigned long j;
__TBB_W(_BitScanReverse)( &j, i );
return j;
}
}
#endif
template <typename T>
constexpr std::uintptr_t number_of_bits() {
return sizeof(T) * CHAR_BIT;
}
// logarithm is the index of the most significant non-zero bit
static inline uintptr_t machine_log2(uintptr_t x) {
#if defined(__GNUC__) || defined(__clang__)
// If P is a power of 2 and x<P, then (P-1)-x == (P-1) XOR x
return (number_of_bits<decltype(x)>() - 1) ^ gnu_builtins::clz(x);
#elif defined(_MSC_VER)
return msvc_intrinsics::bit_scan_reverse(x);
#elif __i386__ || __i386 /*for Sun OS*/ || __MINGW32__
uintptr_t j, i = x;
__asm__("bsr %1,%0" : "=r"(j) : "r"(i));
return j;
#elif __powerpc__ || __POWERPC__
#if __TBB_WORDSIZE==8
__asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
return 63 - static_cast<intptr_t>(x);
#else
__asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
return 31 - static_cast<intptr_t>(x);
#endif /*__TBB_WORDSIZE*/
#elif __sparc
uint64_t count;
// one hot encode
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
x |= (x >> 32);
// count 1's
__asm__ ("popc %1, %0" : "=r"(count) : "r"(x) );
return count - 1;
#else
intptr_t result = 0;
if( sizeof(x) > 4 && (uintptr_t tmp = x >> 32) ) { x = tmp; result += 32; }
if( uintptr_t tmp = x >> 16 ) { x = tmp; result += 16; }
if( uintptr_t tmp = x >> 8 ) { x = tmp; result += 8; }
if( uintptr_t tmp = x >> 4 ) { x = tmp; result += 4; }
if( uintptr_t tmp = x >> 2 ) { x = tmp; result += 2; }
return (x & 2) ? result + 1 : result;
#endif
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// tbb::detail::reverse_bits() implementation
////////////////////////////////////////////////////////////////////////////////////////////////////
#if TBB_USE_CLANG_BITREVERSE_BUILTINS
namespace llvm_builtins {
inline uint8_t builtin_bitreverse(uint8_t x) { return __builtin_bitreverse8 (x); }
inline uint16_t builtin_bitreverse(uint16_t x) { return __builtin_bitreverse16(x); }
inline uint32_t builtin_bitreverse(uint32_t x) { return __builtin_bitreverse32(x); }
inline uint64_t builtin_bitreverse(uint64_t x) { return __builtin_bitreverse64(x); }
}
#else // generic
template<typename T>
struct reverse {
static const T byte_table[256];
};
template<typename T>
const T reverse<T>::byte_table[256] = {
0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
inline unsigned char reverse_byte(unsigned char src) {
return reverse<unsigned char>::byte_table[src];
}
#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS
template<typename T>
T machine_reverse_bits(T src) {
#if TBB_USE_CLANG_BITREVERSE_BUILTINS
return builtin_bitreverse(fixed_width_cast(src));
#else /* Generic */
T dst;
unsigned char *original = reinterpret_cast<unsigned char *>(&src);
unsigned char *reversed = reinterpret_cast<unsigned char *>(&dst);
for ( int i = sizeof(T) - 1; i >= 0; i-- ) {
reversed[i] = reverse_byte( original[sizeof(T) - i - 1] );
}
return dst;
#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS
}
} // inline namespace d0
namespace d1 {
#if (_WIN32)
// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1
struct cpu_ctl_env {
unsigned int x87cw{};
#if (__TBB_x86_64)
// Changing the infinity mode or the floating-point precision is not supported on x64.
// The attempt causes an assertion. See
// https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/control87-controlfp-control87-2
static constexpr unsigned int X87CW_CONTROL_MASK = _MCW_DN | _MCW_EM | _MCW_RC;
#else
static constexpr unsigned int X87CW_CONTROL_MASK = ~0U;
#endif
#if (__TBB_x86_32 || __TBB_x86_64)
unsigned int mxcsr{};
static constexpr unsigned int MXCSR_CONTROL_MASK = ~0x3fu; /* all except last six status bits */
#endif
bool operator!=( const cpu_ctl_env& ctl ) const {
return
#if (__TBB_x86_32 || __TBB_x86_64)
mxcsr != ctl.mxcsr ||
#endif
x87cw != ctl.x87cw;
}
void get_env() {
x87cw = _control87(0, 0);
#if (__TBB_x86_32 || __TBB_x86_64)
mxcsr = _mm_getcsr();
#endif
}
void set_env() const {
_control87(x87cw, X87CW_CONTROL_MASK);
#if (__TBB_x86_32 || __TBB_x86_64)
_mm_setcsr(mxcsr & MXCSR_CONTROL_MASK);
#endif
}
};
#elif (__TBB_x86_32 || __TBB_x86_64)
// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1
struct cpu_ctl_env {
int mxcsr{};
short x87cw{};
static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
bool operator!=(const cpu_ctl_env& ctl) const {
return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw;
}
void get_env() {
__asm__ __volatile__(
"stmxcsr %0\n\t"
"fstcw %1"
: "=m"(mxcsr), "=m"(x87cw)
);
mxcsr &= MXCSR_CONTROL_MASK;
}
void set_env() const {
__asm__ __volatile__(
"ldmxcsr %0\n\t"
"fldcw %1"
: : "m"(mxcsr), "m"(x87cw)
);
}
};
#endif
} // namespace d1
} // namespace detail
} // namespace tbb
#if !__TBB_CPU_CTL_ENV_PRESENT
#include "libc/runtime/fenv.h"
#include "third_party/libcxx/cstring"
namespace tbb {
namespace detail {
namespace r1 {
void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size);
void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p);
} // namespace r1
namespace d1 {
class cpu_ctl_env {
fenv_t *my_fenv_ptr;
public:
cpu_ctl_env() : my_fenv_ptr(nullptr) {}
~cpu_ctl_env() {
if ( my_fenv_ptr )
r1::cache_aligned_deallocate( (void*)my_fenv_ptr );
}
// It is possible not to copy memory but just to copy pointers but the following issues should be addressed:
// 1. The arena lifetime and the context lifetime are independent;
// 2. The user is allowed to recapture different FPU settings to context so 'current FPU settings' inside
// dispatch loop may become invalid.
// But do we really want to improve the fenv implementation? It seems to be better to replace the fenv implementation
// with a platform specific implementation.
cpu_ctl_env( const cpu_ctl_env &src ) : my_fenv_ptr(nullptr) {
*this = src;
}
cpu_ctl_env& operator=( const cpu_ctl_env &src ) {
__TBB_ASSERT( src.my_fenv_ptr, nullptr);
if ( !my_fenv_ptr )
my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t));
*my_fenv_ptr = *src.my_fenv_ptr;
return *this;
}
bool operator!=( const cpu_ctl_env &ctl ) const {
__TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." );
__TBB_ASSERT( ctl.my_fenv_ptr, "cpu_ctl_env is not initialized." );
return std::memcmp( (void*)my_fenv_ptr, (void*)ctl.my_fenv_ptr, sizeof(fenv_t) );
}
void get_env () {
if ( !my_fenv_ptr )
my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t));
fegetenv( my_fenv_ptr );
}
const cpu_ctl_env& set_env () const {
__TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." );
fesetenv( my_fenv_ptr );
return *this;
}
};
} // namespace d1
} // namespace detail
} // namespace tbb
#endif /* !__TBB_CPU_CTL_ENV_PRESENT */
#endif // __TBB_detail__machine_H
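The helpers above are easiest to read from their contracts: machine_log2 returns the zero-based index of the most significant set bit, and machine_reverse_bits mirrors the bit order of its argument. A minimal sketch (not part of the vendored header), assuming a translation unit that includes it:

#include <cassert>
#include <cstdint>

// Illustrative smoke test for the bit utilities defined above.
static void machine_bits_smoke_test() {
    using namespace tbb::detail;
    assert(machine_log2(1u) == 0);        // bit 0 is the highest set bit
    assert(machine_log2(8u) == 3);        // 0b1000 -> index 3
    assert(machine_log2(0x8000u) == 15);
    // Reversing a single byte maps bit 0 to bit 7.
    assert(machine_reverse_bits<std::uint8_t>(0x01) == 0x80);
}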

62
third_party/tbb/detail/_mutex_common.hh vendored Normal file

@@ -0,0 +1,62 @@
// clang-format off
/*
Copyright (c) 2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__mutex_common_H
#define __TBB_detail__mutex_common_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_utils.hh"
#if __TBB_CPP20_CONCEPTS_PRESENT
// MISSING #include <concepts>
namespace tbb {
namespace detail {
inline namespace d0 {
template <typename Lock, typename Mutex>
concept mutex_scoped_lock = std::default_initializable<Lock> &&
std::constructible_from<Lock, Mutex&> &&
requires( Lock& lock, Mutex& mutex ) {
lock.acquire(mutex);
{ lock.try_acquire(mutex) } -> adaptive_same_as<bool>;
lock.release();
};
template <typename Lock, typename Mutex>
concept rw_mutex_scoped_lock = mutex_scoped_lock<Lock, Mutex> &&
std::constructible_from<Lock, Mutex&, bool> &&
requires( Lock& lock, Mutex& mutex ) {
lock.acquire(mutex, false);
{ lock.try_acquire(mutex, false) } -> adaptive_same_as<bool>;
{ lock.upgrade_to_writer() } -> adaptive_same_as<bool>;
{ lock.downgrade_to_reader() } -> adaptive_same_as<bool>;
};
template <typename Mutex>
concept scoped_lockable = mutex_scoped_lock<typename Mutex::scoped_lock, Mutex>;
template <typename Mutex>
concept rw_scoped_lockable = scoped_lockable<Mutex> &&
rw_mutex_scoped_lock<typename Mutex::scoped_lock, Mutex>;
} // namespace d0
} // namespace detail
} // namespace tbb
#endif // __TBB_CPP20_CONCEPTS_PRESENT
#endif // __TBB_detail__mutex_common_H
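Under __TBB_CPP20_CONCEPTS_PRESENT, any mutex exposing the conventional scoped_lock shape models scoped_lockable. A hedged sketch, not part of the upstream header; toy_mutex is hypothetical and only demonstrates the required members, it does not actually synchronize anything:

// Hypothetical mutex shape accepted by tbb::detail::scoped_lockable.
class toy_mutex {
public:
    class scoped_lock {
        toy_mutex* held = nullptr;
    public:
        scoped_lock() = default;
        explicit scoped_lock(toy_mutex& m) { acquire(m); }
        void acquire(toy_mutex& m) { held = &m; }
        bool try_acquire(toy_mutex& m) { held = &m; return true; }
        void release() { held = nullptr; }
    };
};
#if __TBB_CPP20_CONCEPTS_PRESENT
static_assert(tbb::detail::scoped_lockable<toy_mutex>);
#endif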


@@ -0,0 +1,25 @@
// clang-format off
/*
Copyright (c) 2020-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// All public entities of the OneAPI Spec are available under oneapi namespace
// Define tbb namespace first as it might not be known yet
namespace tbb {}
namespace oneapi {
namespace tbb = ::tbb;
}
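The alias makes oneapi::tbb and ::tbb name the same namespace, so every entity is reachable under both spellings. A small illustration, with a hypothetical type added only for the check:

#include <type_traits>
namespace tbb { struct example_tag {}; }   // hypothetical, for demonstration only
static_assert(std::is_same_v<tbb::example_tag, oneapi::tbb::example_tag>,
              "same entity under both names");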

163
third_party/tbb/detail/_node_handle.hh vendored Normal file

@@ -0,0 +1,163 @@
// clang-format off
/*
Copyright (c) 2019-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__node_handle_H
#define __TBB_detail__node_handle_H
#include "third_party/tbb/detail/_allocator_traits.hh"
#include "third_party/tbb/detail/_assert.hh"
namespace tbb {
namespace detail {
namespace d1 {
// A structure to access private node handle methods in internal TBB classes
// Regular friend declaration is not convenient because classes which use node handle
// can be placed in the different versioning namespaces.
struct node_handle_accessor {
template <typename NodeHandleType>
static typename NodeHandleType::node* get_node_ptr( NodeHandleType& nh ) {
return nh.get_node_ptr();
}
template <typename NodeHandleType>
static NodeHandleType construct( typename NodeHandleType::node* node_ptr ) {
return NodeHandleType{node_ptr};
}
template <typename NodeHandleType>
static void deactivate( NodeHandleType& nh ) {
nh.deactivate();
}
}; // struct node_handle_accessor
template<typename Value, typename Node, typename Allocator>
class node_handle_base {
public:
using allocator_type = Allocator;
protected:
using node = Node;
using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>;
public:
node_handle_base() : my_node(nullptr), my_allocator() {}
node_handle_base(node_handle_base&& nh) : my_node(nh.my_node),
my_allocator(std::move(nh.my_allocator)) {
nh.my_node = nullptr;
}
__TBB_nodiscard bool empty() const { return my_node == nullptr; }
explicit operator bool() const { return my_node != nullptr; }
~node_handle_base() { internal_destroy(); }
node_handle_base& operator=( node_handle_base&& nh ) {
internal_destroy();
my_node = nh.my_node;
move_assign_allocators(my_allocator, nh.my_allocator);
nh.deactivate();
return *this;
}
void swap( node_handle_base& nh ) {
using std::swap;
swap(my_node, nh.my_node);
swap_allocators(my_allocator, nh.my_allocator);
}
allocator_type get_allocator() const {
return my_allocator;
}
protected:
node_handle_base( node* n ) : my_node(n) {}
void internal_destroy() {
if(my_node != nullptr) {
allocator_traits_type::destroy(my_allocator, my_node->storage());
typename allocator_traits_type::template rebind_alloc<node> node_allocator(my_allocator);
node_allocator.deallocate(my_node, 1);
}
}
node* get_node_ptr() { return my_node; }
void deactivate() { my_node = nullptr; }
node* my_node;
allocator_type my_allocator;
};
// node handle for maps
template<typename Key, typename Value, typename Node, typename Allocator>
class node_handle : public node_handle_base<Value, Node, Allocator> {
using base_type = node_handle_base<Value, Node, Allocator>;
public:
using key_type = Key;
using mapped_type = typename Value::second_type;
using allocator_type = typename base_type::allocator_type;
node_handle() = default;
key_type& key() const {
__TBB_ASSERT(!this->empty(), "Cannot get key from the empty node_type object");
return *const_cast<key_type*>(&(this->my_node->value().first));
}
mapped_type& mapped() const {
__TBB_ASSERT(!this->empty(), "Cannot get mapped value from the empty node_type object");
return this->my_node->value().second;
}
private:
friend struct node_handle_accessor;
node_handle( typename base_type::node* n ) : base_type(n) {}
}; // class node_handle
// node handle for sets
template<typename Key, typename Node, typename Allocator>
class node_handle<Key, Key, Node, Allocator> : public node_handle_base<Key, Node, Allocator> {
using base_type = node_handle_base<Key, Node, Allocator>;
public:
using value_type = Key;
using allocator_type = typename base_type::allocator_type;
node_handle() = default;
value_type& value() const {
__TBB_ASSERT(!this->empty(), "Cannot get value from the empty node_type object");
return *const_cast<value_type*>(&(this->my_node->value()));
}
private:
friend struct node_handle_accessor;
node_handle( typename base_type::node* n ) : base_type(n) {}
}; // class node_handle
template <typename Key, typename Value, typename Node, typename Allocator>
void swap( node_handle<Key, Value, Node, Allocator>& lhs,
node_handle<Key, Value, Node, Allocator>& rhs ) {
return lhs.swap(rhs);
}
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // __TBB_detail__node_handle_H
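These handles surface in the containers as node_type, letting an element move between containers without being copied. A hedged sketch of that pattern; the container name, header path, and unsafe_extract member reflect the public oneTBB interface and are assumptions here:

#include "third_party/tbb/concurrent_unordered_map.hh"  // path assumed from this import
#include <string>
#include <utility>

// Move one entry between maps via its node handle (no element copy).
static void move_entry(tbb::concurrent_unordered_map<int, std::string>& from,
                       tbb::concurrent_unordered_map<int, std::string>& to,
                       int key) {
    auto nh = from.unsafe_extract(key);    // map node handle: key()/mapped()
    if (!nh.empty()) {
        nh.mapped() += " (moved)";
        to.insert(std::move(nh));
    }
}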


@@ -0,0 +1,456 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_parallel_filters_H
#define __TBB_parallel_filters_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_task.hh"
#include "third_party/tbb/detail/_pipeline_filters_deduction.hh"
#include "third_party/tbb/tbb_allocator.hh"
#include "third_party/libcxx/cstddef"
#include "third_party/libcxx/cstdint"
namespace tbb {
namespace detail {
namespace d1 {
class base_filter;
}
namespace r1 {
TBB_EXPORT void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter&);
class pipeline;
class stage_task;
class input_buffer;
}
namespace d1 {
class filter_node;
//! A stage in a pipeline.
/** @ingroup algorithms */
class base_filter{
private:
//! Value used to mark "not in pipeline"
static base_filter* not_in_pipeline() { return reinterpret_cast<base_filter*>(std::intptr_t(-1)); }
public:
//! The lowest bit 0 is for parallel vs serial
static constexpr unsigned int filter_is_serial = 0x1;
//! 2nd bit distinguishes ordered vs unordered filters.
static constexpr unsigned int filter_is_out_of_order = 0x1<<1;
//! 3rd bit marks input filters emitting small objects
static constexpr unsigned int filter_may_emit_null = 0x1<<2;
base_filter(const base_filter&) = delete;
base_filter& operator=(const base_filter&) = delete;
protected:
explicit base_filter( unsigned int m ) :
next_filter_in_pipeline(not_in_pipeline()),
my_input_buffer(nullptr),
my_filter_mode(m),
my_pipeline(nullptr)
{}
// signal end-of-input for concrete_filters
void set_end_of_input() {
r1::set_end_of_input(*this);
}
public:
//! True if filter is serial.
bool is_serial() const {
return bool( my_filter_mode & filter_is_serial );
}
//! True if filter must receive stream in order.
bool is_ordered() const {
return (my_filter_mode & filter_is_serial) && !(my_filter_mode & filter_is_out_of_order);
}
//! true if an input filter can emit null
bool object_may_be_null() {
return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null;
}
//! Operate on an item from the input stream, and return item for output stream.
/** Returns nullptr if filter is a sink. */
virtual void* operator()( void* item ) = 0;
//! Destroy filter.
virtual ~base_filter() {};
//! Destroys item if pipeline was cancelled.
/** Required to prevent memory leaks.
Note it can be called concurrently even for serial filters.*/
virtual void finalize( void* /*item*/ ) {}
private:
//! Pointer to next filter in the pipeline.
base_filter* next_filter_in_pipeline;
//! Buffer for incoming tokens, or nullptr if not required.
/** The buffer is required if the filter is serial. */
r1::input_buffer* my_input_buffer;
friend class r1::stage_task;
friend class r1::pipeline;
friend void r1::set_end_of_input(d1::base_filter&);
//! Storage for filter mode and dynamically checked implementation version.
const unsigned int my_filter_mode;
//! Pointer to the pipeline.
r1::pipeline* my_pipeline;
};
template<typename Body, typename InputType, typename OutputType >
class concrete_filter;
//! input_filter control to signal end-of-input for parallel_pipeline
class flow_control {
bool is_pipeline_stopped = false;
flow_control() = default;
template<typename Body, typename InputType, typename OutputType > friend class concrete_filter;
template<typename Output>
__TBB_requires(std::copyable<Output>)
friend class input_node;
public:
void stop() { is_pipeline_stopped = true; }
};
// Emulate std::is_trivially_copyable (false positives not allowed, false negatives suboptimal but safe).
#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT
template<typename T> using tbb_trivially_copyable = std::is_trivially_copyable<T>;
#else
template<typename T> struct tbb_trivially_copyable { enum { value = false }; };
template<typename T> struct tbb_trivially_copyable < T* > { enum { value = true }; };
template<> struct tbb_trivially_copyable < bool > { enum { value = true }; };
template<> struct tbb_trivially_copyable < char > { enum { value = true }; };
template<> struct tbb_trivially_copyable < signed char > { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned char > { enum { value = true }; };
template<> struct tbb_trivially_copyable < short > { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned short > { enum { value = true }; };
template<> struct tbb_trivially_copyable < int > { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned int > { enum { value = true }; };
template<> struct tbb_trivially_copyable < long > { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned long > { enum { value = true }; };
template<> struct tbb_trivially_copyable < long long> { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned long long> { enum { value = true }; };
template<> struct tbb_trivially_copyable < float > { enum { value = true }; };
template<> struct tbb_trivially_copyable < double > { enum { value = true }; };
template<> struct tbb_trivially_copyable < long double > { enum { value = true }; };
#endif // __TBB_CPP11_TYPE_PROPERTIES_PRESENT
template<typename T>
struct use_allocator {
static constexpr bool value = sizeof(T) > sizeof(void *) || !tbb_trivially_copyable<T>::value;
};
// A helper class to customize how a type is passed between filters.
// Usage: token_helper<T, use_allocator<T>::value>
template<typename T, bool Allocate> struct token_helper;
// using tbb_allocator
template<typename T>
struct token_helper<T, true> {
using pointer = T*;
using value_type = T;
static pointer create_token(value_type && source) {
return new (r1::allocate_memory(sizeof(T))) T(std::move(source));
}
static value_type & token(pointer & t) { return *t; }
static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); }
static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); }
static void destroy_token(pointer token) {
token->~value_type();
r1::deallocate_memory(token);
}
};
// pointer specialization
template<typename T>
struct token_helper<T*, false> {
using pointer = T*;
using value_type = T*;
static pointer create_token(const value_type & source) { return source; }
static value_type & token(pointer & t) { return t; }
static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); }
static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); }
static void destroy_token( pointer /*token*/) {}
};
// converting type to and from void*, passing objects directly
template<typename T>
struct token_helper<T, false> {
typedef union {
T actual_value;
void * void_overlay;
} type_to_void_ptr_map;
using pointer = T; // not really a pointer in this case.
using value_type = T;
static pointer create_token(const value_type & source) { return source; }
static value_type & token(pointer & t) { return t; }
static void * cast_to_void_ptr(pointer ref) {
type_to_void_ptr_map mymap;
mymap.void_overlay = nullptr;
mymap.actual_value = ref;
return mymap.void_overlay;
}
static pointer cast_from_void_ptr(void * ref) {
type_to_void_ptr_map mymap;
mymap.void_overlay = ref;
return mymap.actual_value;
}
static void destroy_token( pointer /*token*/) {}
};
// intermediate
template<typename InputType, typename OutputType, typename Body>
class concrete_filter: public base_filter {
const Body& my_body;
using input_helper = token_helper<InputType, use_allocator<InputType >::value>;
using input_pointer = typename input_helper::pointer;
using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>;
using output_pointer = typename output_helper::pointer;
void* operator()(void* input) override {
input_pointer temp_input = input_helper::cast_from_void_ptr(input);
output_pointer temp_output = output_helper::create_token(tbb::detail::invoke(my_body, std::move(input_helper::token(temp_input))));
input_helper::destroy_token(temp_input);
return output_helper::cast_to_void_ptr(temp_output);
}
void finalize(void * input) override {
input_pointer temp_input = input_helper::cast_from_void_ptr(input);
input_helper::destroy_token(temp_input);
}
public:
concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {}
};
// input
template<typename OutputType, typename Body>
class concrete_filter<void, OutputType, Body>: public base_filter {
const Body& my_body;
using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>;
using output_pointer = typename output_helper::pointer;
void* operator()(void*) override {
flow_control control;
output_pointer temp_output = output_helper::create_token(my_body(control));
if(control.is_pipeline_stopped) {
output_helper::destroy_token(temp_output);
set_end_of_input();
return nullptr;
}
return output_helper::cast_to_void_ptr(temp_output);
}
public:
concrete_filter(unsigned int m, const Body& body) :
base_filter(m | filter_may_emit_null),
my_body(body)
{}
};
// output
template<typename InputType, typename Body>
class concrete_filter<InputType, void, Body>: public base_filter {
const Body& my_body;
using input_helper = token_helper<InputType, use_allocator<InputType >::value>;
using input_pointer = typename input_helper::pointer;
void* operator()(void* input) override {
input_pointer temp_input = input_helper::cast_from_void_ptr(input);
tbb::detail::invoke(my_body, std::move(input_helper::token(temp_input)));
input_helper::destroy_token(temp_input);
return nullptr;
}
void finalize(void* input) override {
input_pointer temp_input = input_helper::cast_from_void_ptr(input);
input_helper::destroy_token(temp_input);
}
public:
concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {}
};
template<typename Body>
class concrete_filter<void, void, Body>: public base_filter {
const Body& my_body;
void* operator()(void*) override {
flow_control control;
my_body(control);
void* output = control.is_pipeline_stopped ? nullptr : (void*)(std::intptr_t)-1;
return output;
}
public:
concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {}
};
class filter_node_ptr {
filter_node * my_node;
public:
filter_node_ptr() : my_node(nullptr) {}
filter_node_ptr(filter_node *);
~filter_node_ptr();
filter_node_ptr(const filter_node_ptr &);
filter_node_ptr(filter_node_ptr &&);
void operator=(filter_node *);
void operator=(const filter_node_ptr &);
void operator=(filter_node_ptr &&);
filter_node& operator*() const;
operator bool() const;
};
//! Abstract base class that represents a node in a parse tree underlying a filter class.
/** These nodes are always heap-allocated and can be shared by filter objects. */
class filter_node {
/** Count must be atomic because it is hidden state for user, but might be shared by threads. */
std::atomic<std::intptr_t> ref_count;
public:
filter_node_ptr left;
filter_node_ptr right;
protected:
filter_node() : ref_count(0), left(nullptr), right(nullptr) {
#ifdef __TBB_TEST_FILTER_NODE_COUNT
++(__TBB_TEST_FILTER_NODE_COUNT);
#endif
}
public:
filter_node(const filter_node_ptr& x, const filter_node_ptr& y) : filter_node(){
left = x;
right = y;
}
filter_node(const filter_node&) = delete;
filter_node& operator=(const filter_node&) = delete;
//! Add concrete_filter to pipeline
virtual base_filter* create_filter() const {
__TBB_ASSERT(false, "method of non-leaf was called");
return nullptr;
}
//! Increment reference count
void add_ref() { ref_count.fetch_add(1, std::memory_order_relaxed); }
//! Decrement reference count and delete if it becomes zero.
void remove_ref() {
__TBB_ASSERT(ref_count>0,"ref_count underflow");
if( ref_count.fetch_sub(1, std::memory_order_relaxed) == 1 ) {
this->~filter_node();
r1::deallocate_memory(this);
}
}
virtual ~filter_node() {
#ifdef __TBB_TEST_FILTER_NODE_COUNT
--(__TBB_TEST_FILTER_NODE_COUNT);
#endif
}
};
inline filter_node_ptr::filter_node_ptr(filter_node * nd) : my_node(nd) {
if (my_node) {
my_node->add_ref();
}
}
inline filter_node_ptr::~filter_node_ptr() {
if (my_node) {
my_node->remove_ref();
}
}
inline filter_node_ptr::filter_node_ptr(const filter_node_ptr & rhs) : my_node(rhs.my_node) {
if (my_node) {
my_node->add_ref();
}
}
inline filter_node_ptr::filter_node_ptr(filter_node_ptr && rhs) : my_node(rhs.my_node) {
rhs.my_node = nullptr;
}
inline void filter_node_ptr::operator=(filter_node * rhs) {
// Order of operations below carefully chosen so that reference counts remain correct
// in unlikely event that remove_ref throws exception.
filter_node* old = my_node;
my_node = rhs;
if (my_node) {
my_node->add_ref();
}
if (old) {
old->remove_ref();
}
}
inline void filter_node_ptr::operator=(const filter_node_ptr & rhs) {
*this = rhs.my_node;
}
inline void filter_node_ptr::operator=(filter_node_ptr && rhs) {
filter_node* old = my_node;
my_node = rhs.my_node;
rhs.my_node = nullptr;
if (old) {
old->remove_ref();
}
}
inline filter_node& filter_node_ptr::operator*() const{
__TBB_ASSERT(my_node,"nullptr node is used");
return *my_node;
}
inline filter_node_ptr::operator bool() const {
return my_node != nullptr;
}
//! Node in parse tree representing result of make_filter.
template<typename InputType, typename OutputType, typename Body>
class filter_node_leaf: public filter_node {
const unsigned int my_mode;
const Body my_body;
base_filter* create_filter() const override {
return new(r1::allocate_memory(sizeof(concrete_filter<InputType, OutputType, Body>))) concrete_filter<InputType, OutputType, Body>(my_mode,my_body);
}
public:
filter_node_leaf( unsigned int m, const Body& b ) : my_mode(m), my_body(b) {}
};
template <typename Body, typename Input = typename filter_body_types<decltype(&Body::operator())>::input_type>
using filter_input = typename std::conditional<std::is_same<Input, flow_control>::value, void, Input>::type;
template <typename Body>
using filter_output = typename filter_body_types<decltype(&Body::operator())>::output_type;
} // namespace d1
} // namespace detail
} // namespace tbb
#endif /* __TBB_parallel_filters_H */
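These classes back the public tbb::make_filter / tbb::parallel_pipeline interface: the input stage receives a flow_control to signal end of input, and operator& chains stages. A minimal sketch, assuming the parallel_pipeline.hh header from this same import:

#include "third_party/tbb/parallel_pipeline.hh"  // header name assumed

// Sum of squares of 1..10 computed through a three-stage pipeline.
static long sum_of_squares() {
    long total = 0;
    int next = 1;
    tbb::parallel_pipeline(/*max_number_of_live_tokens=*/8,
        tbb::make_filter<void, int>(tbb::filter_mode::serial_in_order,
            [&](tbb::flow_control& fc) -> int {
                if (next > 10) { fc.stop(); return 0; }   // signal end of input
                return next++;
            }) &
        tbb::make_filter<int, long>(tbb::filter_mode::parallel,
            [](int x) -> long { return long(x) * x; }) &  // stateless, runs in parallel
        tbb::make_filter<long, void>(tbb::filter_mode::serial_in_order,
            [&](long v) { total += v; }));                // serial sink
    return total;  // 385
}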


@@ -0,0 +1,47 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__pipeline_filters_deduction_H
#define __TBB__pipeline_filters_deduction_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/libcxx/utility"
#include "third_party/libcxx/type_traits"
namespace tbb {
namespace detail {
namespace d1 {
template <typename Input, typename Output>
struct declare_filter_types {
using input_type = typename std::remove_const<typename std::remove_reference<Input>::type>::type;
using output_type = typename std::remove_const<typename std::remove_reference<Output>::type>::type;
};
template <typename T> struct filter_body_types;
template <typename T, typename Input, typename Output>
struct filter_body_types<Output(T::*)(Input) const> : declare_filter_types<Input, Output> {};
template <typename T, typename Input, typename Output>
struct filter_body_types<Output(T::*)(Input)> : declare_filter_types<Input, Output> {};
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // __TBB__pipeline_filters_deduction_H
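In effect, filter_body_types strips references and const from the call operator's argument and result; filter_input (in the previous header) additionally maps a flow_control& argument to void. A short check with a hypothetical Squarer body:

#include <string>
#include <type_traits>

// Hypothetical filter body with a single const call operator.
struct Squarer {
    long operator()(const std::string& s) const { return long(s.size()) * long(s.size()); }
};

using deduced = tbb::detail::d1::filter_body_types<decltype(&Squarer::operator())>;
static_assert(std::is_same<deduced::input_type, std::string>::value, "const& stripped");
static_assert(std::is_same<deduced::output_type, long>::value, "result type kept");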

131
third_party/tbb/detail/_range_common.hh vendored Normal file

@@ -0,0 +1,131 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__range_common_H
#define __TBB_detail__range_common_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_utils.hh"
#if __TBB_CPP20_CONCEPTS_PRESENT
// MISSING #include <concepts>
#endif
#include "third_party/libcxx/iterator"
namespace tbb {
namespace detail {
inline namespace d0 {
//! Dummy type that distinguishes splitting constructor from copy constructor.
/**
* See description of parallel_for and parallel_reduce for example usages.
* @ingroup algorithms
*/
class split {};
//! Type enables transmission of splitting proportion from partitioners to range objects
/**
* In order to make use of such facility Range objects must implement
* splitting constructor with this type passed.
*/
class proportional_split : no_assign {
public:
proportional_split(size_t _left = 1, size_t _right = 1) : my_left(_left), my_right(_right) { }
size_t left() const { return my_left; }
size_t right() const { return my_right; }
// used when range does not support proportional split
explicit operator split() const { return split(); }
private:
size_t my_left, my_right;
};
template <typename Range, typename = void>
struct range_split_object_provider {
template <typename PartitionerSplitType>
static split get( PartitionerSplitType& ) { return split(); }
};
template <typename Range>
struct range_split_object_provider<Range,
typename std::enable_if<std::is_constructible<Range, Range&, proportional_split&>::value>::type> {
template <typename PartitionerSplitType>
static PartitionerSplitType& get( PartitionerSplitType& split_obj ) { return split_obj; }
};
template <typename Range, typename PartitionerSplitType>
auto get_range_split_object( PartitionerSplitType& split_obj )
-> decltype(range_split_object_provider<Range>::get(split_obj)) {
return range_split_object_provider<Range>::get(split_obj);
}
template <typename Range>
using range_iterator_type = decltype(std::begin(std::declval<Range&>()));
#if __TBB_CPP20_CONCEPTS_PRESENT
template <typename Iterator>
using iterator_reference_type = typename std::iterator_traits<Iterator>::reference;
template <typename Range>
using range_reference_type = iterator_reference_type<range_iterator_type<Range>>;
template <typename Value>
concept blocked_range_value = std::copyable<Value> &&
requires( const std::remove_reference_t<Value>& lhs, const std::remove_reference_t<Value>& rhs ) {
{ lhs < rhs } -> relaxed_convertible_to<bool>;
{ lhs - rhs } -> std::convertible_to<std::size_t>;
{ lhs + (rhs - lhs) } -> std::convertible_to<Value>;
};
template <typename T>
concept splittable = std::constructible_from<T, T&, tbb::detail::split>;
template <typename Range>
concept tbb_range = std::copy_constructible<Range> &&
splittable<Range> &&
requires( const std::remove_reference_t<Range>& range ) {
{ range.empty() } -> relaxed_convertible_to<bool>;
{ range.is_divisible() } -> relaxed_convertible_to<bool>;
};
template <typename Iterator>
constexpr bool iterator_concept_helper( std::input_iterator_tag ) {
return std::input_iterator<Iterator>;
}
template <typename Iterator>
constexpr bool iterator_concept_helper( std::random_access_iterator_tag ) {
return std::random_access_iterator<Iterator>;
}
template <typename Iterator, typename IteratorTag>
concept iterator_satisfies = requires (IteratorTag tag) {
requires iterator_concept_helper<Iterator>(tag);
};
template <typename Sequence, typename IteratorTag>
concept container_based_sequence = requires( Sequence& seq ) {
{ std::begin(seq) } -> iterator_satisfies<IteratorTag>;
{ std::end(seq) } -> iterator_satisfies<IteratorTag>;
};
#endif // __TBB_CPP20_CONCEPTS_PRESENT
} // namespace d0
} // namespace detail
} // namespace tbb
#endif // __TBB_detail__range_common_H
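A range type plugs into these conventions by providing a splitting constructor that takes the split tag plus empty() and is_divisible(). A hedged sketch, not part of the header; the detail-qualified tbb::detail::split is used because that is what this file declares (upstream exposes it publicly as tbb::split):

#include <cstddef>

// Hypothetical half-open index range honoring the splitting convention above.
class index_range {
    std::size_t my_begin, my_end;
public:
    index_range(std::size_t b, std::size_t e) : my_begin(b), my_end(e) {}
    // Splitting constructor: takes the upper half away from r.
    index_range(index_range& r, tbb::detail::split)
        : my_begin((r.my_begin + r.my_end) / 2), my_end(r.my_end) {
        r.my_end = my_begin;
    }
    bool empty() const { return my_begin == my_end; }
    bool is_divisible() const { return my_end - my_begin > 1; }
    std::size_t begin() const { return my_begin; }
    std::size_t end() const { return my_end; }
};
#if __TBB_CPP20_CONCEPTS_PRESENT
static_assert(tbb::detail::tbb_range<index_range>);
#endif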

163
third_party/tbb/detail/_rtm_mutex.hh vendored Normal file

@@ -0,0 +1,163 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__rtm_mutex_impl_H
#define __TBB__rtm_mutex_impl_H
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/spin_mutex.hh"
#include "third_party/tbb/profiling.hh"
namespace tbb {
namespace detail {
namespace r1 {
struct rtm_mutex_impl;
}
namespace d1 {
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Suppress warning: structure was padded due to alignment specifier
#pragma warning (push)
#pragma warning (disable: 4324)
#endif
/** A rtm_mutex is an speculation-enabled spin mutex.
It should be used for locking short critical sections where the lock is
contended but the data it protects are not. If zero-initialized, the
mutex is considered unheld.
@ingroup synchronization */
class alignas(max_nfs_size) rtm_mutex : private spin_mutex {
private:
enum class rtm_state {
rtm_none,
rtm_transacting,
rtm_real
};
public:
//! Constructors
rtm_mutex() noexcept {
create_itt_sync(this, "tbb::speculative_spin_mutex", "");
}
//! Destructor
~rtm_mutex() = default;
//! Represents acquisition of a mutex.
class scoped_lock {
public:
friend class rtm_mutex;
//! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */
constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) {}
//! Acquire lock on given mutex.
scoped_lock(rtm_mutex& m) : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) {
acquire(m);
}
//! Release lock (if lock is held).
~scoped_lock() {
if(m_transaction_state != rtm_state::rtm_none) {
release();
}
}
//! No Copy
scoped_lock(const scoped_lock&) = delete;
scoped_lock& operator=(const scoped_lock&) = delete;
//! Acquire lock on given mutex.
void acquire(rtm_mutex& m);
//! Try acquire lock on given mutex.
bool try_acquire(rtm_mutex& m);
//! Release lock
void release();
private:
rtm_mutex* m_mutex;
rtm_state m_transaction_state;
friend r1::rtm_mutex_impl;
};
//! Mutex traits
static constexpr bool is_rw_mutex = false;
static constexpr bool is_recursive_mutex = false;
static constexpr bool is_fair_mutex = false;
private:
friend r1::rtm_mutex_impl;
}; // end of rtm_mutex
} // namespace d1
namespace r1 {
//! Internal acquire lock.
// only_speculate == true if we're doing a try_lock, else false.
TBB_EXPORT void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&, bool only_speculate = false);
//! Internal try_acquire lock.
TBB_EXPORT bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&);
//! Internal release lock.
TBB_EXPORT void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock&);
} // namespace r1
namespace d1 {
//! Acquire lock on given mutex.
inline void rtm_mutex::scoped_lock::acquire(rtm_mutex& m) {
__TBB_ASSERT(!m_mutex, "lock is already acquired");
r1::acquire(m, *this);
}
//! Try acquire lock on given mutex.
inline bool rtm_mutex::scoped_lock::try_acquire(rtm_mutex& m) {
__TBB_ASSERT(!m_mutex, "lock is already acquired");
return r1::try_acquire(m, *this);
}
//! Release lock
inline void rtm_mutex::scoped_lock::release() {
__TBB_ASSERT(m_mutex, "lock is not acquired");
__TBB_ASSERT(m_transaction_state != rtm_state::rtm_none, "lock is not acquired");
return r1::release(*this);
}
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop) // 4324 warning
#endif
#if TBB_USE_PROFILING_TOOLS
inline void set_name(rtm_mutex& obj, const char* name) {
itt_set_sync_name(&obj, name);
}
#if (_WIN32||_WIN64)
inline void set_name(rtm_mutex& obj, const wchar_t* name) {
itt_set_sync_name(&obj, name);
}
#endif // WIN
#else
inline void set_name(rtm_mutex&, const char*) {}
#if (_WIN32||_WIN64)
inline void set_name(rtm_mutex&, const wchar_t*) {}
#endif // WIN
#endif
} // namespace d1
} // namespace detail
} // namespace tbb
#endif /* __TBB__rtm_mutex_impl_H */
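Client code uses the speculative mutex through its scoped_lock, exactly like spin_mutex; the transaction either commits on release or falls back to a real lock. An illustrative sketch against the detail-level name (the public alias suggested by the ITT string above, tbb::speculative_spin_mutex, is not declared in this header):

// Illustrative critical section guarded by the speculative mutex above.
static tbb::detail::d1::rtm_mutex g_lock;
static int g_counter = 0;

static void bump_counter() {
    // Elides the lock via RTM when the hardware allows it, otherwise spins for real.
    tbb::detail::d1::rtm_mutex::scoped_lock guard(g_lock);
    ++g_counter;
}   // releasing the guard commits the transaction or unlocks the mutex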

216
third_party/tbb/detail/_rtm_rw_mutex.hh vendored Normal file

@@ -0,0 +1,216 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__rtm_rw_mutex_H
#define __TBB_detail__rtm_rw_mutex_H
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/spin_rw_mutex.hh"
#include "third_party/libcxx/atomic"
namespace tbb {
namespace detail {
namespace r1 {
struct rtm_rw_mutex_impl;
}
namespace d1 {
constexpr std::size_t speculation_granularity = 64;
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Suppress warning: structure was padded due to alignment specifier
#pragma warning (push)
#pragma warning (disable: 4324)
#endif
//! Fast, unfair, spinning speculation-enabled reader-writer lock with backoff and writer-preference
/** @ingroup synchronization */
class alignas(max_nfs_size) rtm_rw_mutex : private spin_rw_mutex {
friend struct r1::rtm_rw_mutex_impl;
private:
enum class rtm_type {
rtm_not_in_mutex,
rtm_transacting_reader,
rtm_transacting_writer,
rtm_real_reader,
rtm_real_writer
};
public:
//! Constructors
rtm_rw_mutex() noexcept : write_flag(false) {
create_itt_sync(this, "tbb::speculative_spin_rw_mutex", "");
}
//! Destructor
~rtm_rw_mutex() = default;
//! Represents acquisition of a mutex.
class scoped_lock {
friend struct r1::rtm_rw_mutex_impl;
public:
//! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */
constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) {}
//! Acquire lock on given mutex.
scoped_lock(rtm_rw_mutex& m, bool write = true) : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) {
acquire(m, write);
}
//! Release lock (if lock is held).
~scoped_lock() {
if(m_transaction_state != rtm_type::rtm_not_in_mutex) {
release();
}
}
//! No Copy
scoped_lock(const scoped_lock&) = delete;
scoped_lock& operator=(const scoped_lock&) = delete;
//! Acquire lock on given mutex.
inline void acquire(rtm_rw_mutex& m, bool write = true);
//! Try acquire lock on given mutex.
inline bool try_acquire(rtm_rw_mutex& m, bool write = true);
//! Release lock
inline void release();
//! Upgrade reader to become a writer.
/** Returns whether the upgrade happened without releasing and re-acquiring the lock */
inline bool upgrade_to_writer();
//! Downgrade writer to become a reader.
inline bool downgrade_to_reader();
inline bool is_writer() const;
private:
rtm_rw_mutex* m_mutex;
rtm_type m_transaction_state;
};
//! Mutex traits
static constexpr bool is_rw_mutex = true;
static constexpr bool is_recursive_mutex = false;
static constexpr bool is_fair_mutex = false;
private:
alignas(speculation_granularity) std::atomic<bool> write_flag;
};
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop) // 4324 warning
#endif
} // namespace d1
namespace r1 {
//! Internal acquire write lock.
// only_speculate == true if we're doing a try_lock, else false.
TBB_EXPORT void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false);
//! Internal acquire read lock.
// only_speculate == true if we're doing a try_lock, else false.
TBB_EXPORT void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false);
//! Internal upgrade reader to become a writer.
TBB_EXPORT bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock&);
//! Internal downgrade writer to become a reader.
TBB_EXPORT bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock&);
//! Internal try_acquire write lock.
TBB_EXPORT bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&);
//! Internal try_acquire read lock.
TBB_EXPORT bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&);
//! Internal release lock.
TBB_EXPORT void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock&);
}
namespace d1 {
//! Acquire lock on given mutex.
void rtm_rw_mutex::scoped_lock::acquire(rtm_rw_mutex& m, bool write) {
__TBB_ASSERT(!m_mutex, "lock is already acquired");
if (write) {
r1::acquire_writer(m, *this);
} else {
r1::acquire_reader(m, *this);
}
}
//! Try acquire lock on given mutex.
bool rtm_rw_mutex::scoped_lock::try_acquire(rtm_rw_mutex& m, bool write) {
__TBB_ASSERT(!m_mutex, "lock is already acquired");
if (write) {
return r1::try_acquire_writer(m, *this);
} else {
return r1::try_acquire_reader(m, *this);
}
}
//! Release lock
void rtm_rw_mutex::scoped_lock::release() {
__TBB_ASSERT(m_mutex, "lock is not acquired");
__TBB_ASSERT(m_transaction_state != rtm_type::rtm_not_in_mutex, "lock is not acquired");
return r1::release(*this);
}
//! Upgrade reader to become a writer.
/** Returns whether the upgrade happened without releasing and re-acquiring the lock */
bool rtm_rw_mutex::scoped_lock::upgrade_to_writer() {
__TBB_ASSERT(m_mutex, "lock is not acquired");
if (m_transaction_state == rtm_type::rtm_transacting_writer || m_transaction_state == rtm_type::rtm_real_writer) {
return true; // Already a writer
}
return r1::upgrade(*this);
}
//! Downgrade writer to become a reader.
bool rtm_rw_mutex::scoped_lock::downgrade_to_reader() {
__TBB_ASSERT(m_mutex, "lock is not acquired");
if (m_transaction_state == rtm_type::rtm_transacting_reader || m_transaction_state == rtm_type::rtm_real_reader) {
return true; // Already a reader
}
return r1::downgrade(*this);
}
bool rtm_rw_mutex::scoped_lock::is_writer() const {
__TBB_ASSERT(m_mutex, "lock is not acquired");
return m_transaction_state == rtm_type::rtm_transacting_writer || m_transaction_state == rtm_type::rtm_real_writer;
}
#if TBB_USE_PROFILING_TOOLS
inline void set_name(rtm_rw_mutex& obj, const char* name) {
itt_set_sync_name(&obj, name);
}
#if (_WIN32||_WIN64)
inline void set_name(rtm_rw_mutex& obj, const wchar_t* name) {
itt_set_sync_name(&obj, name);
}
#endif // WIN
#else
inline void set_name(rtm_rw_mutex&, const char*) {}
#if (_WIN32||_WIN64)
inline void set_name(rtm_rw_mutex&, const wchar_t*) {}
#endif // WIN
#endif
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // __TBB_detail__rtm_rw_mutex_H
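For the reader-writer variant the common pattern is to read first and upgrade only when a write turns out to be necessary; upgrade_to_writer() reports whether the lock stayed held across the upgrade, so the reader must re-validate when it returns false. A hedged sketch with hypothetical shared data:

#include <map>
#include <string>

static tbb::detail::d1::rtm_rw_mutex g_table_lock;
static std::map<int, std::string> g_table;   // hypothetical data protected by the lock

static std::string lookup_or_insert(int key) {
    tbb::detail::d1::rtm_rw_mutex::scoped_lock guard(g_table_lock, /*write=*/false);
    auto it = g_table.find(key);
    if (it != g_table.end()) return it->second;
    // Upgrade to writer; if the lock was dropped and re-acquired, re-check first.
    if (!guard.upgrade_to_writer()) {
        it = g_table.find(key);
        if (it != g_table.end()) return it->second;
    }
    return g_table.emplace(key, "default").first->second;
}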

175
third_party/tbb/detail/_scoped_lock.hh vendored Normal file

@@ -0,0 +1,175 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail_scoped_lock_H
#define __TBB_detail_scoped_lock_H
namespace tbb {
namespace detail {
namespace d1 {
// unique_scoped_lock supposes that Mutex operations never throw
template <typename Mutex>
class unique_scoped_lock {
//! Points to currently held Mutex, or nullptr if no lock is held.
Mutex* m_mutex{};
public:
//! Construct without acquiring a Mutex.
constexpr unique_scoped_lock() noexcept : m_mutex(nullptr) {}
//! Construct and acquire lock on a Mutex.
unique_scoped_lock(Mutex& m) {
acquire(m);
}
//! No Copy
unique_scoped_lock(const unique_scoped_lock&) = delete;
unique_scoped_lock& operator=(const unique_scoped_lock&) = delete;
//! Acquire lock.
void acquire(Mutex& m) {
__TBB_ASSERT(m_mutex == nullptr, "The mutex is already acquired");
m_mutex = &m;
m.lock();
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_acquire(Mutex& m) {
__TBB_ASSERT(m_mutex == nullptr, "The mutex is already acquired");
bool succeed = m.try_lock();
if (succeed) {
m_mutex = &m;
}
return succeed;
}
//! Release lock
void release() {
__TBB_ASSERT(m_mutex, "release on Mutex::unique_scoped_lock that is not holding a lock");
m_mutex->unlock();
m_mutex = nullptr;
}
//! Destroy lock. If holding a lock, releases the lock first.
~unique_scoped_lock() {
if (m_mutex) {
release();
}
}
};
// rw_scoped_lock supposes that Mutex operations never throw
template <typename Mutex>
class rw_scoped_lock {
public:
//! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */
constexpr rw_scoped_lock() noexcept {}
//! Acquire lock on given mutex.
rw_scoped_lock(Mutex& m, bool write = true) {
acquire(m, write);
}
//! Release lock (if lock is held).
~rw_scoped_lock() {
if (m_mutex) {
release();
}
}
//! No Copy
rw_scoped_lock(const rw_scoped_lock&) = delete;
rw_scoped_lock& operator=(const rw_scoped_lock&) = delete;
//! Acquire lock on given mutex.
void acquire(Mutex& m, bool write = true) {
__TBB_ASSERT(m_mutex == nullptr, "The mutex is already acquired");
m_is_writer = write;
m_mutex = &m;
if (write) {
m_mutex->lock();
} else {
m_mutex->lock_shared();
}
}
//! Try acquire lock on given mutex.
bool try_acquire(Mutex& m, bool write = true) {
bool succeed = write ? m.try_lock() : m.try_lock_shared();
if (succeed) {
m_mutex = &m;
m_is_writer = write;
}
return succeed;
}
//! Release lock.
void release() {
__TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired");
Mutex* m = m_mutex;
m_mutex = nullptr;
if (m_is_writer) {
m->unlock();
} else {
m->unlock_shared();
}
}
//! Upgrade reader to become a writer.
/** Returns whether the upgrade happened without releasing and re-acquiring the lock */
bool upgrade_to_writer() {
__TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired");
if (m_is_writer) {
return true; // Already a writer
}
m_is_writer = true;
return m_mutex->upgrade();
}
//! Downgrade writer to become a reader.
bool downgrade_to_reader() {
__TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired");
if (m_is_writer) {
m_mutex->downgrade();
m_is_writer = false;
}
return true;
}
bool is_writer() const {
__TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired");
return m_is_writer;
}
protected:
//! The pointer to the current mutex that is held, or nullptr if no mutex is held.
Mutex* m_mutex {nullptr};
//! If mutex != nullptr, then is_writer is true if holding a writer lock, false if holding a reader lock.
/** Not defined if not holding a lock. */
bool m_is_writer {false};
};
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // __TBB_detail_scoped_lock_H
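These wrappers work with any mutex exposing the members they call: unique_scoped_lock needs lock/try_lock/unlock, while rw_scoped_lock additionally expects lock_shared, unlock_shared, upgrade and downgrade. A minimal sketch of the first one over std::mutex, which satisfies that interface:

#include <mutex>
#include <vector>

static std::mutex g_items_lock;     // any type with lock/try_lock/unlock works
static std::vector<int> g_items;

static void push_item(int v) {
    // Releases in the destructor like std::lock_guard, but also offers
    // acquire/try_acquire/release as explicit operations.
    tbb::detail::d1::unique_scoped_lock<std::mutex> guard(g_items_lock);
    g_items.push_back(v);
}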

567
third_party/tbb/detail/_segment_table.hh vendored Normal file

@@ -0,0 +1,567 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__segment_table_H
#define __TBB_detail__segment_table_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_allocator_traits.hh"
#include "third_party/tbb/detail/_template_helpers.hh"
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_exception.hh"
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/type_traits"
#include "third_party/libcxx/memory"
#include "third_party/libcxx/cstring"
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning(push)
#pragma warning(disable: 4127) // warning C4127: conditional expression is constant
#endif
namespace tbb {
namespace detail {
namespace d1 {
template <typename T, typename Allocator, typename DerivedType, std::size_t PointersPerEmbeddedTable>
class segment_table {
public:
using value_type = T;
using segment_type = T*;
using atomic_segment = std::atomic<segment_type>;
using segment_table_type = atomic_segment*;
using size_type = std::size_t;
using segment_index_type = std::size_t;
using allocator_type = Allocator;
using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>;
using segment_table_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_segment>;
protected:
using segment_table_allocator_traits = tbb::detail::allocator_traits<segment_table_allocator_type>;
using derived_type = DerivedType;
static constexpr size_type pointers_per_embedded_table = PointersPerEmbeddedTable;
static constexpr size_type pointers_per_long_table = sizeof(size_type) * 8;
public:
segment_table( const allocator_type& alloc = allocator_type() )
: my_segment_table_allocator(alloc), my_segment_table(nullptr)
, my_first_block{}, my_size{}, my_segment_table_allocation_failed{}
{
my_segment_table.store(my_embedded_table, std::memory_order_relaxed);
zero_table(my_embedded_table, pointers_per_embedded_table);
}
segment_table( const segment_table& other )
: my_segment_table_allocator(segment_table_allocator_traits::
select_on_container_copy_construction(other.my_segment_table_allocator))
, my_segment_table(nullptr), my_first_block{}, my_size{}, my_segment_table_allocation_failed{}
{
my_segment_table.store(my_embedded_table, std::memory_order_relaxed);
zero_table(my_embedded_table, pointers_per_embedded_table);
try_call( [&] {
internal_transfer(other, copy_segment_body_type{*this});
} ).on_exception( [&] {
clear();
});
}
segment_table( const segment_table& other, const allocator_type& alloc )
: my_segment_table_allocator(alloc), my_segment_table(nullptr)
, my_first_block{}, my_size{}, my_segment_table_allocation_failed{}
{
my_segment_table.store(my_embedded_table, std::memory_order_relaxed);
zero_table(my_embedded_table, pointers_per_embedded_table);
try_call( [&] {
internal_transfer(other, copy_segment_body_type{*this});
} ).on_exception( [&] {
clear();
});
}
segment_table( segment_table&& other )
: my_segment_table_allocator(std::move(other.my_segment_table_allocator)), my_segment_table(nullptr)
, my_first_block{}, my_size{}, my_segment_table_allocation_failed{}
{
my_segment_table.store(my_embedded_table, std::memory_order_relaxed);
zero_table(my_embedded_table, pointers_per_embedded_table);
internal_move(std::move(other));
}
segment_table( segment_table&& other, const allocator_type& alloc )
: my_segment_table_allocator(alloc), my_segment_table(nullptr), my_first_block{}
, my_size{}, my_segment_table_allocation_failed{}
{
my_segment_table.store(my_embedded_table, std::memory_order_relaxed);
zero_table(my_embedded_table, pointers_per_embedded_table);
using is_equal_type = typename segment_table_allocator_traits::is_always_equal;
internal_move_construct_with_allocator(std::move(other), alloc, is_equal_type());
}
~segment_table() {
clear();
}
segment_table& operator=( const segment_table& other ) {
if (this != &other) {
copy_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator);
internal_transfer(other, copy_segment_body_type{*this});
}
return *this;
}
segment_table& operator=( segment_table&& other )
noexcept(derived_type::is_noexcept_assignment)
{
using pocma_type = typename segment_table_allocator_traits::propagate_on_container_move_assignment;
using is_equal_type = typename segment_table_allocator_traits::is_always_equal;
if (this != &other) {
move_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator);
internal_move_assign(std::move(other), tbb::detail::disjunction<is_equal_type, pocma_type>());
}
return *this;
}
void swap( segment_table& other )
noexcept(derived_type::is_noexcept_swap)
{
using is_equal_type = typename segment_table_allocator_traits::is_always_equal;
using pocs_type = typename segment_table_allocator_traits::propagate_on_container_swap;
if (this != &other) {
swap_allocators(my_segment_table_allocator, other.my_segment_table_allocator);
internal_swap(other, tbb::detail::disjunction<is_equal_type, pocs_type>());
}
}
segment_type get_segment( segment_index_type index ) const {
return get_table()[index] + segment_base(index);
}
value_type& operator[]( size_type index ) {
return internal_subscript<true>(index);
}
const value_type& operator[]( size_type index ) const {
return const_cast<segment_table*>(this)->internal_subscript<true>(index);
}
const segment_table_allocator_type& get_allocator() const {
return my_segment_table_allocator;
}
segment_table_allocator_type& get_allocator() {
return my_segment_table_allocator;
}
void enable_segment( segment_type& segment, segment_table_type table, segment_index_type seg_index, size_type index ) {
// Allocate new segment
segment_type new_segment = self()->create_segment(table, seg_index, index);
if (new_segment != nullptr) {
// Store (new_segment - segment_base) into the segment table to allow access to the table by index via
// my_segment_table[segment_index_of(index)][index]
segment_type disabled_segment = nullptr;
if (!table[seg_index].compare_exchange_strong(disabled_segment, new_segment - segment_base(seg_index))) {
// compare_exchange failed => some other thread has already enabled this segment
// Deallocate the memory
self()->deallocate_segment(new_segment, seg_index);
}
}
segment = table[seg_index].load(std::memory_order_acquire);
__TBB_ASSERT(segment != nullptr, "If create_segment returned nullptr, the element should be stored in the table");
}
void delete_segment( segment_index_type seg_index ) {
segment_type segment_to_delete = self()->nullify_segment(get_table(), seg_index);
if (segment_to_delete == segment_allocation_failure_tag) {
return;
}
segment_to_delete += segment_base(seg_index);
// Deallocate the segment
self()->destroy_segment(segment_to_delete, seg_index);
}
size_type number_of_segments( segment_table_type table ) const {
// If the given table is the embedded table, return the number of embedded segments;
// otherwise, return the maximum number of segments
return table == my_embedded_table ? pointers_per_embedded_table : pointers_per_long_table;
}
size_type capacity() const noexcept {
segment_table_type table = get_table();
size_type num_segments = number_of_segments(table);
for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) {
// Check if the pointer is valid (allocated)
if (table[seg_index].load(std::memory_order_relaxed) <= segment_allocation_failure_tag) {
return segment_base(seg_index);
}
}
return segment_base(num_segments);
}
size_type find_last_allocated_segment( segment_table_type table ) const noexcept {
size_type end = 0;
size_type num_segments = number_of_segments(table);
for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) {
// Check if the pointer is valid (allocated)
if (table[seg_index].load(std::memory_order_relaxed) > segment_allocation_failure_tag) {
end = seg_index + 1;
}
}
return end;
}
void reserve( size_type n ) {
if (n > allocator_traits_type::max_size(my_segment_table_allocator)) {
throw_exception(exception_id::reservation_length_error);
}
size_type size = my_size.load(std::memory_order_relaxed);
segment_index_type start_seg_idx = size == 0 ? 0 : segment_index_of(size - 1) + 1;
for (segment_index_type seg_idx = start_seg_idx; segment_base(seg_idx) < n; ++seg_idx) {
size_type first_index = segment_base(seg_idx);
internal_subscript<true>(first_index);
}
}
void clear() {
clear_segments();
clear_table();
my_size.store(0, std::memory_order_relaxed);
my_first_block.store(0, std::memory_order_relaxed);
}
void clear_segments() {
segment_table_type current_segment_table = get_table();
for (size_type i = number_of_segments(current_segment_table); i != 0; --i) {
if (current_segment_table[i - 1].load(std::memory_order_relaxed) != nullptr) {
// If the segment was enabled - disable and deallocate it
delete_segment(i - 1);
}
}
}
void clear_table() {
segment_table_type current_segment_table = get_table();
if (current_segment_table != my_embedded_table) {
// If the active table is not the embedded one - deallocate the active table
for (size_type i = 0; i != pointers_per_long_table; ++i) {
segment_table_allocator_traits::destroy(my_segment_table_allocator, &current_segment_table[i]);
}
segment_table_allocator_traits::deallocate(my_segment_table_allocator, current_segment_table, pointers_per_long_table);
my_segment_table.store(my_embedded_table, std::memory_order_relaxed);
zero_table(my_embedded_table, pointers_per_embedded_table);
}
}
void extend_table_if_necessary(segment_table_type& table, size_type start_index, size_type end_index) {
// extend_segment_table if an active table is an embedded table
// and the requested index is not in the embedded table
if (table == my_embedded_table && end_index > embedded_table_size) {
if (start_index <= embedded_table_size) {
try_call([&] {
table = self()->allocate_long_table(my_embedded_table, start_index);
// It is possible that the table was extended by the thread that allocated first_block.
// In this case it is necessary to re-read the current table.
if (table) {
my_segment_table.store(table, std::memory_order_release);
} else {
table = my_segment_table.load(std::memory_order_acquire);
}
}).on_exception([&] {
my_segment_table_allocation_failed.store(true, std::memory_order_relaxed);
});
} else {
atomic_backoff backoff;
do {
if (my_segment_table_allocation_failed.load(std::memory_order_relaxed)) {
throw_exception(exception_id::bad_alloc);
}
backoff.pause();
table = my_segment_table.load(std::memory_order_acquire);
} while (table == my_embedded_table);
}
}
}
// Return the segment where index is stored
static constexpr segment_index_type segment_index_of( size_type index ) {
return size_type(tbb::detail::log2(uintptr_t(index|1)));
}
// Needed to calculate the offset in segment
static constexpr size_type segment_base( size_type index ) {
return size_type(1) << index & ~size_type(1);
}
// Return size of the segment
static constexpr size_type segment_size( size_type index ) {
return index == 0 ? 2 : size_type(1) << index;
}
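// Worked example (editorial note, not in the original sources): segments have
// sizes 2, 2, 4, 8, 16, ..., so segment s covers the half-open index range
// [segment_base(s), segment_base(s) + segment_size(s)). For index 5:
// segment_index_of(5) = log2(5|1) = 2, segment_base(2) = 4, segment_size(2) = 4,
// so element 5 lives at offset 1 inside segment 2. Because enable_segment()
// stores (segment - segment_base) in the table, the lookup is simply
// get_table()[segment_index_of(index)][index].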
private:
derived_type* self() {
return static_cast<derived_type*>(this);
}
struct copy_segment_body_type {
void operator()( segment_index_type index, segment_type from, segment_type to ) const {
my_instance.self()->copy_segment(index, from, to);
}
segment_table& my_instance;
};
struct move_segment_body_type {
void operator()( segment_index_type index, segment_type from, segment_type to ) const {
my_instance.self()->move_segment(index, from, to);
}
segment_table& my_instance;
};
// Transfers all segments from the other table
template <typename TransferBody>
void internal_transfer( const segment_table& other, TransferBody transfer_segment ) {
static_cast<derived_type*>(this)->destroy_elements();
assign_first_block_if_necessary(other.my_first_block.load(std::memory_order_relaxed));
my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed);
segment_table_type other_table = other.get_table();
size_type end_segment_size = segment_size(other.find_last_allocated_segment(other_table));
// If an exception occurred in other, then the size may be greater than the size of the end segment.
size_type other_size = end_segment_size < other.my_size.load(std::memory_order_relaxed) ?
other.my_size.load(std::memory_order_relaxed) : end_segment_size;
other_size = my_segment_table_allocation_failed ? embedded_table_size : other_size;
for (segment_index_type i = 0; segment_base(i) < other_size; ++i) {
// If the segment in other table is enabled - transfer it
if (other_table[i].load(std::memory_order_relaxed) == segment_allocation_failure_tag)
{
my_size = segment_base(i);
break;
} else if (other_table[i].load(std::memory_order_relaxed) != nullptr) {
internal_subscript<true>(segment_base(i));
transfer_segment(i, other.get_table()[i].load(std::memory_order_relaxed) + segment_base(i),
get_table()[i].load(std::memory_order_relaxed) + segment_base(i));
}
}
}
// Moves the other segment table
// Only equal allocators are allowed
void internal_move( segment_table&& other ) {
// NOTE: allocators should be equal
clear();
my_first_block.store(other.my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed);
my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed);
// If an active table in other is embedded - restore all of the embedded segments
if (other.get_table() == other.my_embedded_table) {
for ( size_type i = 0; i != pointers_per_embedded_table; ++i ) {
segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed);
my_embedded_table[i].store(other_segment, std::memory_order_relaxed);
other.my_embedded_table[i].store(nullptr, std::memory_order_relaxed);
}
my_segment_table.store(my_embedded_table, std::memory_order_relaxed);
} else {
my_segment_table.store(other.my_segment_table, std::memory_order_relaxed);
other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed);
zero_table(other.my_embedded_table, pointers_per_embedded_table);
}
other.my_size.store(0, std::memory_order_relaxed);
}
// Move construct the segment table with the allocator object
// if any instances of allocator_type are always equal
void internal_move_construct_with_allocator( segment_table&& other, const allocator_type&,
/*is_always_equal = */ std::true_type ) {
internal_move(std::move(other));
}
// Move construct the segment table with the allocator object
// if instances of allocator_type are not always equal
void internal_move_construct_with_allocator( segment_table&& other, const allocator_type& alloc,
/*is_always_equal = */ std::false_type ) {
if (other.my_segment_table_allocator == alloc) {
// If allocators are equal - restore pointers
internal_move(std::move(other));
} else {
// If allocators are not equal - perform per element move with reallocation
try_call( [&] {
internal_transfer(other, move_segment_body_type{*this});
} ).on_exception( [&] {
clear();
});
}
}
// Move-assigns from the other segment table if any instances of allocator_type are always equal
// or propagate_on_container_move_assignment is true
void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::true_type ) {
internal_move(std::move(other));
}
// Move-assigns from the other segment table if any instances of allocator_type are not always equal
// and propagate_on_container_move_assignment is false
void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::false_type ) {
if (my_segment_table_allocator == other.my_segment_table_allocator) {
// If allocators are equal - restore pointers
internal_move(std::move(other));
} else {
// If allocators are not equal - perform per element move with reallocation
internal_transfer(other, move_segment_body_type{*this});
}
}
// Swaps two segment tables if any instances of allocator_type are always equal
// or propagate_on_container_swap is true
void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::true_type ) {
internal_swap_fields(other);
}
// Swaps two segment tables if any instances of allocator_type are not always equal
// and propagate_on_container_swap is false
// According to the C++ standard, swapping of two containers with unequal allocators
// is an undefined behavior scenario
void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::false_type ) {
__TBB_ASSERT(my_segment_table_allocator == other.my_segment_table_allocator,
"Swapping with unequal allocators is not allowed");
internal_swap_fields(other);
}
void internal_swap_fields( segment_table& other ) {
// If an active table in either *this segment table or other is an embedded one - swaps the embedded tables
if (get_table() == my_embedded_table ||
other.get_table() == other.my_embedded_table) {
for (size_type i = 0; i != pointers_per_embedded_table; ++i) {
segment_type current_segment = my_embedded_table[i].load(std::memory_order_relaxed);
segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed);
my_embedded_table[i].store(other_segment, std::memory_order_relaxed);
other.my_embedded_table[i].store(current_segment, std::memory_order_relaxed);
}
}
segment_table_type current_segment_table = get_table();
segment_table_type other_segment_table = other.get_table();
// If this table's active table was the embedded one,
// point other's active table at other's own embedded table (its contents were swapped above)
if (current_segment_table == my_embedded_table) {
other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed);
} else {
// Otherwise - store it to the active segment table
other.my_segment_table.store(current_segment_table, std::memory_order_relaxed);
}
// If other's active table was its embedded one,
// point this table's active table at this table's own embedded table
if (other_segment_table == other.my_embedded_table) {
my_segment_table.store(my_embedded_table, std::memory_order_relaxed);
} else {
// Otherwise - store it to the active segment table in other
my_segment_table.store(other_segment_table, std::memory_order_relaxed);
}
auto first_block = other.my_first_block.load(std::memory_order_relaxed);
other.my_first_block.store(my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed);
my_first_block.store(first_block, std::memory_order_relaxed);
auto size = other.my_size.load(std::memory_order_relaxed);
other.my_size.store(my_size.load(std::memory_order_relaxed), std::memory_order_relaxed);
my_size.store(size, std::memory_order_relaxed);
}
protected:
// A tag value indicating that an exception was thrown during segment allocation
const segment_type segment_allocation_failure_tag = reinterpret_cast<segment_type>(1);
static constexpr size_type embedded_table_size = segment_size(pointers_per_embedded_table);
template <bool allow_out_of_range_access>
value_type& internal_subscript( size_type index ) {
segment_index_type seg_index = segment_index_of(index);
segment_table_type table = my_segment_table.load(std::memory_order_acquire);
segment_type segment = nullptr;
if (allow_out_of_range_access) {
if (derived_type::allow_table_extending) {
extend_table_if_necessary(table, index, index + 1);
}
segment = table[seg_index].load(std::memory_order_acquire);
// If the required segment is disabled - enable it
if (segment == nullptr) {
enable_segment(segment, table, seg_index, index);
}
// Check if an exception was thrown during segment allocation
if (segment == segment_allocation_failure_tag) {
throw_exception(exception_id::bad_alloc);
}
} else {
segment = table[seg_index].load(std::memory_order_acquire);
}
__TBB_ASSERT(segment != nullptr, nullptr);
return segment[index];
}
void assign_first_block_if_necessary(segment_index_type index) {
size_type zero = 0;
if (this->my_first_block.load(std::memory_order_relaxed) == zero) {
this->my_first_block.compare_exchange_strong(zero, index);
}
}
void zero_table( segment_table_type table, size_type count ) {
for (size_type i = 0; i != count; ++i) {
table[i].store(nullptr, std::memory_order_relaxed);
}
}
segment_table_type get_table() const {
return my_segment_table.load(std::memory_order_acquire);
}
segment_table_allocator_type my_segment_table_allocator;
std::atomic<segment_table_type> my_segment_table;
atomic_segment my_embedded_table[pointers_per_embedded_table];
// Number of segments in first block
std::atomic<size_type> my_first_block;
// Number of elements in table
std::atomic<size_type> my_size;
// Flag indicating that extending the segment table failed
std::atomic<bool> my_segment_table_allocation_failed;
}; // class segment_table
} // namespace d1
} // namespace detail
} // namespace tbb
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning(pop) // warning 4127 is back
#endif
#endif // __TBB_detail__segment_table_H
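The publication step in enable_segment() above is a classic allocate-then-compare-exchange race: every contending thread allocates a candidate segment, exactly one CAS wins, and the losers free their candidates. A minimal self-contained sketch of that protocol, with plain new/delete standing in for create_segment()/deallocate_segment() and all toy names being illustrative:
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>
// Toy publication slot: the real code stores (segment - segment_base) per table entry.
static std::atomic<int*> slot{nullptr};
static void enable_toy_segment() {
    int* candidate = new int[4]();       // stands in for create_segment()
    int* expected = nullptr;
    if (!slot.compare_exchange_strong(expected, candidate)) {
        delete[] candidate;              // lost the race: stands in for deallocate_segment()
    }
}
int main() {
    std::vector<std::thread> workers;
    for (int i = 0; i < 4; ++i) workers.emplace_back(enable_toy_segment);
    for (auto& w : workers) w.join();
    std::printf("published segment: %p\n", static_cast<void*>(slot.load()));
    delete[] slot.load();
    return 0;
}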

View file

@@ -0,0 +1,109 @@
// clang-format off
/*
Copyright (c) 2020-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__small_object_pool_H
#define __TBB__small_object_pool_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/profiling.hh"
#include "third_party/libcxx/cstddef"
#include "third_party/libcxx/cstdint"
#include "third_party/libcxx/atomic"
namespace tbb {
namespace detail {
namespace d1 {
class small_object_pool {
protected:
small_object_pool() = default;
};
struct execution_data;
}
namespace r1 {
TBB_EXPORT void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes,
const d1::execution_data& ed);
TBB_EXPORT void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes);
TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes,
const d1::execution_data& ed);
TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes);
}
namespace d1 {
class small_object_allocator {
public:
template <typename Type, typename... Args>
Type* new_object(execution_data& ed, Args&&... args) {
void* allocated_object = r1::allocate(m_pool, sizeof(Type), ed);
auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...);
return constructed_object;
}
template <typename Type, typename... Args>
Type* new_object(Args&&... args) {
void* allocated_object = r1::allocate(m_pool, sizeof(Type));
auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...);
return constructed_object;
}
template <typename Type>
void delete_object(Type* object, const execution_data& ed) {
// Copy this since it can be a member of the passed object and
// unintentionally destroyed when Type destructor is called below
small_object_allocator alloc = *this;
object->~Type();
alloc.deallocate(object, ed);
}
template <typename Type>
void delete_object(Type* object) {
// Copy this since it can be a member of the passed object and
// unintentionally destroyed when Type destructor is called below
small_object_allocator alloc = *this;
object->~Type();
alloc.deallocate(object);
}
template <typename Type>
void deallocate(Type* ptr, const execution_data& ed) {
call_itt_task_notify(destroy, ptr);
__TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call");
r1::deallocate(*m_pool, ptr, sizeof(Type), ed);
}
template <typename Type>
void deallocate(Type* ptr) {
call_itt_task_notify(destroy, ptr);
__TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call");
r1::deallocate(*m_pool, ptr, sizeof(Type));
}
private:
small_object_pool* m_pool{};
};
} // namespace d1
} // namespace detail
} // namespace tbb
#endif /* __TBB__small_object_pool_H */
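The delete_object() methods above copy the allocator before invoking the destructor because the allocator may itself be a member of the object being destroyed. A minimal sketch of that idiom under simplified assumptions: malloc/free replace the r1 pool entry points, and the toy_* names are illustrative:
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <new>
struct toy_pool_allocator {
    void* allocate(std::size_t n) { return std::malloc(n); }
    void deallocate(void* p) { std::free(p); }
    template <typename T>
    void delete_object(T* obj) {
        toy_pool_allocator copy = *this;  // copy first: *this may live inside *obj
        obj->~T();
        copy.deallocate(obj);
    }
};
struct node {
    toy_pool_allocator alloc;  // the allocator is a member of the object it manages
    int payload;
};
int main() {
    toy_pool_allocator a;
    void* raw = a.allocate(sizeof(node));
    node* n = new (raw) node{a, 42};
    std::printf("payload = %d\n", n->payload);
    n->alloc.delete_object(n);  // safe because delete_object copies the allocator
    return 0;
}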

View file

@@ -0,0 +1,79 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
TBB_STRING_RESOURCE(ALGORITHM, "tbb_algorithm")
TBB_STRING_RESOURCE(PARALLEL_FOR, "tbb_parallel_for")
TBB_STRING_RESOURCE(PARALLEL_FOR_EACH, "tbb_parallel_for_each")
TBB_STRING_RESOURCE(PARALLEL_INVOKE, "tbb_parallel_invoke")
TBB_STRING_RESOURCE(PARALLEL_REDUCE, "tbb_parallel_reduce")
TBB_STRING_RESOURCE(PARALLEL_SCAN, "tbb_parallel_scan")
TBB_STRING_RESOURCE(PARALLEL_SORT, "tbb_parallel_sort")
TBB_STRING_RESOURCE(PARALLEL_PIPELINE, "tbb_parallel_pipeline")
TBB_STRING_RESOURCE(CUSTOM_CTX, "tbb_custom")
TBB_STRING_RESOURCE(FLOW_NULL, "null")
TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node")
TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node")
TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node")
TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)")
TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node")
TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node")
TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node")
TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node")
TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node")
TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node")
TBB_STRING_RESOURCE(FLOW_INPUT_NODE, "input_node")
TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node")
TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node")
TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node")
TBB_STRING_RESOURCE(FLOW_COMPOSITE_NODE, "composite_node")
TBB_STRING_RESOURCE(FLOW_ASYNC_NODE, "async_node")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9")
TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name")
TBB_STRING_RESOURCE(FLOW_BODY, "body")
TBB_STRING_RESOURCE(FLOW_GRAPH, "graph")
TBB_STRING_RESOURCE(FLOW_NODE, "node")
TBB_STRING_RESOURCE(FLOW_TASKS, "tbb_flow_graph")
TBB_STRING_RESOURCE(USER_EVENT, "user_event")
#if __TBB_FLOW_TRACE_CODEPTR
TBB_STRING_RESOURCE(CODE_ADDRESS, "code_address")
#endif
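The list above is an X-macro table: a consumer defines TBB_STRING_RESOURCE before including the file and gets one expansion per entry, for example to build an array of resource names. A minimal sketch with two of the entries repeated inline (the vendored include path is omitted here, so nothing is assumed about it):
#include <cstdio>
#define TBB_STRING_RESOURCE(id, str) str,
static const char* const resource_names[] = {
    TBB_STRING_RESOURCE(ALGORITHM, "tbb_algorithm")
    TBB_STRING_RESOURCE(PARALLEL_FOR, "tbb_parallel_for")
};
#undef TBB_STRING_RESOURCE
int main() {
    std::printf("%s\n", resource_names[1]);  // prints "tbb_parallel_for"
    return 0;
}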

233
third_party/tbb/detail/_task.hh vendored Normal file
View file

@@ -0,0 +1,233 @@
// clang-format off
/*
Copyright (c) 2020-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__task_H
#define __TBB__task_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_template_helpers.hh"
#include "third_party/tbb/detail/_small_object_pool.hh"
#include "third_party/tbb/profiling.hh"
#include "third_party/libcxx/cstddef"
#include "third_party/libcxx/cstdint"
#include "third_party/libcxx/climits"
#include "third_party/libcxx/utility"
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/mutex"
namespace tbb {
namespace detail {
namespace d1 {
using slot_id = unsigned short;
constexpr slot_id no_slot = slot_id(~0);
constexpr slot_id any_slot = slot_id(~1);
class task;
class wait_context;
class task_group_context;
struct execution_data;
}
namespace r1 {
//! Task spawn/wait entry points
TBB_EXPORT void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx);
TBB_EXPORT void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id);
TBB_EXPORT void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context&, d1::task_group_context& w_ctx);
TBB_EXPORT void __TBB_EXPORTED_FUNC wait(d1::wait_context&, d1::task_group_context& ctx);
TBB_EXPORT d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data*);
TBB_EXPORT d1::task_group_context* __TBB_EXPORTED_FUNC current_context();
// Do not place under __TBB_RESUMABLE_TASKS. It is a stub for unsupported platforms.
struct suspend_point_type;
using suspend_callback_type = void(*)(void*, suspend_point_type*);
//! The resumable tasks entry points
TBB_EXPORT void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback);
TBB_EXPORT void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag);
TBB_EXPORT suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point();
TBB_EXPORT void __TBB_EXPORTED_FUNC notify_waiters(std::uintptr_t wait_ctx_addr);
class thread_data;
class task_dispatcher;
class external_waiter;
struct task_accessor;
struct task_arena_impl;
} // namespace r1
namespace d1 {
class task_arena;
using suspend_point = r1::suspend_point_type*;
#if __TBB_RESUMABLE_TASKS
template <typename F>
static void suspend_callback(void* user_callback, suspend_point sp) {
// Copy user function to a new stack after the context switch to avoid a race when the previous
// suspend point is resumed while the user_callback is being called.
F user_callback_copy = *static_cast<F*>(user_callback);
user_callback_copy(sp);
}
template <typename F>
void suspend(F f) {
r1::suspend(&suspend_callback<F>, &f);
}
inline void resume(suspend_point tag) {
r1::resume(tag);
}
#endif /* __TBB_RESUMABLE_TASKS */
// TODO: align wait_context on a cache line
class wait_context {
static constexpr std::uint64_t overflow_mask = ~((1LLU << 32) - 1);
std::uint64_t m_version_and_traits{1};
std::atomic<std::uint64_t> m_ref_count{};
void add_reference(std::int64_t delta) {
call_itt_task_notify(releasing, this);
std::uint64_t r = m_ref_count.fetch_add(static_cast<std::uint64_t>(delta)) + static_cast<std::uint64_t>(delta);
__TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected");
if (!r) {
// Some external or coroutine waiters may be sleeping in the wait list.
// Notify them that the work is done.
std::uintptr_t wait_ctx_addr = std::uintptr_t(this);
r1::notify_waiters(wait_ctx_addr);
}
}
bool continue_execution() const {
std::uint64_t r = m_ref_count.load(std::memory_order_acquire);
__TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected");
return r > 0;
}
friend class r1::thread_data;
friend class r1::task_dispatcher;
friend class r1::external_waiter;
friend class task_group;
friend class task_group_base;
friend struct r1::task_arena_impl;
friend struct r1::suspend_point_type;
public:
// Although the internal reference count is uint64_t, we limit the user interface to uint32_t
// to preserve a part of the internal reference count for special needs.
wait_context(std::uint32_t ref_count) : m_ref_count{ref_count} { suppress_unused_warning(m_version_and_traits); }
wait_context(const wait_context&) = delete;
~wait_context() {
__TBB_ASSERT(!continue_execution(), nullptr);
}
void reserve(std::uint32_t delta = 1) {
add_reference(delta);
}
void release(std::uint32_t delta = 1) {
add_reference(-std::int64_t(delta));
}
};
struct execution_data {
task_group_context* context{};
slot_id original_slot{};
slot_id affinity_slot{};
};
inline task_group_context* context(const execution_data& ed) {
return ed.context;
}
inline slot_id original_slot(const execution_data& ed) {
return ed.original_slot;
}
inline slot_id affinity_slot(const execution_data& ed) {
return ed.affinity_slot;
}
inline slot_id execution_slot(const execution_data& ed) {
return r1::execution_slot(&ed);
}
inline bool is_same_affinity(const execution_data& ed) {
return affinity_slot(ed) == no_slot || affinity_slot(ed) == execution_slot(ed);
}
inline bool is_stolen(const execution_data& ed) {
return original_slot(ed) != execution_slot(ed);
}
inline void spawn(task& t, task_group_context& ctx) {
call_itt_task_notify(releasing, &t);
r1::spawn(t, ctx);
}
inline void spawn(task& t, task_group_context& ctx, slot_id id) {
call_itt_task_notify(releasing, &t);
r1::spawn(t, ctx, id);
}
inline void execute_and_wait(task& t, task_group_context& t_ctx, wait_context& wait_ctx, task_group_context& w_ctx) {
r1::execute_and_wait(t, t_ctx, wait_ctx, w_ctx);
call_itt_task_notify(acquired, &wait_ctx);
call_itt_task_notify(destroy, &wait_ctx);
}
inline void wait(wait_context& wait_ctx, task_group_context& ctx) {
r1::wait(wait_ctx, ctx);
call_itt_task_notify(acquired, &wait_ctx);
call_itt_task_notify(destroy, &wait_ctx);
}
using r1::current_context;
class task_traits {
std::uint64_t m_version_and_traits{};
friend struct r1::task_accessor;
};
//! Alignment for a task object
static constexpr std::size_t task_alignment = 64;
//! Base class for user-defined tasks.
/** @ingroup task_scheduling */
class alignas(task_alignment) task : public task_traits {
protected:
virtual ~task() = default;
public:
virtual task* execute(execution_data&) = 0;
virtual task* cancel(execution_data&) = 0;
private:
std::uint64_t m_reserved[6]{};
friend struct r1::task_accessor;
};
static_assert(sizeof(task) == task_alignment, "task size is broken");
} // namespace d1
} // namespace detail
} // namespace tbb
#endif /* __TBB__task_H */
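wait_context above is essentially a reference count that waiters observe: work is registered with reserve(), finished with release(), and execution may continue past the wait once the count returns to zero (the real implementation additionally wakes sleeping waiters through the r1 runtime). A simplified stand-in, not the TBB class, with illustrative toy_* names:
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>
struct toy_wait_context {
    std::atomic<std::uint64_t> ref_count;
    explicit toy_wait_context(std::uint32_t n) : ref_count(n) {}
    void reserve(std::uint32_t d = 1) { ref_count.fetch_add(d, std::memory_order_relaxed); }
    void release(std::uint32_t d = 1) { ref_count.fetch_sub(d, std::memory_order_release); }
    bool continue_execution() const { return ref_count.load(std::memory_order_acquire) > 0; }
};
int main() {
    toy_wait_context wc(1);  // one outstanding piece of work, like wait_context{1}
    std::thread worker([&] { std::puts("work done"); wc.release(); });
    while (wc.continue_execution()) { /* busy-wait; the real scheduler blocks or steals work */ }
    worker.join();
    return 0;
}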

123
third_party/tbb/detail/_task_handle.hh vendored Normal file
View file

@@ -0,0 +1,123 @@
// clang-format off
/*
Copyright (c) 2020-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_task_handle_H
#define __TBB_task_handle_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_task.hh"
#include "third_party/tbb/detail/_small_object_pool.hh"
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/libcxx/memory"
namespace tbb {
namespace detail {
namespace d1 { class task_group_context; class wait_context; struct execution_data; }
namespace d2 {
class task_handle;
class task_handle_task : public d1::task {
std::uint64_t m_version_and_traits{};
d1::wait_context& m_wait_ctx;
d1::task_group_context& m_ctx;
d1::small_object_allocator m_allocator;
public:
void finalize(const d1::execution_data* ed = nullptr) {
if (ed) {
m_allocator.delete_object(this, *ed);
} else {
m_allocator.delete_object(this);
}
}
task_handle_task(d1::wait_context& wo, d1::task_group_context& ctx, d1::small_object_allocator& alloc)
: m_wait_ctx(wo)
, m_ctx(ctx)
, m_allocator(alloc) {
suppress_unused_warning(m_version_and_traits);
}
~task_handle_task() override {
m_wait_ctx.release();
}
d1::task_group_context& ctx() const { return m_ctx; }
};
class task_handle {
struct task_handle_task_finalizer_t{
void operator()(task_handle_task* p){ p->finalize(); }
};
using handle_impl_t = std::unique_ptr<task_handle_task, task_handle_task_finalizer_t>;
handle_impl_t m_handle = {nullptr};
public:
task_handle() = default;
task_handle(task_handle&&) = default;
task_handle& operator=(task_handle&&) = default;
explicit operator bool() const noexcept { return static_cast<bool>(m_handle); }
friend bool operator==(task_handle const& th, std::nullptr_t) noexcept;
friend bool operator==(std::nullptr_t, task_handle const& th) noexcept;
friend bool operator!=(task_handle const& th, std::nullptr_t) noexcept;
friend bool operator!=(std::nullptr_t, task_handle const& th) noexcept;
private:
friend struct task_handle_accessor;
task_handle(task_handle_task* t) : m_handle {t}{};
d1::task* release() {
return m_handle.release();
}
};
struct task_handle_accessor {
static task_handle construct(task_handle_task* t) { return {t}; }
static d1::task* release(task_handle& th) { return th.release(); }
static d1::task_group_context& ctx_of(task_handle& th) {
__TBB_ASSERT(th.m_handle, "ctx_of does not expect empty task_handle.");
return th.m_handle->ctx();
}
};
inline bool operator==(task_handle const& th, std::nullptr_t) noexcept {
return th.m_handle == nullptr;
}
inline bool operator==(std::nullptr_t, task_handle const& th) noexcept {
return th.m_handle == nullptr;
}
inline bool operator!=(task_handle const& th, std::nullptr_t) noexcept {
return th.m_handle != nullptr;
}
inline bool operator!=(std::nullptr_t, task_handle const& th) noexcept {
return th.m_handle != nullptr;
}
} // namespace d2
} // namespace detail
} // namespace tbb
#endif /* __TBB_task_handle_H */
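task_handle above owns its task through a std::unique_ptr whose deleter routes destruction to finalize() rather than a plain delete. A generic sketch of the same pattern, not TBB-specific; here finalize() is followed by delete because there is no small-object pool to return the task to:
#include <cstdio>
#include <memory>
struct toy_task {
    void finalize() { std::puts("finalized"); }  // the real finalize() returns the task to its pool
};
struct toy_finalizer {
    void operator()(toy_task* p) const { p->finalize(); delete p; }
};
using toy_handle = std::unique_ptr<toy_task, toy_finalizer>;
int main() {
    toy_handle h(new toy_task);
    return 0;  // h's destructor runs toy_finalizer instead of a bare delete
}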

View file

@@ -0,0 +1,404 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__template_helpers_H
#define __TBB_detail__template_helpers_H
#include "third_party/tbb/detail/_utils.hh"
#include "third_party/tbb/detail/_config.hh"
#include "third_party/libcxx/cstddef"
#include "third_party/libcxx/cstdint"
#include "third_party/libcxx/utility"
#include "third_party/libcxx/type_traits"
#include "third_party/libcxx/memory"
#include "third_party/libcxx/iterator"
namespace tbb {
namespace detail {
inline namespace d0 {
// An internal implementation of void_t, which can be used in SFINAE contexts
template <typename...>
struct void_impl {
using type = void;
}; // struct void_impl
template <typename... Args>
using void_t = typename void_impl<Args...>::type;
// Generic SFINAE helper for expression checks, based on the idea demonstrated in ISO C++ paper n4502
template <typename T, typename, template <typename> class... Checks>
struct supports_impl {
using type = std::false_type;
};
template <typename T, template <typename> class... Checks>
struct supports_impl<T, void_t<Checks<T>...>, Checks...> {
using type = std::true_type;
};
template <typename T, template <typename> class... Checks>
using supports = typename supports_impl<T, void, Checks...>::type;
//! A template to select either a 32-bit or a 64-bit constant at compile time, depending on the machine word size.
template <unsigned u, unsigned long long ull >
struct select_size_t_constant {
// Explicit cast is needed to avoid compiler warnings about possible truncation.
// The value of the right size, which is selected by ?:, is anyway not truncated or promoted.
static const std::size_t value = static_cast<std::size_t>((sizeof(std::size_t)==sizeof(u)) ? u : ull);
};
// TODO: do we really need it?
//! Cast between unrelated pointer types.
/** This method should be used sparingly as a last resort for dealing with
situations that inherently break strict ISO C++ aliasing rules. */
// T is a pointer type because it will be explicitly provided by the programmer as a template argument;
// U is a referent type to enable the compiler to check that "ptr" is a pointer, deducing U in the process.
template<typename T, typename U>
inline T punned_cast( U* ptr ) {
std::uintptr_t x = reinterpret_cast<std::uintptr_t>(ptr);
return reinterpret_cast<T>(x);
}
template<class T, size_t S, size_t R>
struct padded_base : T {
char pad[S - R];
};
template<class T, size_t S> struct padded_base<T, S, 0> : T {};
//! Pads type T to fill out to a multiple of cache line size.
template<class T, size_t S = max_nfs_size>
struct padded : padded_base<T, S, sizeof(T) % S> {};
#if __TBB_CPP14_INTEGER_SEQUENCE_PRESENT
using std::index_sequence;
using std::make_index_sequence;
#else
template<std::size_t... S> class index_sequence {};
template<std::size_t N, std::size_t... S>
struct make_index_sequence_impl : make_index_sequence_impl < N - 1, N - 1, S... > {};
template<std::size_t... S>
struct make_index_sequence_impl <0, S...> {
using type = index_sequence<S...>;
};
template<std::size_t N>
using make_index_sequence = typename make_index_sequence_impl<N>::type;
#endif /* __TBB_CPP14_INTEGER_SEQUENCE_PRESENT */
#if __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT
using std::conjunction;
using std::disjunction;
#else // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT
template <typename...>
struct conjunction : std::true_type {};
template <typename First, typename... Args>
struct conjunction<First, Args...>
: std::conditional<bool(First::value), conjunction<Args...>, First>::type {};
template <typename T>
struct conjunction<T> : T {};
template <typename...>
struct disjunction : std::false_type {};
template <typename First, typename... Args>
struct disjunction<First, Args...>
: std::conditional<bool(First::value), First, disjunction<Args...>>::type {};
template <typename T>
struct disjunction<T> : T {};
#endif // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT
template <typename Iterator>
using iterator_value_t = typename std::iterator_traits<Iterator>::value_type;
template <typename Iterator>
using iterator_key_t = typename std::remove_const<typename iterator_value_t<Iterator>::first_type>::type;
template <typename Iterator>
using iterator_mapped_t = typename iterator_value_t<Iterator>::second_type;
template <typename Iterator>
using iterator_alloc_pair_t = std::pair<typename std::add_const<iterator_key_t<Iterator>>::type,
iterator_mapped_t<Iterator>>;
template <typename A> using alloc_value_type = typename A::value_type;
template <typename A> using alloc_ptr_t = typename std::allocator_traits<A>::pointer;
template <typename A> using has_allocate = decltype(std::declval<alloc_ptr_t<A>&>() = std::declval<A>().allocate(0));
template <typename A> using has_deallocate = decltype(std::declval<A>().deallocate(std::declval<alloc_ptr_t<A>>(), 0));
// alloc_value_type should be checked first, because it can be used in other checks
template <typename T>
using is_allocator = supports<T, alloc_value_type, has_allocate, has_deallocate>;
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename T>
inline constexpr bool is_allocator_v = is_allocator<T>::value;
#endif
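The void_t/supports machinery above is a detection idiom: a check alias is well-formed only when the expression it names compiles, and the partial specialization of supports_impl is chosen only when every check is well-formed. A self-contained restatement applied to a hypothetical has_size check instead of the allocator checks:
#include <type_traits>
#include <utility>
#include <vector>
template <typename...> struct void_impl { using type = void; };
template <typename... Args> using void_t = typename void_impl<Args...>::type;
template <typename T, typename, template <typename> class... Checks>
struct supports_impl { using type = std::false_type; };
template <typename T, template <typename> class... Checks>
struct supports_impl<T, void_t<Checks<T>...>, Checks...> { using type = std::true_type; };
template <typename T, template <typename> class... Checks>
using supports = typename supports_impl<T, void, Checks...>::type;
template <typename T> using has_size = decltype(std::declval<T&>().size());
static_assert(supports<std::vector<int>, has_size>::value, "vector has size()");
static_assert(!supports<int, has_size>::value, "int has no size()");
int main() { return 0; }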
// Template class whose nested "type" is the type of element number N in the pack Args
template <std::size_t N, typename... Args>
struct pack_element {
using type = void;
};
template <std::size_t N, typename T, typename... Args>
struct pack_element<N, T, Args...> {
using type = typename pack_element<N-1, Args...>::type;
};
template <typename T, typename... Args>
struct pack_element<0, T, Args...> {
using type = T;
};
template <std::size_t N, typename... Args>
using pack_element_t = typename pack_element<N, Args...>::type;
template <typename Func>
class raii_guard {
public:
static_assert(
std::is_nothrow_copy_constructible<Func>::value &&
std::is_nothrow_move_constructible<Func>::value,
"Throwing an exception during the Func copy or move construction cause an unexpected behavior."
);
raii_guard( Func f ) noexcept : my_func(f), is_active(true) {}
raii_guard( raii_guard&& g ) noexcept : my_func(std::move(g.my_func)), is_active(g.is_active) {
g.is_active = false;
}
~raii_guard() {
if (is_active) {
my_func();
}
}
void dismiss() {
is_active = false;
}
private:
Func my_func;
bool is_active;
}; // class raii_guard
template <typename Func>
raii_guard<Func> make_raii_guard( Func f ) {
return raii_guard<Func>(f);
}
template <typename Body>
struct try_call_proxy {
try_call_proxy( Body b ) : body(b) {}
template <typename OnExceptionBody>
void on_exception( OnExceptionBody on_exception_body ) {
auto guard = make_raii_guard(on_exception_body);
body();
guard.dismiss();
}
template <typename OnCompletionBody>
void on_completion(OnCompletionBody on_completion_body) {
auto guard = make_raii_guard(on_completion_body);
body();
}
Body body;
}; // struct try_call_proxy
// Template helper function for API
// try_call(lambda1).on_exception(lambda2)
// Executes lambda1 and, if it throws an exception, executes lambda2 (the exception then propagates)
template <typename Body>
try_call_proxy<Body> try_call( Body b ) {
return try_call_proxy<Body>(b);
}
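// Usage sketch (editorial note; acquire_resources/roll_back are hypothetical names):
//   try_call( [&] {
//       acquire_resources();   // may throw
//   } ).on_exception( [&] {
//       roll_back();           // runs only if the body throws; the exception then propagates
//   } );
// The segment_table copy constructor earlier in this commit uses exactly this pattern.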
#if __TBB_CPP17_IS_SWAPPABLE_PRESENT
using std::is_nothrow_swappable;
using std::is_swappable;
#else // __TBB_CPP17_IS_SWAPPABLE_PRESENT
namespace is_swappable_detail {
using std::swap;
template <typename T>
using has_swap = decltype(swap(std::declval<T&>(), std::declval<T&>()));
#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER
// Workaround for VS2015: it fails to instantiate noexcept(...) inside std::integral_constant.
template <typename T>
struct noexcept_wrapper {
static const bool value = noexcept(swap(std::declval<T&>(), std::declval<T&>()));
};
template <typename T>
struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept_wrapper<T>::value> {};
#else
template <typename T>
struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept(swap(std::declval<T&>(), std::declval<T&>()))> {};
#endif
}
template <typename T>
struct is_swappable : supports<T, is_swappable_detail::has_swap> {};
template <typename T>
struct is_nothrow_swappable
: conjunction<is_swappable<T>, is_swappable_detail::is_nothrow_swappable_impl<T>> {};
#endif // __TBB_CPP17_IS_SWAPPABLE_PRESENT
//! Allows storing a function parameter pack in a variable and later passing it to another function
template< typename... Types >
struct stored_pack;
template<>
struct stored_pack<>
{
using pack_type = stored_pack<>;
stored_pack() {}
// Friend front-end functions
template< typename F, typename Pack > friend void call(F&& f, Pack&& p);
template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p);
protected:
// Ideally, ref-qualified non-static methods would be used,
// but that would greatly reduce the set of compilers where it works.
template< typename Ret, typename F, typename... Preceding >
static Ret call(F&& f, const pack_type& /*pack*/, Preceding&&... params) {
return std::forward<F>(f)(std::forward<Preceding>(params)...);
}
template< typename Ret, typename F, typename... Preceding >
static Ret call(F&& f, pack_type&& /*pack*/, Preceding&&... params) {
return std::forward<F>(f)(std::forward<Preceding>(params)...);
}
};
template< typename T, typename... Types >
struct stored_pack<T, Types...> : stored_pack<Types...>
{
using pack_type = stored_pack<T, Types...>;
using pack_remainder = stored_pack<Types...>;
// Since lifetime of original values is out of control, copies should be made.
// Thus references should be stripped away from the deduced type.
typename std::decay<T>::type leftmost_value;
// Here rvalue references act in the same way as forwarding references,
// as long as class template parameters were deduced via forwarding references.
stored_pack(T&& t, Types&&... types)
: pack_remainder(std::forward<Types>(types)...), leftmost_value(std::forward<T>(t)) {}
// Friend front-end functions
template< typename F, typename Pack > friend void call(F&& f, Pack&& p);
template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p);
protected:
template< typename Ret, typename F, typename... Preceding >
static Ret call(F&& f, pack_type& pack, Preceding&&... params) {
return pack_remainder::template call<Ret>(
std::forward<F>(f), static_cast<pack_remainder&>(pack),
std::forward<Preceding>(params)... , pack.leftmost_value
);
}
template< typename Ret, typename F, typename... Preceding >
static Ret call(F&& f, pack_type&& pack, Preceding&&... params) {
return pack_remainder::template call<Ret>(
std::forward<F>(f), static_cast<pack_remainder&&>(pack),
std::forward<Preceding>(params)... , std::move(pack.leftmost_value)
);
}
};
//! Calls the given function with arguments taken from a stored_pack
template< typename F, typename Pack >
void call(F&& f, Pack&& p) {
std::decay<Pack>::type::template call<void>(std::forward<F>(f), std::forward<Pack>(p));
}
template< typename Ret, typename F, typename Pack >
Ret call_and_return(F&& f, Pack&& p) {
return std::decay<Pack>::type::template call<Ret>(std::forward<F>(f), std::forward<Pack>(p));
}
template< typename... Types >
stored_pack<Types...> save_pack(Types&&... types) {
return stored_pack<Types...>(std::forward<Types>(types)...);
}
// A structure whose value equals Trait::value
// but can be used in the immediate context thanks to the parameter T
template <typename Trait, typename T>
struct dependent_bool : std::integral_constant<bool, bool(Trait::value)> {};
template <typename Callable>
struct body_arg_detector;
template <typename Callable, typename ReturnType, typename Arg>
struct body_arg_detector<ReturnType(Callable::*)(Arg)> {
using arg_type = Arg;
};
template <typename Callable, typename ReturnType, typename Arg>
struct body_arg_detector<ReturnType(Callable::*)(Arg) const> {
using arg_type = Arg;
};
template <typename Callable>
struct argument_detector;
template <typename Callable>
struct argument_detector {
using type = typename body_arg_detector<decltype(&Callable::operator())>::arg_type;
};
template <typename ReturnType, typename Arg>
struct argument_detector<ReturnType(*)(Arg)> {
using type = Arg;
};
// Detects the argument type of a callable; works for callables with one argument.
template <typename Callable>
using argument_type_of = typename argument_detector<typename std::decay<Callable>::type>::type;
template <typename T>
struct type_identity {
using type = T;
};
template <typename T>
using type_identity_t = typename type_identity<T>::type;
} // inline namespace d0
} // namespace detail
} // namespace tbb
#endif // __TBB_detail__template_helpers_H

394
third_party/tbb/detail/_utils.hh vendored Normal file
View file

@@ -0,0 +1,394 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__utils_H
#define __TBB_detail__utils_H
#include "third_party/libcxx/type_traits"
#include "third_party/libcxx/cstdint"
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/functional"
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_machine.hh"
namespace tbb {
namespace detail {
inline namespace d0 {
//! Utility template function to prevent "unused" warnings by various compilers.
template<typename... T> void suppress_unused_warning(T&&...) {}
//! Compile-time constant that is upper bound on cache line/sector size.
/** It should be used only in situations where having a compile-time upper
bound is more useful than a run-time exact answer.
@ingroup memory_allocation */
constexpr size_t max_nfs_size = 128;
constexpr std::size_t max_nfs_size_exp = 7;
static_assert(1 << max_nfs_size_exp == max_nfs_size, "max_nfs_size_exp must be a log2(max_nfs_size)");
//! Class that implements exponential backoff.
class atomic_backoff {
//! Time delay, in units of "pause" instructions.
/** Should be equal to approximately the number of "pause" instructions
that take the same time as a context switch. Must be a power of two. */
static constexpr std::int32_t LOOPS_BEFORE_YIELD = 16;
std::int32_t count;
public:
// In many cases, an object of this type is initialized eagerly on hot path,
// as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ }
// For this reason, the construction cost must be very small!
atomic_backoff() : count(1) {}
// This constructor pauses immediately; do not use on hot paths!
atomic_backoff(bool) : count(1) { pause(); }
//! No Copy
atomic_backoff(const atomic_backoff&) = delete;
atomic_backoff& operator=(const atomic_backoff&) = delete;
//! Pause for a while.
void pause() {
if (count <= LOOPS_BEFORE_YIELD) {
machine_pause(count);
// Pause twice as long the next time.
count *= 2;
} else {
// Pause is so long that we might as well yield CPU to scheduler.
yield();
}
}
//! Pause for a few times and return false if saturated.
bool bounded_pause() {
machine_pause(count);
if (count < LOOPS_BEFORE_YIELD) {
// Pause twice as long the next time.
count *= 2;
return true;
} else {
return false;
}
}
void reset() {
count = 1;
}
};
//! Spin WHILE the condition is true.
/** C should be a predicate invocable with a value of type T. */
template <typename T, typename C>
T spin_wait_while(const std::atomic<T>& location, C comp, std::memory_order order) {
atomic_backoff backoff;
T snapshot = location.load(order);
while (comp(snapshot)) {
backoff.pause();
snapshot = location.load(order);
}
return snapshot;
}
//! Spin WHILE the value of the variable is equal to a given value
/** T and U should be comparable types. */
template <typename T, typename U>
T spin_wait_while_eq(const std::atomic<T>& location, const U value, std::memory_order order = std::memory_order_acquire) {
return spin_wait_while(location, [&value](T t) { return t == value; }, order);
}
//! Spin UNTIL the value of the variable is equal to a given value
/** T and U should be comparable types. */
template<typename T, typename U>
T spin_wait_until_eq(const std::atomic<T>& location, const U value, std::memory_order order = std::memory_order_acquire) {
return spin_wait_while(location, [&value](T t) { return t != value; }, order);
}
//! Spin UNTIL the condition returns true or spinning time is up.
/** Returns what the passed functor returned last time it was invoked. */
template <typename Condition>
bool timed_spin_wait_until(Condition condition) {
// 32 pauses + 32 yields were measured to be a balanced spin time before sleeping.
bool finish = condition();
for (int i = 1; !finish && i < 32; finish = condition(), i *= 2) {
machine_pause(i);
}
for (int i = 32; !finish && i < 64; finish = condition(), ++i) {
yield();
}
return finish;
}
template <typename T>
T clamp(T value, T lower_bound, T upper_bound) {
__TBB_ASSERT(lower_bound <= upper_bound, "Incorrect bounds");
return value > lower_bound ? (value > upper_bound ? upper_bound : value) : lower_bound;
}
template <typename T>
std::uintptr_t log2(T in) {
__TBB_ASSERT(in > 0, "The logarithm of a non-positive value is undefined.");
return machine_log2(in);
}
template<typename T>
T reverse_bits(T src) {
return machine_reverse_bits(src);
}
template<typename T>
T reverse_n_bits(T src, std::size_t n) {
__TBB_ASSERT(n != 0, "Reverse for 0 bits is undefined behavior.");
return reverse_bits(src) >> (number_of_bits<T>() - n);
}
// A function to check if passed integer is a power of two
template <typename IntegerType>
constexpr bool is_power_of_two( IntegerType arg ) {
static_assert(std::is_integral<IntegerType>::value,
"An argument for is_power_of_two should be integral type");
return arg && (0 == (arg & (arg - 1)));
}
// A function to determine if passed integer is a power of two
// at least as big as another power of two, i.e. for strictly positive i and j,
// with j being a power of two, determines whether i==j<<k for some nonnegative k
template <typename ArgIntegerType, typename DivisorIntegerType>
constexpr bool is_power_of_two_at_least(ArgIntegerType arg, DivisorIntegerType divisor) {
// Divisor should be a power of two
static_assert(std::is_integral<ArgIntegerType>::value,
"An argument for is_power_of_two_at_least should be integral type");
return 0 == (arg & (arg - divisor));
}
// A function to compute arg modulo divisor where divisor is a power of 2.
template<typename ArgIntegerType, typename DivisorIntegerType>
inline ArgIntegerType modulo_power_of_two(ArgIntegerType arg, DivisorIntegerType divisor) {
__TBB_ASSERT( is_power_of_two(divisor), "Divisor should be a power of two" );
return arg & (divisor - 1);
}
//! A function to check if the passed-in pointer is aligned on a specific boundary
template<typename T>
constexpr bool is_aligned(T* pointer, std::uintptr_t alignment) {
return 0 == (reinterpret_cast<std::uintptr_t>(pointer) & (alignment - 1));
}
#if TBB_USE_ASSERT
static void* const poisoned_ptr = reinterpret_cast<void*>(-1);
//! Set p to invalid pointer value.
template<typename T>
inline void poison_pointer( T* &p ) { p = reinterpret_cast<T*>(poisoned_ptr); }
template<typename T>
inline void poison_pointer(std::atomic<T*>& p) { p.store(reinterpret_cast<T*>(poisoned_ptr), std::memory_order_relaxed); }
/** Expected to be used in assertions only, thus no empty form is defined. **/
template<typename T>
inline bool is_poisoned( T* p ) { return p == reinterpret_cast<T*>(poisoned_ptr); }
template<typename T>
inline bool is_poisoned(const std::atomic<T*>& p) { return is_poisoned(p.load(std::memory_order_relaxed)); }
#else
template<typename T>
inline void poison_pointer(T&) {/*do nothing*/}
#endif /* !TBB_USE_ASSERT */
template <std::size_t alignment = 0, typename T>
bool assert_pointer_valid(T* p, const char* comment = nullptr) {
suppress_unused_warning(p, comment);
__TBB_ASSERT(p != nullptr, comment);
__TBB_ASSERT(!is_poisoned(p), comment);
#if !(_MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER)
__TBB_ASSERT(is_aligned(p, alignment == 0 ? alignof(T) : alignment), comment);
#endif
// Returns something to simplify assert_pointers_valid implementation.
return true;
}
template <typename... Args>
void assert_pointers_valid(Args*... p) {
// suppress_unused_warning is used as an evaluation context for the variadic pack.
suppress_unused_warning(assert_pointer_valid(p)...);
}
//! Base class for types that should not be assigned.
class no_assign {
public:
void operator=(const no_assign&) = delete;
no_assign(const no_assign&) = default;
no_assign() = default;
};
//! Base class for types that should not be copied or assigned.
class no_copy: no_assign {
public:
no_copy(const no_copy&) = delete;
no_copy() = default;
};
template <typename T>
void swap_atomics_relaxed(std::atomic<T>& lhs, std::atomic<T>& rhs){
T tmp = lhs.load(std::memory_order_relaxed);
lhs.store(rhs.load(std::memory_order_relaxed), std::memory_order_relaxed);
rhs.store(tmp, std::memory_order_relaxed);
}
//! One-time initialization states
enum class do_once_state {
uninitialized = 0, ///< No execution attempts have been undertaken yet
pending, ///< A thread is executing associated do-once routine
executed, ///< Do-once routine has been executed
initialized = executed ///< Convenience alias
};
//! One-time initialization function
/** \param initializer Pointer to a function without arguments.
The variant that returns bool is used for cases when initialization can fail
and it is OK to continue execution, but the state should be reset so that
the initialization attempt can be repeated the next time.
\param state Shared state associated with initializer that specifies its
initialization state. Must be initially set to the #uninitialized value
(e.g. by means of default static zero initialization). **/
template <typename F>
void atomic_do_once( const F& initializer, std::atomic<do_once_state>& state ) {
// The loop in the implementation is necessary to avoid race when thread T2
// that arrived in the middle of initialization attempt by another thread T1
// has just made initialization possible.
// In such a case T2 has to rely on T1 to initialize, but T1 may already be past
// the point where it can recognize the changed conditions.
do_once_state expected_state;
while ( state.load( std::memory_order_acquire ) != do_once_state::executed ) {
if( state.load( std::memory_order_relaxed ) == do_once_state::uninitialized ) {
expected_state = do_once_state::uninitialized;
#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910
using enum_type = typename std::underlying_type<do_once_state>::type;
if( ((std::atomic<enum_type>&)state).compare_exchange_strong( (enum_type&)expected_state, (enum_type)do_once_state::pending ) ) {
#else
if( state.compare_exchange_strong( expected_state, do_once_state::pending ) ) {
#endif
run_initializer( initializer, state );
break;
}
}
spin_wait_while_eq( state, do_once_state::pending );
}
}
// Run the initializer, which cannot fail
template<typename Functor>
void run_initializer(const Functor& f, std::atomic<do_once_state>& state ) {
f();
state.store(do_once_state::executed, std::memory_order_release);
}
#if __TBB_CPP20_CONCEPTS_PRESENT
template <typename T>
concept boolean_testable_impl = std::convertible_to<T, bool>;
template <typename T>
concept boolean_testable = boolean_testable_impl<T> && requires( T&& t ) {
{ !std::forward<T>(t) } -> boolean_testable_impl;
};
#if __TBB_CPP20_COMPARISONS_PRESENT
struct synthesized_three_way_comparator {
template <typename T1, typename T2>
auto operator()( const T1& lhs, const T2& rhs ) const
requires requires {
{ lhs < rhs } -> boolean_testable;
{ rhs < lhs } -> boolean_testable;
}
{
if constexpr (std::three_way_comparable_with<T1, T2>) {
return lhs <=> rhs;
} else {
if (lhs < rhs) {
return std::weak_ordering::less;
}
if (rhs < lhs) {
return std::weak_ordering::greater;
}
return std::weak_ordering::equivalent;
}
}
}; // struct synthesized_three_way_comparator
template <typename T1, typename T2 = T1>
using synthesized_three_way_result = decltype(synthesized_three_way_comparator{}(std::declval<T1&>(),
std::declval<T2&>()));
#endif // __TBB_CPP20_COMPARISONS_PRESENT
// Check if the type T is implicitly OR explicitly convertible to U
template <typename T, typename U>
concept relaxed_convertible_to = std::constructible_from<U, T>;
template <typename T, typename U>
concept adaptive_same_as =
#if __TBB_STRICT_CONSTRAINTS
std::same_as<T, U>;
#else
std::convertible_to<T, U>;
#endif
#endif // __TBB_CPP20_CONCEPTS_PRESENT
template <typename F, typename... Args>
auto invoke(F&& f, Args&&... args)
#if __TBB_CPP17_INVOKE_PRESENT
noexcept(std::is_nothrow_invocable_v<F, Args...>)
-> std::invoke_result_t<F, Args...>
{
return std::invoke(std::forward<F>(f), std::forward<Args>(args)...);
}
#else // __TBB_CPP17_INVOKE_PRESENT
noexcept(noexcept(std::forward<F>(f)(std::forward<Args>(args)...)))
-> decltype(std::forward<F>(f)(std::forward<Args>(args)...))
{
return std::forward<F>(f)(std::forward<Args>(args)...);
}
#endif // __TBB_CPP17_INVOKE_PRESENT
} // namespace d0
namespace d1 {
class delegate_base {
public:
virtual bool operator()() const = 0;
virtual ~delegate_base() {}
};
template <typename FuncType>
class delegated_function : public delegate_base {
public:
delegated_function(FuncType& f) : my_func(f) {}
bool operator()() const override {
return my_func();
}
private:
FuncType &my_func;
};
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // __TBB_detail__utils_H
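A minimal usage sketch for the atomic_do_once helper above (not part of the vendored file; trace_ready and init_tracing are hypothetical names, and standard bracket includes stand in for this repo's libcxx paths): a zero-initialized state plus any callable gives race-free one-time initialization.
#include <atomic>
#include "third_party/tbb/detail/_utils.hh"
namespace {
// Zero-initialization maps to do_once_state::uninitialized.
std::atomic<tbb::detail::d0::do_once_state> trace_ready{};
void init_tracing() { /* open log sinks, etc. */ }
}
void ensure_tracing_initialized() {
    // Exactly one caller executes init_tracing; concurrent callers spin-wait
    // until the state becomes do_once_state::executed.
    tbb::detail::d0::atomic_do_once([] { init_tracing(); }, trace_ready);
}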

View file

@ -0,0 +1,105 @@
// clang-format off
/*
Copyright (c) 2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_detail__address_waiters_H
#define __TBB_detail__address_waiters_H
#include "third_party/tbb/detail/_utils.hh"
namespace tbb {
namespace detail {
namespace r1 {
TBB_EXPORT void __TBB_EXPORTED_FUNC wait_on_address(void* address, d1::delegate_base& wakeup_condition, std::uintptr_t context);
TBB_EXPORT void __TBB_EXPORTED_FUNC notify_by_address(void* address, std::uintptr_t context);
TBB_EXPORT void __TBB_EXPORTED_FUNC notify_by_address_one(void* address);
TBB_EXPORT void __TBB_EXPORTED_FUNC notify_by_address_all(void* address);
} // namespace r1
namespace d1 {
template <typename Predicate>
void adaptive_wait_on_address(void* address, Predicate wakeup_condition, std::uintptr_t context) {
if (!timed_spin_wait_until(wakeup_condition)) {
d1::delegated_function<Predicate> pred(wakeup_condition);
r1::wait_on_address(address, pred, context);
}
}
template <typename T>
class waitable_atomic {
public:
waitable_atomic() = default;
explicit waitable_atomic(T value) : my_atomic(value) {}
waitable_atomic(const waitable_atomic&) = delete;
waitable_atomic& operator=(const waitable_atomic&) = delete;
T load(std::memory_order order) const noexcept {
return my_atomic.load(order);
}
T exchange(T desired) noexcept {
return my_atomic.exchange(desired);
}
void wait(T old, std::uintptr_t context, std::memory_order order) {
auto wakeup_condition = [&] { return my_atomic.load(order) != old; };
if (!timed_spin_wait_until(wakeup_condition)) {
// We need to use a loop here, because notify_all() will wake up all threads,
// but the predicate may still be false for some of them.
d1::delegated_function<decltype(wakeup_condition)> pred(wakeup_condition);
do {
r1::wait_on_address(this, pred, context);
} while (!wakeup_condition());
}
}
void wait_until(T expected, std::uintptr_t context, std::memory_order order) {
auto wakeup_condition = [&] { return my_atomic.load(order) == expected; };
if (!timed_spin_wait_until(wakeup_condition)) {
// We need to use a loop here, because notify_all() will wake up all threads,
// but the predicate may still be false for some of them.
d1::delegated_function<decltype(wakeup_condition)> pred(wakeup_condition);
do {
r1::wait_on_address(this, pred, context);
} while (!wakeup_condition());
}
}
void notify_relaxed(std::uintptr_t context) {
r1::notify_by_address(this, context);
}
void notify_one_relaxed() {
r1::notify_by_address_one(this);
}
// TODO: consider adding following interfaces:
// store(desired, memory_order)
// notify_all_relaxed()
private:
std::atomic<T> my_atomic{};
};
} // namespace d1
} // namespace detail
} // namespace tbb
#endif // __TBB_detail__address_waiters_H
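A minimal sketch of a one-shot gate built on waitable_atomic above (not part of the vendored file; gate and gate_context are hypothetical, and it is assumed, as in TBB's internal callers, that the waiter and the notifier agree on the same context tag).
#include <atomic>
#include <cstdint>
#include "third_party/tbb/detail/_address_waiters.hh"
static tbb::detail::d1::waitable_atomic<int> gate{0};
static constexpr std::uintptr_t gate_context = 1; // opaque tag shared by waiter and notifier
void wait_for_gate() {
    // Spins adaptively first, then blocks while the value is still 0.
    gate.wait(/*old*/ 0, gate_context, std::memory_order_acquire);
}
void open_gate() {
    gate.exchange(1);                  // the class has no store(); exchange publishes the value
    gate.notify_relaxed(gate_context); // wake threads blocked in wait_on_address
}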

516
third_party/tbb/dynamic_link.cc vendored Normal file
View file

@ -0,0 +1,516 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/dynamic_link.hh"
#include "third_party/tbb/environment.hh"
#include "third_party/tbb/detail/_template_helpers.hh"
#include "third_party/tbb/detail/_utils.hh"
/*
This file is used by both TBB and OpenMP RTL. Do not use __TBB_ASSERT() macro
and runtime_warning() function because they are not available in OpenMP. Use
__TBB_ASSERT_EX and DYNAMIC_LINK_WARNING instead.
*/
#include "third_party/libcxx/cstdarg" // va_list etc.
#include "third_party/libcxx/cstring" // strrchr
#if _WIN32
#include "libc/mem/mem.h"
// Unify system calls
#define dlopen( name, flags ) LoadLibrary( name )
#define dlsym( handle, name ) GetProcAddress( handle, name )
#define dlclose( handle ) ( ! FreeLibrary( handle ) )
#define dlerror() GetLastError()
#ifndef PATH_MAX
#define PATH_MAX MAX_PATH
#endif
#else /* _WIN32 */
#include "libc/runtime/dlfcn.h"
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/pathconf.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fileno.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/ok.h"
#include "libc/time/time.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/lockf.h"
#include "third_party/libcxx/climits"
#include "third_party/libcxx/cstdlib"
#endif /* _WIN32 */
#if __TBB_WEAK_SYMBOLS_PRESENT && !__TBB_DYNAMIC_LOAD_ENABLED
//TODO: use function attribute for weak symbols instead of the pragma.
#pragma weak dlopen
#pragma weak dlsym
#pragma weak dlclose
#endif /* __TBB_WEAK_SYMBOLS_PRESENT && !__TBB_DYNAMIC_LOAD_ENABLED */
#define __USE_STATIC_DL_INIT ( !__ANDROID__ )
/*
dynamic_link is a common interface for searching for required symbols in an
executable and dynamic libraries.
dynamic_link provides certain guarantees:
1. Either all or none of the requested symbols are resolved. Moreover, if
symbols are not resolved, the dynamic_link_descriptor table is not modified;
2. All returned symbols have secured lifetime: this means that none of them
can be invalidated until dynamic_unlink is called;
3. Any loaded library is loaded only via the full path. The full path is that
from which the runtime itself was loaded. (This is done to avoid security
issues caused by loading libraries from insecure paths).
dynamic_link searches for the requested symbols in three stages, stopping as
soon as all of the symbols have been resolved.
1. Search the global scope:
a. On Windows: dynamic_link tries to obtain the handle of the requested
library and if it succeeds it resolves the symbols via that handle.
b. On Linux: dynamic_link tries to search for the symbols in the global
scope via the main program handle. If the symbols are present in the global
scope their lifetime is not guaranteed (since dynamic_link does not know
anything about the library from which they are exported). Therefore it
tries to "pin" the symbols by obtaining the library name and reopening it.
dlopen may fail to reopen the library in two cases:
i. The symbols are exported from the executable. Currently dynamic_link
cannot handle this situation, so it will not find these symbols in this
step.
ii. The necessary library has been unloaded and cannot be reloaded. It
seems there is nothing that can be done in this case. No symbols are
returned.
2. Dynamic load: an attempt is made to load the requested library via the
full path.
The full path used is that from which the runtime itself was loaded. If the
library can be loaded, then an attempt is made to resolve the requested
symbols in the newly loaded library.
If the symbols are not found the library is unloaded.
3. Weak symbols: if weak symbols are available they are returned.
*/
namespace tbb {
namespace detail {
namespace r1 {
#if __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED
#if !defined(DYNAMIC_LINK_WARNING) && !__TBB_WIN8UI_SUPPORT && __TBB_DYNAMIC_LOAD_ENABLED
// Report runtime errors and continue.
#define DYNAMIC_LINK_WARNING dynamic_link_warning
static void dynamic_link_warning( dynamic_link_error_t code, ... ) {
suppress_unused_warning(code);
} // dynamic_link_warning
#endif /* !defined(DYNAMIC_LINK_WARNING) && !__TBB_WIN8UI_SUPPORT && __TBB_DYNAMIC_LOAD_ENABLED */
static bool resolve_symbols( dynamic_link_handle module, const dynamic_link_descriptor descriptors[], std::size_t required )
{
if ( !module )
return false;
#if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */
if ( !dlsym ) return false;
#endif /* !__TBB_DYNAMIC_LOAD_ENABLED */
const std::size_t n_desc=20; // Usually we don't have more than 20 descriptors per library
__TBB_ASSERT_EX( required <= n_desc, "Too many descriptors are required" );
if ( required > n_desc ) return false;
pointer_to_handler h[n_desc];
for ( std::size_t k = 0; k < required; ++k ) {
dynamic_link_descriptor const & desc = descriptors[k];
pointer_to_handler addr = (pointer_to_handler)dlsym( module, desc.name );
if ( !addr ) {
return false;
}
h[k] = addr;
}
// Commit the entry points.
// Cannot use memset here, because the writes must be atomic.
for( std::size_t k = 0; k < required; ++k )
*descriptors[k].handler = h[k];
return true;
}
#if __TBB_WIN8UI_SUPPORT
bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle*, int flags ) {
dynamic_link_handle tmp_handle = nullptr;
TCHAR wlibrary[256];
if ( MultiByteToWideChar(CP_UTF8, 0, library, -1, wlibrary, 255) == 0 ) return false;
if ( flags & DYNAMIC_LINK_LOAD )
tmp_handle = LoadPackagedLibrary( wlibrary, 0 );
if (tmp_handle != nullptr){
return resolve_symbols(tmp_handle, descriptors, required);
}else{
return false;
}
}
void dynamic_unlink( dynamic_link_handle ) {}
void dynamic_unlink_all() {}
#else
#if __TBB_DYNAMIC_LOAD_ENABLED
/*
There is a security issue on Windows: LoadLibrary() may load and execute malicious code.
See http://www.microsoft.com/technet/security/advisory/2269637.mspx for details.
To avoid the issue, we have to pass the full path (not just the library name) to LoadLibrary. This
function constructs the full path to the specified library (it is assumed the library is located
side-by-side with tbb.dll).
The function constructs an absolute path for a given relative path. Important: the base directory is
not the current one, it is the directory tbb.dll was loaded from.
Example:
Let us assume "tbb.dll" is located in "c:\program files\common\intel\" directory, e.g.
absolute path of the library is "c:\program files\common\intel\tbb.dll". Absolute path for
"tbbmalloc.dll" would be "c:\program files\common\intel\tbbmalloc.dll". Absolute path for
"malloc\tbbmalloc.dll" would be "c:\program files\common\intel\malloc\tbbmalloc.dll".
*/
// The handles_t struct below is used by the dynamic_link routine to store handles of
// all loaded or pinned dynamic libraries. When TBB is shut down, it calls
// dynamic_unlink_all(), which unloads the modules referenced by handles_t.
// This struct must not have any constructors, since it may be used before
// its constructor would have been called (i.e. during static initialization).
#define MAX_LOADED_MODULES 8 // The number of maximum possible modules which can be loaded
using atomic_incrementer = std::atomic<std::size_t>;
static struct handles_t {
atomic_incrementer my_size;
dynamic_link_handle my_handles[MAX_LOADED_MODULES];
void add(const dynamic_link_handle &handle) {
const std::size_t ind = my_size++;
__TBB_ASSERT_EX( ind < MAX_LOADED_MODULES, "Too many modules are loaded" );
my_handles[ind] = handle;
}
void free() {
const std::size_t size = my_size;
for (std::size_t i=0; i<size; ++i)
dynamic_unlink( my_handles[i] );
}
} handles;
static std::once_flag init_dl_data_state;
static struct ap_data_t {
char _path[PATH_MAX+1];
std::size_t _len;
} ap_data;
static void init_ap_data() {
#if _WIN32
// Get handle of our DLL first.
HMODULE handle;
BOOL brc = GetModuleHandleEx(
GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
(LPCSTR)( & dynamic_link ), // any function inside the library can be used for the address
& handle
);
if ( !brc ) { // Error occurred.
int err = GetLastError();
DYNAMIC_LINK_WARNING( dl_sys_fail, "GetModuleHandleEx", err );
return;
}
// Now get path to our DLL.
DWORD drc = GetModuleFileName( handle, ap_data._path, static_cast< DWORD >( PATH_MAX ) );
if ( drc == 0 ) { // Error occurred.
int err = GetLastError();
DYNAMIC_LINK_WARNING( dl_sys_fail, "GetModuleFileName", err );
return;
}
if ( drc >= PATH_MAX ) { // Buffer too short.
DYNAMIC_LINK_WARNING( dl_buff_too_small );
return;
}
// Find the position of the last backslash.
char *backslash = std::strrchr( ap_data._path, '\\' );
if ( !backslash ) { // Backslash not found.
__TBB_ASSERT_EX( backslash != nullptr, "Unbelievable.");
return;
}
__TBB_ASSERT_EX( backslash >= ap_data._path, "Unbelievable.");
ap_data._len = (std::size_t)(backslash - ap_data._path) + 1;
*(backslash+1) = 0;
#else
// Get the library path
Dl_info dlinfo;
int res = dladdr( (void*)&dynamic_link, &dlinfo ); // any function inside the library can be used for the address
if ( !res ) {
char const * err = dlerror();
DYNAMIC_LINK_WARNING( dl_sys_fail, "dladdr", err );
return;
} else {
__TBB_ASSERT_EX( dlinfo.dli_fname!=nullptr, "Unbelievable." );
}
char const *slash = std::strrchr( dlinfo.dli_fname, '/' );
std::size_t fname_len=0;
if ( slash ) {
__TBB_ASSERT_EX( slash >= dlinfo.dli_fname, "Unbelievable.");
fname_len = (std::size_t)(slash - dlinfo.dli_fname) + 1;
}
std::size_t rc;
if ( dlinfo.dli_fname[0]=='/' ) {
// The library path is absolute
rc = 0;
ap_data._len = 0;
} else {
// The library path is relative so get the current working directory
if ( !getcwd( ap_data._path, sizeof(ap_data._path)/sizeof(ap_data._path[0]) ) ) {
DYNAMIC_LINK_WARNING( dl_buff_too_small );
return;
}
ap_data._len = std::strlen( ap_data._path );
ap_data._path[ap_data._len++]='/';
rc = ap_data._len;
}
if ( fname_len>0 ) {
ap_data._len += fname_len;
if ( ap_data._len>PATH_MAX ) {
DYNAMIC_LINK_WARNING( dl_buff_too_small );
ap_data._len=0;
return;
}
std::strncpy( ap_data._path+rc, dlinfo.dli_fname, fname_len );
ap_data._path[ap_data._len]=0;
}
#endif /* _WIN32 */
}
static void init_dl_data() {
init_ap_data();
}
/*
The function constructs absolute path for given relative path. Important: Base directory is not
current one, it is the directory libtbb.so loaded from.
Arguments:
in name -- Name of a file (it may include a relative path, but must not be an absolute one).
out path -- Buffer to save result (absolute path) to.
in len -- Size of buffer.
ret -- 0 -- Error occurred.
> len -- Buffer too short, required size returned.
otherwise -- Ok, number of characters (incl. terminating null) written to buffer.
*/
static std::size_t abs_path( char const * name, char * path, std::size_t len ) {
if ( ap_data._len == 0 )
return 0;
std::size_t name_len = std::strlen( name );
std::size_t full_len = name_len+ap_data._len;
if ( full_len < len ) {
__TBB_ASSERT( ap_data._path[ap_data._len] == 0, nullptr);
__TBB_ASSERT( std::strlen(ap_data._path) == ap_data._len, nullptr);
std::strncpy( path, ap_data._path, ap_data._len + 1 );
__TBB_ASSERT( path[ap_data._len] == 0, nullptr);
std::strncat( path, name, len - ap_data._len );
__TBB_ASSERT( std::strlen(path) == full_len, nullptr);
}
return full_len+1; // +1 for null character
}
#endif // __TBB_DYNAMIC_LOAD_ENABLED
void init_dynamic_link_data() {
#if __TBB_DYNAMIC_LOAD_ENABLED
std::call_once( init_dl_data_state, init_dl_data );
#endif
}
#if __USE_STATIC_DL_INIT
// The ap_data structure is initialized with the current directory on Linux.
// So it should be initialized as soon as possible, since the current directory may be changed.
// The static_init_dl_data object below provides this initialization during library loading.
static struct static_init_dl_data_t {
static_init_dl_data_t() {
init_dynamic_link_data();
}
} static_init_dl_data;
#endif
#if __TBB_WEAK_SYMBOLS_PRESENT
static bool weak_symbol_link( const dynamic_link_descriptor descriptors[], std::size_t required )
{
// Check if the required entries are present in what was loaded into our process.
for ( std::size_t k = 0; k < required; ++k )
if ( !descriptors[k].ptr )
return false;
// Commit the entry points.
for ( std::size_t k = 0; k < required; ++k )
*descriptors[k].handler = (pointer_to_handler) descriptors[k].ptr;
return true;
}
#else
static bool weak_symbol_link( const dynamic_link_descriptor[], std::size_t ) {
return false;
}
#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
void dynamic_unlink( dynamic_link_handle handle ) {
#if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */
if ( !dlclose ) return;
#endif
if ( handle ) {
dlclose( handle );
}
}
void dynamic_unlink_all() {
#if __TBB_DYNAMIC_LOAD_ENABLED
handles.free();
#endif
}
static dynamic_link_handle global_symbols_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) {
dynamic_link_handle library_handle{};
#if _WIN32
auto res = GetModuleHandleEx(0, library, &library_handle);
__TBB_ASSERT_EX((res && library_handle) || (!res && !library_handle), nullptr);
#else /* _WIN32 */
#if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */
if ( !dlopen ) return 0;
#endif /* !__TBB_DYNAMIC_LOAD_ENABLED */
// RTLD_GLOBAL - to guarantee that old TBB will find the loaded library
// RTLD_NOLOAD - to avoid actually loading the library here, since we do not have its full path
library_handle = dlopen(library, RTLD_LAZY | RTLD_GLOBAL | RTLD_NOLOAD);
#endif /* _WIN32 */
if (library_handle) {
if (!resolve_symbols(library_handle, descriptors, required)) {
dynamic_unlink(library_handle);
library_handle = nullptr;
}
}
return library_handle;
}
static void save_library_handle( dynamic_link_handle src, dynamic_link_handle *dst ) {
__TBB_ASSERT_EX( src, "The library handle to store must be non-zero" );
if ( dst )
*dst = src;
#if __TBB_DYNAMIC_LOAD_ENABLED
else
handles.add( src );
#endif /* __TBB_DYNAMIC_LOAD_ENABLED */
}
#if !_WIN32
int loading_flags(bool local_binding) {
int flags = RTLD_NOW;
if (local_binding) {
flags = flags | RTLD_LOCAL;
#if (__linux__ && __GLIBC__) && !__TBB_USE_SANITIZERS
if( !GetBoolEnvironmentVariable("TBB_ENABLE_SANITIZERS") ) {
flags = flags | RTLD_DEEPBIND;
}
#endif
} else {
flags = flags | RTLD_GLOBAL;
}
return flags;
}
#endif
dynamic_link_handle dynamic_load( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, bool local_binding ) {
::tbb::detail::suppress_unused_warning( library, descriptors, required, local_binding );
#if __TBB_DYNAMIC_LOAD_ENABLED
std::size_t const len = PATH_MAX + 1;
char path[ len ];
std::size_t rc = abs_path( library, path, len );
if ( 0 < rc && rc <= len ) {
#if _WIN32
// Prevent Windows from displaying silly message boxes if it fails to load library
// (e.g. because of MS runtime problems - one of those crazy manifest related ones)
UINT prev_mode = SetErrorMode (SEM_FAILCRITICALERRORS);
#endif /* _WIN32 */
// The second argument (loading_flags) is ignored on Windows
dynamic_link_handle library_handle = dlopen( path, loading_flags(local_binding) );
#if _WIN32
SetErrorMode (prev_mode);
#endif /* _WIN32 */
if( library_handle ) {
if( !resolve_symbols( library_handle, descriptors, required ) ) {
// The loaded library does not contain all the expected entry points
dynamic_unlink( library_handle );
library_handle = nullptr;
}
} else
DYNAMIC_LINK_WARNING( dl_lib_not_found, path, dlerror() );
return library_handle;
} else if ( rc>len )
DYNAMIC_LINK_WARNING( dl_buff_too_small );
// rc == 0 means init_ap_data failed, so the warning has already been issued.
#endif /* __TBB_DYNAMIC_LOAD_ENABLED */
return nullptr;
}
bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle *handle, int flags ) {
init_dynamic_link_data();
// TODO: May global_symbols_link find weak symbols?
dynamic_link_handle library_handle = ( flags & DYNAMIC_LINK_GLOBAL ) ? global_symbols_link( library, descriptors, required ) : nullptr;
#if defined(_MSC_VER) && _MSC_VER <= 1900
#pragma warning (push)
// MSVC 2015 warning: 'int': forcing value to bool 'true' or 'false'
#pragma warning (disable: 4800)
#endif
if ( !library_handle && ( flags & DYNAMIC_LINK_LOAD ) )
library_handle = dynamic_load( library, descriptors, required, flags & DYNAMIC_LINK_LOCAL );
#if defined(_MSC_VER) && _MSC_VER <= 1900
#pragma warning (pop)
#endif
if ( !library_handle && ( flags & DYNAMIC_LINK_WEAK ) )
return weak_symbol_link( descriptors, required );
if ( library_handle ) {
save_library_handle( library_handle, handle );
return true;
}
return false;
}
#endif /*__TBB_WIN8UI_SUPPORT*/
#else /* __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED */
bool dynamic_link( const char*, const dynamic_link_descriptor*, std::size_t, dynamic_link_handle *handle, int ) {
if ( handle )
*handle=0;
return false;
}
void dynamic_unlink( dynamic_link_handle ) {}
void dynamic_unlink_all() {}
#endif /* __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED */
} // namespace r1
} // namespace detail
} // namespace tbb
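A minimal sketch of driving the staged search described above with explicit flags (not part of the vendored file; libbar.so.2 and bar_init are hypothetical). Passing only DYNAMIC_LINK_GLOBAL restricts resolution to stage 1, so the symbols are resolved and pinned only if the library is already present in the process.
#include "third_party/tbb/dynamic_link.hh"
static void (*bar_init_ptr)() = nullptr;
static const tbb::detail::r1::dynamic_link_descriptor bar_symbols[] = {
    DLD_NOWEAK(bar_init, bar_init_ptr)
};
bool pin_bar_if_already_loaded() {
    tbb::detail::r1::dynamic_link_handle module = nullptr;
    // Stage 1 only: look in the global scope and pin the library; do not load it.
    return tbb::detail::r1::dynamic_link("libbar.so.2", bar_symbols, 1, &module,
                                         tbb::detail::r1::DYNAMIC_LINK_GLOBAL);
}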

137
third_party/tbb/dynamic_link.hh vendored Normal file
View file

@ -0,0 +1,137 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_dynamic_link
#define __TBB_dynamic_link
// Support for dynamic loading entry points from other shared libraries.
#include "third_party/tbb/detail/_config.hh"
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/mutex"
/** By default, symbols declared and defined here go into namespace tbb::internal.
To put them in other namespace, define macros OPEN_INTERNAL_NAMESPACE
and CLOSE_INTERNAL_NAMESPACE to override the following default definitions. **/
#include "third_party/libcxx/cstddef"
#ifdef _WIN32
#include "libc/nt/accounting.h"
#include "libc/nt/automation.h"
#include "libc/nt/console.h"
#include "libc/nt/debug.h"
#include "libc/nt/dll.h"
#include "libc/nt/enum/keyaccess.h"
#include "libc/nt/enum/regtype.h"
#include "libc/nt/errors.h"
#include "libc/nt/events.h"
#include "libc/nt/files.h"
#include "libc/nt/ipc.h"
#include "libc/nt/memory.h"
#include "libc/nt/paint.h"
#include "libc/nt/process.h"
#include "libc/nt/registry.h"
#include "libc/nt/synchronization.h"
#include "libc/nt/thread.h"
#include "libc/nt/windows.h"
#include "libc/nt/winsock.h"
#endif /* _WIN32 */
namespace tbb {
namespace detail {
namespace r1 {
//! Type definition for a pointer to a void somefunc(void)
typedef void (*pointer_to_handler)();
//! The helper to construct dynamic_link_descriptor structure
// Double cast through the void* in DLD macro is necessary to
// prevent warnings from some compilers (g++ 4.1)
#if __TBB_WEAK_SYMBOLS_PRESENT
#define DLD(s,h) {#s, (pointer_to_handler*)(void*)(&h), (pointer_to_handler)&s}
#define DLD_NOWEAK(s,h) {#s, (pointer_to_handler*)(void*)(&h), nullptr}
#else
#define DLD(s,h) {#s, (pointer_to_handler*)(void*)(&h)}
#define DLD_NOWEAK(s,h) DLD(s,h)
#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
//! Association between a handler name and location of pointer to it.
struct dynamic_link_descriptor {
//! Name of the handler
const char* name;
//! Pointer to the handler
pointer_to_handler* handler;
#if __TBB_WEAK_SYMBOLS_PRESENT
//! Weak symbol
pointer_to_handler ptr;
#endif
};
#if _WIN32
using dynamic_link_handle = HMODULE;
#else
using dynamic_link_handle = void*;
#endif /* _WIN32 */
const int DYNAMIC_LINK_GLOBAL = 0x01;
const int DYNAMIC_LINK_LOAD = 0x02;
const int DYNAMIC_LINK_WEAK = 0x04;
const int DYNAMIC_LINK_LOCAL = 0x08;
const int DYNAMIC_LINK_LOCAL_BINDING = DYNAMIC_LINK_LOCAL | DYNAMIC_LINK_LOAD;
const int DYNAMIC_LINK_DEFAULT = DYNAMIC_LINK_GLOBAL | DYNAMIC_LINK_LOAD | DYNAMIC_LINK_WEAK;
//! Fill in dynamically linked handlers.
/** 'library' is the name of the requested library. It should not contain a full
path since dynamic_link adds the full path (from which the runtime itself
was loaded) to the library name.
'required' is the number of the initial entries in the array descriptors[]
that have to be found in order for the call to succeed. If the library and
all the required handlers are found, then the corresponding handler
pointers are set, and the return value is true. Otherwise the original
array of descriptors is left untouched and the return value is false.
'required' is limited to 20 (exceeding this value will result in failure
to load the symbols, and the return value will be false).
'handle' is the handle of the library if it is loaded. Otherwise it is left
untouched.
'flags' is the set of DYNAMIC_LINK_* flags. Each of the DYNAMIC_LINK_* flags
allows its corresponding linking stage.
**/
bool dynamic_link( const char* library,
const dynamic_link_descriptor descriptors[],
std::size_t required,
dynamic_link_handle* handle = nullptr,
int flags = DYNAMIC_LINK_DEFAULT );
void dynamic_unlink( dynamic_link_handle handle );
void dynamic_unlink_all();
enum dynamic_link_error_t {
dl_success = 0,
dl_lib_not_found, // char const * lib, dlerr_t err
dl_sym_not_found, // char const * sym, dlerr_t err
// Note: dlerr_t depends on OS: it is char const * on Linux* and macOS*, int on Windows*.
dl_sys_fail, // char const * func, int err
dl_buff_too_small // none
}; // dynamic_link_error_t
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* __TBB_dynamic_link */
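A minimal sketch of the descriptor-table pattern this header documents (not part of the vendored file; libfoo.so.1 and foo_version are hypothetical, and DLD_NOWEAK is used so the sketch does not rely on weak-symbol support).
#include <cstddef>
#include "third_party/tbb/dynamic_link.hh"
// Target signature we expect to resolve from the hypothetical library.
static int (*foo_version_ptr)() = nullptr;
static const tbb::detail::r1::dynamic_link_descriptor foo_symbols[] = {
    DLD_NOWEAK(foo_version, foo_version_ptr)
};
bool link_foo() {
    // Either every listed handler pointer is filled in and true is returned,
    // or none is touched and false is returned.
    return tbb::detail::r1::dynamic_link("libfoo.so.1", foo_symbols,
                                         sizeof(foo_symbols) / sizeof(foo_symbols[0]));
}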

File diff suppressed because it is too large

82
third_party/tbb/environment.hh vendored Normal file
View file

@ -0,0 +1,82 @@
// clang-format off
/*
Copyright (c) 2018-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_tbb_environment_H
#define __TBB_tbb_environment_H
#include "third_party/libcxx/cstdlib"
#include "third_party/libcxx/cstring"
#include "third_party/libcxx/cerrno"
#include "third_party/libcxx/cctype"
namespace tbb {
namespace detail {
namespace r1 {
#if __TBB_WIN8UI_SUPPORT
static inline bool GetBoolEnvironmentVariable( const char * ) {
return false;
}
static inline long GetIntegralEnvironmentVariable( const char * ) {
return -1;
}
#else /* __TBB_WIN8UI_SUPPORT */
static inline bool GetBoolEnvironmentVariable( const char * name ) {
if ( const char* s = std::getenv(name) ) {
// The result is defined as true only if the environment variable contains
// no characters except one '1' character and an arbitrary number of spaces
// (including the absence of spaces).
size_t index = std::strspn(s, " ");
if (s[index] != '1') return false;
index++;
// Accessing the character after the increment is safe, since getenv() returns a
// null-terminated string: even if the '1' found above is the last character of
// the string, the incremented index refers to the terminating '\0'.
index += std::strspn(&s[index], " ");
return !s[index];
}
return false;
}
static inline long GetIntegralEnvironmentVariable( const char * name ) {
if ( const char* s = std::getenv(name) ) {
char* end = nullptr;
errno = 0;
long value = std::strtol(s, &end, 10);
// The value is out of range, negative, or the string is not convertible
if ( errno == ERANGE || value < 0 || end==s ) {
return -1;
}
for ( ; *end != '\0'; end++ ) {
if ( !std::isspace(*end) ) {
return -1;
}
}
return value;
}
return -1;
}
#endif /* __TBB_WIN8UI_SUPPORT */
} // namespace r1
} // namespace detail
} // namespace tbb
#endif // __TBB_tbb_environment_H
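A minimal sketch of the parsing rules implemented above (not part of the vendored file; the TBB_DEMO_* variable names are hypothetical and POSIX setenv() is assumed).
#include <cassert>
#include <cstdlib>
#include "third_party/tbb/environment.hh"
void environment_parsing_examples() {
    using tbb::detail::r1::GetBoolEnvironmentVariable;
    using tbb::detail::r1::GetIntegralEnvironmentVariable;
    setenv("TBB_DEMO_FLAG", " 1 ", 1);      // a single '1' surrounded by spaces -> true
    assert(GetBoolEnvironmentVariable("TBB_DEMO_FLAG"));
    setenv("TBB_DEMO_FLAG", "10", 1);       // anything but exactly one '1' -> false
    assert(!GetBoolEnvironmentVariable("TBB_DEMO_FLAG"));
    setenv("TBB_DEMO_COUNT", "10", 1);      // non-negative decimal -> its value
    assert(GetIntegralEnvironmentVariable("TBB_DEMO_COUNT") == 10);
    setenv("TBB_DEMO_COUNT", "10abc", 1);   // trailing garbage -> -1
    assert(GetIntegralEnvironmentVariable("TBB_DEMO_COUNT") == -1);
}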

167
third_party/tbb/exception.cc vendored Normal file
View file

@ -0,0 +1,167 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/detail/_exception.hh"
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_template_helpers.hh"
#include "third_party/libcxx/cstring"
#include "third_party/libcxx/cstdio"
#include "third_party/libcxx/stdexcept" // std::runtime_error
#include "third_party/libcxx/new"
#include "third_party/libcxx/stdexcept"
#define __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN \
(__GLIBCXX__ && __TBB_GLIBCXX_VERSION>=40700 && __TBB_GLIBCXX_VERSION<60000 && TBB_USE_EXCEPTIONS)
#if __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN
// GCC ABI declarations necessary for a workaround
// MISSING #include <cxxabi.h>
#endif
namespace tbb {
namespace detail {
namespace r1 {
const char* bad_last_alloc::what() const noexcept(true) { return "bad allocation in previous or concurrent attempt"; }
const char* user_abort::what() const noexcept(true) { return "User-initiated abort has terminated this operation"; }
const char* missing_wait::what() const noexcept(true) { return "wait() was not called on the structured_task_group"; }
#if TBB_USE_EXCEPTIONS
template <typename F>
/*[[noreturn]]*/ void do_throw_noexcept(F throw_func) noexcept {
throw_func();
}
/*[[noreturn]]*/ void do_throw_noexcept(void (*throw_func)()) noexcept {
throw_func();
#if __GNUC__ == 7
// In release, GCC 7 loses noexcept attribute during tail call optimization.
// The following statement prevents tail call optimization.
volatile bool reach_this_point = true;
suppress_unused_warning(reach_this_point);
#endif
}
bool terminate_on_exception(); // defined in global_control.cpp and ipc_server.cpp
template <typename F>
/*[[noreturn]]*/ void do_throw(F throw_func) {
if (terminate_on_exception()) {
do_throw_noexcept(throw_func);
}
throw_func();
}
#define DO_THROW(exc, init_args) do_throw( []{ throw exc init_args; } );
#else /* !TBB_USE_EXCEPTIONS */
#define PRINT_ERROR_AND_ABORT(exc_name, msg) \
std::fprintf (stderr, "Exception %s with message %s would have been thrown, " \
"if exception handling had not been disabled. Aborting.\n", exc_name, msg); \
std::fflush(stderr); \
std::abort();
#define DO_THROW(exc, init_args) PRINT_ERROR_AND_ABORT(#exc, #init_args)
#endif /* !TBB_USE_EXCEPTIONS */
void throw_exception ( exception_id eid ) {
switch ( eid ) {
case exception_id::bad_alloc: DO_THROW(std::bad_alloc, ()); break;
case exception_id::bad_last_alloc: DO_THROW(bad_last_alloc, ()); break;
case exception_id::user_abort: DO_THROW( user_abort, () ); break;
case exception_id::nonpositive_step: DO_THROW(std::invalid_argument, ("Step must be positive") ); break;
case exception_id::out_of_range: DO_THROW(std::out_of_range, ("Index out of requested size range")); break;
case exception_id::reservation_length_error: DO_THROW(std::length_error, ("Attempt to exceed implementation defined length limits")); break;
case exception_id::missing_wait: DO_THROW(missing_wait, ()); break;
case exception_id::invalid_load_factor: DO_THROW(std::out_of_range, ("Invalid hash load factor")); break;
case exception_id::invalid_key: DO_THROW(std::out_of_range, ("invalid key")); break;
case exception_id::bad_tagged_msg_cast: DO_THROW(std::runtime_error, ("Illegal tagged_msg cast")); break;
case exception_id::unsafe_wait: DO_THROW(unsafe_wait, ("Unsafe to wait further")); break;
default: __TBB_ASSERT ( false, "Unknown exception ID" );
}
__TBB_ASSERT(false, "Unreachable code");
}
/* The "what" should be fairly short, not more than about 128 characters.
Because we control all the call sites to handle_perror, it is pointless
to bullet-proof it for very long strings.
Design note: ADR put this routine off to the side in tbb_misc.cpp instead of
Task.cpp because the throw generates a pathetic lot of code, and ADR wanted
this large chunk of code to be placed on a cold page. */
void handle_perror( int error_code, const char* what ) {
const int BUF_SIZE = 255;
char buf[BUF_SIZE + 1] = { 0 };
std::strncat(buf, what, BUF_SIZE);
std::size_t buf_len = std::strlen(buf);
if (error_code) {
std::strncat(buf, ": ", BUF_SIZE - buf_len);
buf_len = std::strlen(buf);
std::strncat(buf, std::strerror(error_code), BUF_SIZE - buf_len);
buf_len = std::strlen(buf);
}
__TBB_ASSERT(buf_len <= BUF_SIZE && buf[buf_len] == 0, nullptr);
#if TBB_USE_EXCEPTIONS
do_throw([&buf] { throw std::runtime_error(buf); });
#else
PRINT_ERROR_AND_ABORT( "runtime_error", buf);
#endif /* !TBB_USE_EXCEPTIONS */
}
#if __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN
// Runtime detection and workaround for the GCC bug 62258.
// The problem is that std::rethrow_exception() does not increment a counter
// of active exceptions, causing std::uncaught_exception() to return a wrong value.
// The code is created after, and roughly reflects, the workaround
// at https://gcc.gnu.org/bugzilla/attachment.cgi?id=34683
void fix_broken_rethrow() {
struct gcc_eh_data {
void * caughtExceptions;
unsigned int uncaughtExceptions;
};
gcc_eh_data* eh_data = punned_cast<gcc_eh_data*>( abi::__cxa_get_globals() );
++eh_data->uncaughtExceptions;
}
bool gcc_rethrow_exception_broken() {
bool is_broken;
__TBB_ASSERT( !std::uncaught_exception(),
"gcc_rethrow_exception_broken() must not be called when an exception is active" );
try {
// Throw, catch, and rethrow an exception
try {
throw __TBB_GLIBCXX_VERSION;
} catch(...) {
std::rethrow_exception( std::current_exception() );
}
} catch(...) {
// Check the bug presence
is_broken = std::uncaught_exception();
}
if( is_broken ) fix_broken_rethrow();
__TBB_ASSERT( !std::uncaught_exception(), nullptr);
return is_broken;
}
#else
void fix_broken_rethrow() {}
bool gcc_rethrow_exception_broken() { return false; }
#endif /* __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN */
} // namespace r1
} // namespace detail
} // namespace tbb
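A minimal sketch of how a caller uses handle_perror above (not part of the vendored file; report_tls_failure and the "pthread_key_create" context string are hypothetical, and it assumes linking against the TBB runtime). With TBB_USE_EXCEPTIONS it throws std::runtime_error; with exceptions disabled it prints the message and aborts instead.
#include <cstdio>
#include <stdexcept>
namespace tbb { namespace detail { namespace r1 {
void handle_perror(int error_code, const char* what); // defined above; forward-declared for the sketch
}}}
void report_tls_failure(int status) {
    try {
        // Produces a message of the form "pthread_key_create: <strerror text>".
        tbb::detail::r1::handle_perror(status, "pthread_key_create");
    } catch (const std::runtime_error& e) {
        std::fprintf(stderr, "%s\n", e.what());
    }
}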

3377
third_party/tbb/flow_graph.hh vendored Normal file

File diff suppressed because it is too large

View file

@ -0,0 +1,52 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_flow_graph_abstractions_H
#define __TBB_flow_graph_abstractions_H
namespace tbb {
namespace detail {
namespace d1 {
//! Pure virtual template classes that define interfaces for async communication
class graph_proxy {
public:
//! Inform a graph that messages may come from outside, to prevent premature graph completion
virtual void reserve_wait() = 0;
//! Inform a graph that a previous call to reserve_wait is no longer in effect
virtual void release_wait() = 0;
virtual ~graph_proxy() {}
};
template <typename Input>
class receiver_gateway : public graph_proxy {
public:
//! Type of the input data for the flow graph.
typedef Input input_type;
//! Submit a signal from an asynchronous activity to the flow graph.
virtual bool try_put(const input_type&) = 0;
};
} // d1
} // detail
} // tbb
#endif
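A minimal sketch of implementing the gateway interface above (not part of the vendored file; the include path is inferred from the header guard and stdout_gateway is hypothetical). An asynchronous producer would call reserve_wait() before starting, try_put() per result, and release_wait() when done.
#include <cstdio>
#include "third_party/tbb/flow_graph_abstractions.hh" // assumed path for this header
class stdout_gateway : public tbb::detail::d1::receiver_gateway<int> {
public:
    void reserve_wait() override { ++my_pending; }  // async work announced
    void release_wait() override { --my_pending; }  // async work finished
    bool try_put(const int& value) override {
        std::printf("received %d\n", value);
        return true;
    }
private:
    int my_pending = 0;
};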

281
third_party/tbb/global_control.cc vendored Normal file
View file

@ -0,0 +1,281 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_template_helpers.hh"
#include "third_party/tbb/global_control.hh"
#include "third_party/tbb/tbb_allocator.hh"
#include "third_party/tbb/spin_mutex.hh"
#include "third_party/tbb/governor.hh"
#include "third_party/tbb/threading_control.hh"
#include "third_party/tbb/market.hh"
#include "third_party/tbb/misc.hh"
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/set"
namespace tbb {
namespace detail {
namespace r1 {
//! Comparator for a set of global_control objects
struct control_storage_comparator {
bool operator()(const d1::global_control* lhs, const d1::global_control* rhs) const;
};
class control_storage {
friend struct global_control_impl;
friend std::size_t global_control_active_value(int);
friend void global_control_lock();
friend void global_control_unlock();
friend std::size_t global_control_active_value_unsafe(d1::global_control::parameter);
protected:
std::size_t my_active_value{0};
std::set<d1::global_control*, control_storage_comparator, tbb_allocator<d1::global_control*>> my_list{};
spin_mutex my_list_mutex{};
public:
virtual std::size_t default_value() const = 0;
virtual void apply_active(std::size_t new_active) {
my_active_value = new_active;
}
virtual bool is_first_arg_preferred(std::size_t a, std::size_t b) const {
return a>b; // prefer max by default
}
virtual std::size_t active_value() {
spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call
return !my_list.empty() ? my_active_value : default_value();
}
std::size_t active_value_unsafe() {
return !my_list.empty() ? my_active_value : default_value();
}
};
class alignas(max_nfs_size) allowed_parallelism_control : public control_storage {
std::size_t default_value() const override {
return max(1U, governor::default_num_threads());
}
bool is_first_arg_preferred(std::size_t a, std::size_t b) const override {
return a<b; // prefer min allowed parallelism
}
void apply_active(std::size_t new_active) override {
control_storage::apply_active(new_active);
__TBB_ASSERT(my_active_value >= 1, nullptr);
// -1 to take external thread into account
threading_control::set_active_num_workers(my_active_value - 1);
}
std::size_t active_value() override {
spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call
if (my_list.empty()) {
return default_value();
}
// non-zero, if market is active
const std::size_t workers = threading_control::max_num_workers();
// We can't exceed market's maximal number of workers.
// +1 to take external thread into account
return workers ? min(workers + 1, my_active_value) : my_active_value;
}
public:
std::size_t active_value_if_present() const {
return !my_list.empty() ? my_active_value : 0;
}
};
class alignas(max_nfs_size) stack_size_control : public control_storage {
std::size_t default_value() const override {
#if _WIN32_WINNT >= 0x0602 /* _WIN32_WINNT_WIN8 */
static auto ThreadStackSizeDefault = [] {
ULONG_PTR hi, lo;
GetCurrentThreadStackLimits(&lo, &hi);
return hi - lo;
}();
return ThreadStackSizeDefault;
#else
return ThreadStackSize;
#endif
}
void apply_active(std::size_t new_active) override {
control_storage::apply_active(new_active);
#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
__TBB_ASSERT( false, "For Windows 8 Store* apps we must not set stack size" );
#endif
}
};
class alignas(max_nfs_size) terminate_on_exception_control : public control_storage {
std::size_t default_value() const override {
return 0;
}
};
class alignas(max_nfs_size) lifetime_control : public control_storage {
bool is_first_arg_preferred(std::size_t, std::size_t) const override {
return false; // not interested
}
std::size_t default_value() const override {
return 0;
}
void apply_active(std::size_t new_active) override {
if (new_active == 1) {
// reserve the market reference
threading_control::register_lifetime_control();
} else if (new_active == 0) { // new_active == 0
threading_control::unregister_lifetime_control(/*blocking_terminate*/ false);
}
control_storage::apply_active(new_active);
}
public:
bool is_empty() {
spin_mutex::scoped_lock lock(my_list_mutex);
return my_list.empty();
}
};
static allowed_parallelism_control allowed_parallelism_ctl;
static stack_size_control stack_size_ctl;
static terminate_on_exception_control terminate_on_exception_ctl;
static lifetime_control lifetime_ctl;
static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl, &lifetime_ctl};
void global_control_lock() {
for (auto& ctl : controls) {
ctl->my_list_mutex.lock();
}
}
void global_control_unlock() {
int N = std::distance(std::begin(controls), std::end(controls));
for (int i = N - 1; i >= 0; --i) {
controls[i]->my_list_mutex.unlock();
}
}
std::size_t global_control_active_value_unsafe(d1::global_control::parameter param) {
__TBB_ASSERT_RELEASE(param < d1::global_control::parameter_max, nullptr);
return controls[param]->active_value_unsafe();
}
//! Comparator for a set of global_control objects
inline bool control_storage_comparator::operator()(const d1::global_control* lhs, const d1::global_control* rhs) const {
__TBB_ASSERT_RELEASE(lhs->my_param < d1::global_control::parameter_max , nullptr);
return lhs->my_value < rhs->my_value || (lhs->my_value == rhs->my_value && lhs < rhs);
}
bool terminate_on_exception() {
return d1::global_control::active_value(d1::global_control::terminate_on_exception) == 1;
}
struct global_control_impl {
private:
static bool erase_if_present(control_storage* const c, d1::global_control& gc) {
auto it = c->my_list.find(&gc);
if (it != c->my_list.end()) {
c->my_list.erase(it);
return true;
}
return false;
}
public:
static void create(d1::global_control& gc) {
__TBB_ASSERT_RELEASE(gc.my_param < d1::global_control::parameter_max, nullptr);
control_storage* const c = controls[gc.my_param];
spin_mutex::scoped_lock lock(c->my_list_mutex);
if (c->my_list.empty() || c->is_first_arg_preferred(gc.my_value, c->my_active_value)) {
// to guarantee that apply_active() is called with current active value,
// calls it here and in internal_destroy() under my_list_mutex
c->apply_active(gc.my_value);
}
c->my_list.insert(&gc);
}
static void destroy(d1::global_control& gc) {
__TBB_ASSERT_RELEASE(gc.my_param < d1::global_control::parameter_max, nullptr);
control_storage* const c = controls[gc.my_param];
// Concurrent reading and changing global parameter is possible.
spin_mutex::scoped_lock lock(c->my_list_mutex);
__TBB_ASSERT(gc.my_param == d1::global_control::scheduler_handle || !c->my_list.empty(), nullptr);
std::size_t new_active = (std::size_t)(-1), old_active = c->my_active_value;
if (!erase_if_present(c, gc)) {
__TBB_ASSERT(gc.my_param == d1::global_control::scheduler_handle , nullptr);
return;
}
if (c->my_list.empty()) {
__TBB_ASSERT(new_active == (std::size_t) - 1, nullptr);
new_active = c->default_value();
} else {
new_active = (*c->my_list.begin())->my_value;
}
if (new_active != old_active) {
c->apply_active(new_active);
}
}
static bool remove_and_check_if_empty(d1::global_control& gc) {
__TBB_ASSERT_RELEASE(gc.my_param < d1::global_control::parameter_max, nullptr);
control_storage* const c = controls[gc.my_param];
spin_mutex::scoped_lock lock(c->my_list_mutex);
__TBB_ASSERT(!c->my_list.empty(), nullptr);
erase_if_present(c, gc);
return c->my_list.empty();
}
#if TBB_USE_ASSERT
static bool is_present(d1::global_control& gc) {
__TBB_ASSERT_RELEASE(gc.my_param < d1::global_control::parameter_max, nullptr);
control_storage* const c = controls[gc.my_param];
spin_mutex::scoped_lock lock(c->my_list_mutex);
auto it = c->my_list.find(&gc);
if (it != c->my_list.end()) {
return true;
}
return false;
}
#endif // TBB_USE_ASSERT
};
void __TBB_EXPORTED_FUNC create(d1::global_control& gc) {
global_control_impl::create(gc);
}
void __TBB_EXPORTED_FUNC destroy(d1::global_control& gc) {
global_control_impl::destroy(gc);
}
bool remove_and_check_if_empty(d1::global_control& gc) {
return global_control_impl::remove_and_check_if_empty(gc);
}
#if TBB_USE_ASSERT
bool is_present(d1::global_control& gc) {
return global_control_impl::is_present(gc);
}
#endif // TBB_USE_ASSERT
std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int param) {
__TBB_ASSERT_RELEASE(param < d1::global_control::parameter_max, nullptr);
return controls[param]->active_value();
}
} // namespace r1
} // namespace detail
} // namespace tbb
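A minimal sketch, through the public wrapper, of the resolution rule implemented above for max_allowed_parallelism, where the smallest requested limit wins (not part of the vendored file; it assumes the TBB runtime is linked).
#include <cassert>
#include "third_party/tbb/global_control.hh"
void nested_parallelism_limits() {
    tbb::global_control outer(tbb::global_control::max_allowed_parallelism, 8);
    {
        tbb::global_control inner(tbb::global_control::max_allowed_parallelism, 2);
        // allowed_parallelism_control::is_first_arg_preferred() returns a < b,
        // so the stricter (smaller) limit is active while both controls exist.
        assert(tbb::global_control::active_value(
                   tbb::global_control::max_allowed_parallelism) == 2);
    }
    // Once the inner control is destroyed, the remaining limit (8) takes effect,
    // possibly capped by the number of workers the market can actually supply.
}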

201
third_party/tbb/global_control.hh vendored Normal file
View file

@ -0,0 +1,201 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_global_control_H
#define __TBB_global_control_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_attach.hh"
#include "third_party/tbb/detail/_exception.hh"
#include "third_party/tbb/detail/_namespace_injection.hh"
#include "third_party/tbb/detail/_template_helpers.hh"
#include "third_party/libcxx/cstddef"
#include "third_party/libcxx/new" // std::nothrow_t
namespace tbb {
namespace detail {
namespace d1 {
class global_control;
class task_scheduler_handle;
}
namespace r1 {
TBB_EXPORT void __TBB_EXPORTED_FUNC create(d1::global_control&);
TBB_EXPORT void __TBB_EXPORTED_FUNC destroy(d1::global_control&);
TBB_EXPORT std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int);
struct global_control_impl;
struct control_storage_comparator;
void release_impl(d1::task_scheduler_handle& handle);
bool finalize_impl(d1::task_scheduler_handle& handle);
TBB_EXPORT void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle&);
TBB_EXPORT bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle&, std::intptr_t mode);
}
namespace d1 {
class global_control {
public:
enum parameter {
max_allowed_parallelism,
thread_stack_size,
terminate_on_exception,
scheduler_handle, // not a public parameter
parameter_max // insert new parameters above this point
};
global_control(parameter p, std::size_t value) :
my_value(value), my_reserved(), my_param(p) {
suppress_unused_warning(my_reserved);
__TBB_ASSERT(my_param < parameter_max, "Invalid parameter");
#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
// For Windows 8 Store* apps it's impossible to set stack size
if (p==thread_stack_size)
return;
#elif __TBB_x86_64 && (_WIN32 || _WIN64)
if (p==thread_stack_size)
__TBB_ASSERT_RELEASE((unsigned)value == value, "Stack size is limited to unsigned int range");
#endif
if (my_param==max_allowed_parallelism)
__TBB_ASSERT_RELEASE(my_value>0, "max_allowed_parallelism cannot be 0.");
r1::create(*this);
}
~global_control() {
__TBB_ASSERT(my_param < parameter_max, "Invalid parameter");
#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
// For Windows 8 Store* apps it's impossible to set stack size
if (my_param==thread_stack_size)
return;
#endif
r1::destroy(*this);
}
static std::size_t active_value(parameter p) {
__TBB_ASSERT(p < parameter_max, "Invalid parameter");
return r1::global_control_active_value((int)p);
}
private:
std::size_t my_value;
std::intptr_t my_reserved; // TODO: substitution of global_control* not to break backward compatibility
parameter my_param;
friend struct r1::global_control_impl;
friend struct r1::control_storage_comparator;
};
//! Finalization options.
//! Outside of the class to avoid extensive friendship.
static constexpr std::intptr_t release_nothrowing = 0;
static constexpr std::intptr_t finalize_nothrowing = 1;
static constexpr std::intptr_t finalize_throwing = 2;
//! User side wrapper for a task scheduler lifetime control object
class task_scheduler_handle {
public:
//! Creates an empty task_scheduler_handle
task_scheduler_handle() = default;
//! Creates an attached instance of task_scheduler_handle
task_scheduler_handle(attach) {
r1::get(*this);
}
//! Release a reference if any
~task_scheduler_handle() {
release();
}
//! No copy
task_scheduler_handle(const task_scheduler_handle& other) = delete;
task_scheduler_handle& operator=(const task_scheduler_handle& other) = delete;
//! Move only
task_scheduler_handle(task_scheduler_handle&& other) noexcept {
std::swap(m_ctl, other.m_ctl);
}
task_scheduler_handle& operator=(task_scheduler_handle&& other) noexcept {
std::swap(m_ctl, other.m_ctl);
return *this;
};
//! Checks if the task_scheduler_handle is empty
explicit operator bool() const noexcept {
return m_ctl != nullptr;
}
//! Release the reference and deactivate handle
void release() {
if (m_ctl != nullptr) {
r1::finalize(*this, release_nothrowing);
m_ctl = nullptr;
}
}
private:
friend void r1::release_impl(task_scheduler_handle& handle);
friend bool r1::finalize_impl(task_scheduler_handle& handle);
friend void __TBB_EXPORTED_FUNC r1::get(task_scheduler_handle&);
friend void finalize(task_scheduler_handle&);
friend bool finalize(task_scheduler_handle&, const std::nothrow_t&) noexcept;
global_control* m_ctl{nullptr};
};
#if TBB_USE_EXCEPTIONS
//! Waits for worker threads termination. Throws exception on error.
inline void finalize(task_scheduler_handle& handle) {
try_call([&] {
if (handle.m_ctl != nullptr) {
bool finalized = r1::finalize(handle, finalize_throwing);
__TBB_ASSERT_EX(finalized, "r1::finalize did not respect finalize_throwing ?");
}
}).on_completion([&] {
__TBB_ASSERT(!handle, "The handle should be empty after finalize");
});
}
#endif
//! Waits for worker threads termination. Returns false on error.
inline bool finalize(task_scheduler_handle& handle, const std::nothrow_t&) noexcept {
bool finalized = true;
if (handle.m_ctl != nullptr) {
finalized = r1::finalize(handle, finalize_nothrowing);
}
__TBB_ASSERT(!handle, "The handle should be empty after finalize");
return finalized;
}
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::global_control;
using detail::d1::attach;
using detail::d1::finalize;
using detail::d1::task_scheduler_handle;
using detail::r1::unsafe_wait;
} // namespace v1
} // namespace tbb
#endif // __TBB_global_control_H
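A minimal sketch of the lifetime-control API declared above (not part of the vendored file; run_and_wait_for_workers is hypothetical and it assumes the TBB runtime is linked).
#include <cstdio>
#include <new>
#include "third_party/tbb/global_control.hh"
int run_and_wait_for_workers() {
    // Attach a handle so that worker-thread shutdown can be awaited explicitly.
    tbb::task_scheduler_handle handle(tbb::attach{});
    // ... submit parallel work here ...
    // Non-throwing blocking finalization; false means the workers could not be joined.
    if (!tbb::finalize(handle, std::nothrow)) {
        std::fputs("TBB worker threads were not joined\n", stderr);
        return 1;
    }
    return 0;
}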

580
third_party/tbb/governor.cc vendored Normal file
View file

@ -0,0 +1,580 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/governor.hh"
#include "third_party/tbb/threading_control.hh"
#include "third_party/tbb/main.hh"
#include "third_party/tbb/thread_data.hh"
#include "third_party/tbb/market.hh"
#include "third_party/tbb/arena.hh"
#include "third_party/tbb/dynamic_link.hh"
#include "third_party/tbb/concurrent_monitor.hh"
#include "third_party/tbb/thread_dispatcher.hh"
#include "third_party/tbb/task_group.hh"
#include "third_party/tbb/global_control.hh"
#include "third_party/tbb/tbb_allocator.hh"
#include "third_party/tbb/info.hh"
#include "third_party/tbb/task_dispatcher.hh"
#include "third_party/libcxx/cstdio"
#include "third_party/libcxx/cstdlib"
#include "third_party/libcxx/cstring"
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/algorithm"
namespace tbb {
namespace detail {
namespace r1 {
void clear_address_waiter_table();
//! global_control.cpp contains definition
bool remove_and_check_if_empty(d1::global_control& gc);
bool is_present(d1::global_control& gc);
namespace rml {
tbb_server* make_private_server( tbb_client& client );
} // namespace rml
namespace system_topology {
void destroy();
}
//------------------------------------------------------------------------
// governor
//------------------------------------------------------------------------
void governor::acquire_resources () {
#if __TBB_USE_POSIX
int status = theTLS.create(auto_terminate);
#else
int status = theTLS.create();
#endif
if( status )
handle_perror(status, "TBB failed to initialize task scheduler TLS\n");
detect_cpu_features(cpu_features);
is_rethrow_broken = gcc_rethrow_exception_broken();
}
void governor::release_resources () {
theRMLServerFactory.close();
destroy_process_mask();
__TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data still alive?");
int status = theTLS.destroy();
if( status )
runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status));
clear_address_waiter_table();
system_topology::destroy();
dynamic_unlink_all();
}
rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) {
rml::tbb_server* server = nullptr;
if( !UsePrivateRML ) {
::rml::factory::status_type status = theRMLServerFactory.make_server( server, client );
if( status != ::rml::factory::st_success ) {
UsePrivateRML = true;
runtime_warning( "rml::tbb_factory::make_server failed with status %x, falling back on private rml", status );
}
}
if ( !server ) {
__TBB_ASSERT( UsePrivateRML, nullptr);
server = rml::make_private_server( client );
}
__TBB_ASSERT( server, "Failed to create RML server" );
return server;
}
void governor::one_time_init() {
if ( !__TBB_InitOnce::initialization_done() ) {
DoOneTimeInitialization();
}
}
bool governor::does_client_join_workers(const rml::tbb_client &client) {
return ((const thread_dispatcher&)client).must_join_workers();
}
/*
There is no portable way to get stack base address in Posix, however the modern
Linux versions provide pthread_attr_np API that can be used to obtain thread's
stack size and base address. Unfortunately even this function does not provide
enough information for the main thread on IA-64 architecture (RSE spill area
and memory stack are allocated as two separate discontinuous chunks of memory),
and there is no portable way to discern the main and the secondary threads.
Thus for macOS* and IA-64 architecture for Linux* OS we use the TBB worker stack size for
all threads and use the current stack top as the stack base. This simplified
approach is based on the following assumptions:
1) If the default stack size is insufficient for the user app needs, the
required amount will be explicitly specified by the user at the point of the
TBB scheduler initialization (as an argument to tbb::task_scheduler_init
constructor).
2) When an external thread initializes the scheduler, it has enough space on its
stack. Here "enough" means "at least as much as worker threads have".
3) If the user app strives to conserve memory by cutting the stack size, it
should do this for the TBB workers too (as in #1).
*/
static std::uintptr_t get_stack_base(std::size_t stack_size) {
// Stacks are growing top-down. Highest address is called "stack base",
// and the lowest is "stack limit".
#if __TBB_USE_WINAPI
suppress_unused_warning(stack_size);
NT_TIB* pteb = (NT_TIB*)NtCurrentTeb();
__TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB");
return reinterpret_cast<std::uintptr_t>(pteb->StackBase);
#else
// There is no portable way to get stack base address in Posix, so we use
// non-portable method (on all modern Linux) or the simplified approach
// based on the common sense assumptions. The most important assumption
// is that the main thread's stack size is not less than that of other threads.
// Points to the lowest addressable byte of a stack.
void* stack_limit = nullptr;
#if __linux__ && !__bg__
size_t np_stack_size = 0;
pthread_attr_t np_attr_stack;
if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) {
if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) {
__TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" );
}
pthread_attr_destroy(&np_attr_stack);
}
#endif /* __linux__ */
std::uintptr_t stack_base{};
if (stack_limit) {
stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size;
} else {
// Use an anchor as a base stack address.
int anchor{};
stack_base = reinterpret_cast<std::uintptr_t>(&anchor);
}
return stack_base;
#endif /* __TBB_USE_WINAPI */
}
#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
static void register_external_thread_destructor() {
struct thread_destructor {
~thread_destructor() {
governor::terminate_external_thread();
}
};
// ~thread_destructor() will be called during the calling thread's termination
static thread_local thread_destructor thr_destructor;
}
#endif // (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
void governor::init_external_thread() {
one_time_init();
// Create new scheduler instance with arena
int num_slots = default_num_threads();
// TODO_REVAMP: support an external thread without an implicit arena
int num_reserved_slots = 1;
unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal
std::size_t stack_size = 0;
threading_control* thr_control = threading_control::register_public_reference();
arena& a = arena::create(thr_control, num_slots, num_reserved_slots, arena_priority_level);
// External thread always occupies the first slot
thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false);
td.attach_arena(a, /*slot index*/ 0);
__TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr);
stack_size = a.my_threading_control->worker_stack_size();
std::uintptr_t stack_base = get_stack_base(stack_size);
task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher();
td.enter_task_dispatcher(task_disp, calculate_stealing_threshold(stack_base, stack_size));
td.my_arena_slot->occupy();
thr_control->register_thread(td);
set_thread_data(td);
#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
// The external thread destructor is called from dllMain but it is not available with a static build.
// Therefore, we need to register the current thread to call the destructor during thread termination.
register_external_thread_destructor();
#endif
}
void governor::auto_terminate(void* tls) {
__TBB_ASSERT(get_thread_data_if_initialized() == nullptr ||
get_thread_data_if_initialized() == tls, nullptr);
if (tls) {
thread_data* td = static_cast<thread_data*>(tls);
auto clear_tls = [td] {
td->~thread_data();
cache_aligned_deallocate(td);
clear_thread_data();
};
// Only external thread can be inside an arena during termination.
if (td->my_arena_slot) {
arena* a = td->my_arena;
threading_control* thr_control = a->my_threading_control;
// If the TLS slot is already cleared by OS or underlying concurrency
// runtime, restore its value to properly clean up arena
if (!is_thread_data_set(td)) {
set_thread_data(*td);
}
a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker);
td->leave_task_dispatcher();
td->my_arena_slot->release();
// Release an arena
a->on_thread_leaving(arena::ref_external);
thr_control->unregister_thread(*td);
// The tls should be cleared before market::release because
// market can destroy the tls key if we keep the last reference
clear_tls();
// If there was an associated arena, it added a public market reference
thr_control->unregister_public_reference(/* blocking terminate =*/ false);
} else {
clear_tls();
}
}
__TBB_ASSERT(get_thread_data_if_initialized() == nullptr, nullptr);
}
void governor::initialize_rml_factory () {
::rml::factory::status_type res = theRMLServerFactory.open();
UsePrivateRML = res != ::rml::factory::st_success;
}
void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) {
handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1);
}
void release_impl(d1::task_scheduler_handle& handle) {
if (handle.m_ctl != nullptr) {
handle.m_ctl->~global_control();
deallocate_memory(handle.m_ctl);
handle.m_ctl = nullptr;
}
}
bool finalize_impl(d1::task_scheduler_handle& handle) {
__TBB_ASSERT_RELEASE(handle, "trying to finalize with null handle");
__TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object");
bool ok = true; // ok if threading_control does not exist yet
if (threading_control::is_present()) {
thread_data* td = governor::get_thread_data_if_initialized();
if (td) {
task_dispatcher* task_disp = td->my_task_dispatcher;
__TBB_ASSERT(task_disp, nullptr);
if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region
governor::auto_terminate(td);
}
}
if (remove_and_check_if_empty(*handle.m_ctl)) {
ok = threading_control::unregister_lifetime_control(/*blocking_terminate*/ true);
} else {
ok = false;
}
}
return ok;
}
bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) {
if (mode == d1::release_nothrowing) {
release_impl(handle);
return true;
} else {
bool ok = finalize_impl(handle);
// TODO: it is unsafe when finalize is called concurrently and further library unload
release_impl(handle);
if (mode == d1::finalize_throwing && !ok) {
throw_exception(exception_id::unsafe_wait);
}
return ok;
}
}
#if __TBB_ARENA_BINDING
#if __TBB_WEAK_SYMBOLS_PRESENT
#pragma weak __TBB_internal_initialize_system_topology
#pragma weak __TBB_internal_destroy_system_topology
#pragma weak __TBB_internal_allocate_binding_handler
#pragma weak __TBB_internal_deallocate_binding_handler
#pragma weak __TBB_internal_apply_affinity
#pragma weak __TBB_internal_restore_affinity
#pragma weak __TBB_internal_get_default_concurrency
extern "C" {
void __TBB_internal_initialize_system_topology(
size_t groups_num,
int& numa_nodes_count, int*& numa_indexes_list,
int& core_types_count, int*& core_types_indexes_list
);
void __TBB_internal_destroy_system_topology( );
//TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler`
binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core );
void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr );
void __TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num );
void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num );
int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core );
}
#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
// Stubs that will be used if TBBbind library is unavailable.
static void dummy_destroy_system_topology ( ) { }
static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; }
static void dummy_deallocate_binding_handler ( binding_handler* ) { }
static void dummy_apply_affinity ( binding_handler*, int ) { }
static void dummy_restore_affinity ( binding_handler*, int ) { }
static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); }
// Handlers for communication with TBBbind
static void (*initialize_system_topology_ptr)(
size_t groups_num,
int& numa_nodes_count, int*& numa_indexes_list,
int& core_types_count, int*& core_types_indexes_list
) = nullptr;
static void (*destroy_system_topology_ptr)( ) = dummy_destroy_system_topology;
static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core )
= dummy_allocate_binding_handler;
static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr )
= dummy_deallocate_binding_handler;
static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
= dummy_apply_affinity;
static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
= dummy_restore_affinity;
int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core )
= dummy_get_default_concurrency;
#if _WIN32 || _WIN64 || __unix__
// Table describing how to link the handlers.
static const dynamic_link_descriptor TbbBindLinkTable[] = {
DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr),
DLD(__TBB_internal_destroy_system_topology, destroy_system_topology_ptr),
DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr),
DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr),
DLD(__TBB_internal_apply_affinity, apply_affinity_ptr),
DLD(__TBB_internal_restore_affinity, restore_affinity_ptr),
DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr)
};
static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor);
#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */
#if _WIN32 || _WIN64
#define LIBRARY_EXTENSION ".dll"
#define LIBRARY_PREFIX
#elif __unix__
#define LIBRARY_EXTENSION __TBB_STRING(.so.3)
#define LIBRARY_PREFIX "lib"
#endif /* __unix__ */
#define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION
#define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION
#define TBBBIND_2_5_NAME LIBRARY_PREFIX "tbbbind_2_5" DEBUG_SUFFIX LIBRARY_EXTENSION
#endif /* _WIN32 || _WIN64 || __unix__ */
// Representation of system hardware topology information on the TBB side.
// System topology may be initialized by third-party component (e.g. hwloc)
// or just filled in with default stubs.
namespace system_topology {
constexpr int automatic = -1;
static std::atomic<do_once_state> initialization_state;
namespace {
int numa_nodes_count = 0;
int* numa_nodes_indexes = nullptr;
int core_types_count = 0;
int* core_types_indexes = nullptr;
const char* load_tbbbind_shared_object() {
#if _WIN32 || _WIN64 || __unix__
#if _WIN32 && !_WIN64
// For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs.
SYSTEM_INFO si;
GetNativeSystemInfo(&si);
if (si.dwNumberOfProcessors > 32) return nullptr;
#endif /* _WIN32 && !_WIN64 */
for (const auto& tbbbind_version : {TBBBIND_2_5_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) {
if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize, nullptr, DYNAMIC_LINK_LOCAL_BINDING)) {
return tbbbind_version;
}
}
#endif /* _WIN32 || _WIN64 || __unix__ */
return nullptr;
}
int processor_groups_num() {
#if _WIN32
return NumberOfProcessorGroups();
#else
// Stub to improve code readability by reducing the number of compile-time conditions
return 1;
#endif
}
} // internal namespace
// Tries to load the TBBbind library API; on success, gets NUMA topology information from it,
// otherwise fills the NUMA topology with stubs.
void initialization_impl() {
governor::one_time_init();
if (const char* tbbbind_name = load_tbbbind_shared_object()) {
initialize_system_topology_ptr(
processor_groups_num(),
numa_nodes_count, numa_nodes_indexes,
core_types_count, core_types_indexes
);
PrintExtraVersionInfo("TBBBIND", tbbbind_name);
return;
}
static int dummy_index = automatic;
numa_nodes_count = 1;
numa_nodes_indexes = &dummy_index;
core_types_count = 1;
core_types_indexes = &dummy_index;
PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE");
}
void initialize() {
atomic_do_once(initialization_impl, initialization_state);
}
void destroy() {
destroy_system_topology_ptr();
}
} // namespace system_topology
binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) {
system_topology::initialize();
return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core);
}
void destroy_binding_handler(binding_handler* handler_ptr) {
__TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed");
deallocate_binding_handler_ptr(handler_ptr);
}
void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) {
__TBB_ASSERT(slot_index >= 0, "Negative thread index");
__TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed");
apply_affinity_ptr(handler_ptr, slot_index);
}
void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) {
__TBB_ASSERT(slot_index >= 0, "Negative thread index");
__TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed");
restore_affinity_ptr(handler_ptr, slot_index);
}
unsigned __TBB_EXPORTED_FUNC numa_node_count() {
system_topology::initialize();
return system_topology::numa_nodes_count;
}
void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) {
system_topology::initialize();
std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int));
}
int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) {
if (node_id >= 0) {
system_topology::initialize();
int result = get_default_concurrency_ptr(
node_id,
/*core_type*/system_topology::automatic,
/*threads_per_core*/system_topology::automatic
);
if (result > 0) return result;
}
return governor::default_num_threads();
}
unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) {
system_topology::initialize();
return system_topology::core_types_count;
}
void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) {
system_topology::initialize();
std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int));
}
void constraints_assertion(d1::constraints c) {
bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized;
__TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0,
"Wrong max_threads_per_core constraints field value.");
auto numa_nodes_begin = system_topology::numa_nodes_indexes;
auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count;
__TBB_ASSERT_RELEASE(
c.numa_id == system_topology::automatic ||
(is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end),
"The constraints::numa_id value is not known to the library. Use tbb::info::numa_nodes() to get the list of possible values.");
int* core_types_begin = system_topology::core_types_indexes;
int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count;
__TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic ||
(is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end),
"The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values.");
}
int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) {
constraints_assertion(c);
if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) {
system_topology::initialize();
return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core);
}
return governor::default_num_threads();
}
int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) {
return system_topology::automatic;
}
#endif /* __TBB_ARENA_BINDING */
} // namespace r1
} // namespace detail
} // namespace tbb
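The long comment in get_stack_base() above describes the non-portable pthread_getattr_np query; a standalone sketch of that query on Linux/glibc (illustration only, not part of the vendored sources):
#include <pthread.h>
#include <cstdio>
int main() {
    pthread_attr_t attr;
    void*  stack_limit = nullptr;   // lowest addressable byte of this thread's stack
    size_t stack_size  = 0;
    if (pthread_getattr_np(pthread_self(), &attr) == 0) {
        if (pthread_attr_getstack(&attr, &stack_limit, &stack_size) == 0) {
            // Stacks grow downward, so the base is the limit plus the size.
            std::printf("stack base %p, size %zu\n",
                        static_cast<void*>(static_cast<char*>(stack_limit) + stack_size),
                        stack_size);
        }
        pthread_attr_destroy(&attr);
    }
    return 0;
}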

157
third_party/tbb/governor.hh vendored Normal file
View file

@@ -0,0 +1,157 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_governor_H
#define _TBB_governor_H
#include "third_party/tbb/rml_tbb.hh"
#include "third_party/tbb/misc.hh" // for AvailableHwConcurrency
#include "third_party/tbb/tls.hh"
namespace tbb {
namespace detail {
namespace r1 {
class market;
class thread_data;
class __TBB_InitOnce;
#if __TBB_USE_ITT_NOTIFY
//! Defined in profiling.cpp
extern bool ITT_Present;
#endif
typedef std::size_t stack_size_type;
//------------------------------------------------------------------------
// Class governor
//------------------------------------------------------------------------
//! The class handles access to the single instance of market, and to TLS to keep scheduler instances.
/** It also supports automatic on-demand initialization of the TBB scheduler.
The class contains only static data members and methods.*/
class governor {
private:
friend class __TBB_InitOnce;
friend class thread_dispatcher;
friend class threading_control_impl;
// TODO: consider using thread_local (measure performance and side effects)
//! TLS for scheduler instances associated with individual threads
static basic_tls<thread_data*> theTLS;
// TODO (TBB_REVAMP_TODO): reconsider constant names
static rml::tbb_factory theRMLServerFactory;
static bool UsePrivateRML;
// Flags for runtime-specific conditions
static cpu_features_type cpu_features;
static bool is_rethrow_broken;
//! Create key for thread-local storage and initialize RML.
static void acquire_resources ();
//! Destroy the thread-local storage key and deinitialize RML.
static void release_resources ();
static rml::tbb_server* create_rml_server ( rml::tbb_client& );
public:
static unsigned default_num_threads () {
// Caches the maximal level of parallelism supported by the hardware
static unsigned num_threads = AvailableHwConcurrency();
return num_threads;
}
static std::size_t default_page_size () {
// Caches the size of OS regular memory page
static std::size_t page_size = DefaultSystemPageSize();
return page_size;
}
static void one_time_init();
//! Processes scheduler initialization request (possibly nested) in an external thread
/** If necessary, creates a new arena instance and/or local scheduler. **/
static void init_external_thread();
//! The routine to undo automatic initialization.
/** The signature is written with void* so that the routine
can be the destructor argument to pthread_key_create. */
static void auto_terminate(void* tls);
//! Obtain the thread-local instance of the thread data.
/** If the scheduler has not been initialized yet, initialization is done automatically.
Note that auto-initialized scheduler instance is destroyed only when its thread terminates. **/
static thread_data* get_thread_data() {
thread_data* td = theTLS.get();
if (td) {
return td;
}
init_external_thread();
td = theTLS.get();
__TBB_ASSERT(td, nullptr);
return td;
}
static void set_thread_data(thread_data& td) {
theTLS.set(&td);
}
static void clear_thread_data() {
theTLS.set(nullptr);
}
static thread_data* get_thread_data_if_initialized () {
return theTLS.get();
}
static bool is_thread_data_set(thread_data* td) {
return theTLS.get() == td;
}
//! Undo automatic initialization if necessary; call when a thread exits.
static void terminate_external_thread() {
auto_terminate(get_thread_data_if_initialized());
}
static void initialize_rml_factory ();
static bool does_client_join_workers (const rml::tbb_client &client);
static bool speculation_enabled() { return cpu_features.rtm_enabled; }
#if __TBB_WAITPKG_INTRINSICS_PRESENT
static bool wait_package_enabled() { return cpu_features.waitpkg_enabled; }
#endif
static bool rethrow_exception_broken() { return is_rethrow_broken; }
static bool is_itt_present() {
#if __TBB_USE_ITT_NOTIFY
return ITT_Present;
#else
return false;
#endif
}
}; // class governor
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* _TBB_governor_H */
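governor::get_thread_data() above follows a check, initialize on demand, re-read pattern over the TLS slot; a reduced sketch of the same pattern using plain C++11 thread_local and a hypothetical thread_data_stub type (illustration only):
#include <cstdio>
struct thread_data_stub { int slot; };                 // stand-in for r1::thread_data
static thread_local thread_data_stub* tls = nullptr;   // stand-in for governor::theTLS
static void init_external_thread_stub() {
    static thread_local thread_data_stub storage{0};   // lazily created per thread
    tls = &storage;
}
static thread_data_stub* get_thread_data_stub() {
    if (thread_data_stub* td = tls) return td;          // fast path: already attached
    init_external_thread_stub();                        // slow path: on-demand initialization
    return tls;
}
int main() { std::printf("slot %d\n", get_thread_data_stub()->slot); return 0; }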

126
third_party/tbb/info.hh vendored Normal file
View file

@@ -0,0 +1,126 @@
// clang-format off
/*
Copyright (c) 2019-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_info_H
#define __TBB_info_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_namespace_injection.hh"
#if __TBB_ARENA_BINDING
#include "third_party/libcxx/vector"
#include "third_party/libcxx/cstdint"
namespace tbb {
namespace detail {
namespace d1{
using numa_node_id = int;
using core_type_id = int;
// TODO: consider version approach to resolve backward compatibility potential issues.
struct constraints {
#if !__TBB_CPP20_PRESENT
constraints(numa_node_id id = -1, int maximal_concurrency = -1)
: numa_id(id)
, max_concurrency(maximal_concurrency)
{}
#endif /*!__TBB_CPP20_PRESENT*/
constraints& set_numa_id(numa_node_id id) {
numa_id = id;
return *this;
}
constraints& set_max_concurrency(int maximal_concurrency) {
max_concurrency = maximal_concurrency;
return *this;
}
constraints& set_core_type(core_type_id id) {
core_type = id;
return *this;
}
constraints& set_max_threads_per_core(int threads_number) {
max_threads_per_core = threads_number;
return *this;
}
numa_node_id numa_id = -1;
int max_concurrency = -1;
core_type_id core_type = -1;
int max_threads_per_core = -1;
};
} // namespace d1
namespace r1 {
TBB_EXPORT unsigned __TBB_EXPORTED_FUNC numa_node_count();
TBB_EXPORT void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array);
TBB_EXPORT int __TBB_EXPORTED_FUNC numa_default_concurrency(int numa_id);
// Reserved fields are required to save binary backward compatibility in case of future changes.
// They must be defined to 0 at this moment.
TBB_EXPORT unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t reserved = 0);
TBB_EXPORT void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t reserved = 0);
TBB_EXPORT int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t reserved = 0);
TBB_EXPORT int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints& c, intptr_t reserved = 0);
} // namespace r1
namespace d1 {
inline std::vector<numa_node_id> numa_nodes() {
std::vector<numa_node_id> node_indices(r1::numa_node_count());
r1::fill_numa_indices(node_indices.data());
return node_indices;
}
inline int default_concurrency(numa_node_id id = -1) {
return r1::numa_default_concurrency(id);
}
inline std::vector<core_type_id> core_types() {
std::vector<int> core_type_indexes(r1::core_type_count());
r1::fill_core_type_indices(core_type_indexes.data());
return core_type_indexes;
}
inline int default_concurrency(constraints c) {
if (c.max_concurrency > 0) { return c.max_concurrency; }
return r1::constraints_default_concurrency(c);
}
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::numa_node_id;
using detail::d1::core_type_id;
namespace info {
using detail::d1::numa_nodes;
using detail::d1::core_types;
using detail::d1::default_concurrency;
} // namespace info
} // namespace v1
} // namespace tbb
#endif /*__TBB_ARENA_BINDING*/
#endif /*__TBB_info_H*/
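A small usage sketch of the query wrappers declared above; it assumes a build with __TBB_ARENA_BINDING enabled and the r1 runtime entry points linked in:
#include "third_party/tbb/info.hh"
#include <cstdio>
int main() {
    for (tbb::numa_node_id id : tbb::info::numa_nodes())
        std::printf("numa node %d -> default concurrency %d\n",
                    id, tbb::info::default_concurrency(id));
    for (tbb::core_type_id ct : tbb::info::core_types())
        std::printf("core type id %d\n", ct);
    return 0;
}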

234
third_party/tbb/intrusive_list.hh vendored Normal file
View file

@@ -0,0 +1,234 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_intrusive_list_H
#define _TBB_intrusive_list_H
#include "third_party/tbb/detail/_intrusive_list_node.hh"
namespace tbb {
namespace detail {
namespace r1 {
using d1::intrusive_list_node;
//! List of elements of type T, where T is derived from intrusive_list_node
/** The class is not thread safe. **/
template <class List, class T>
class intrusive_list_base {
//! Pointer to the head node
intrusive_list_node my_head;
//! Number of list elements
std::size_t my_size;
static intrusive_list_node& node ( T& item ) { return List::node(item); }
static T& item ( intrusive_list_node* node ) { return List::item(node); }
static const T& item( const intrusive_list_node* node ) { return List::item(node); }
template <typename DereferenceType>
class iterator_impl {
static_assert(std::is_same<DereferenceType, T>::value ||
std::is_same<DereferenceType, const T>::value,
"Incorrect DereferenceType in iterator_impl");
using pointer_type = typename std::conditional<std::is_same<DereferenceType, T>::value,
intrusive_list_node*,
const intrusive_list_node*>::type;
public:
iterator_impl() : my_pos(nullptr) {}
iterator_impl( pointer_type pos ) : my_pos(pos) {}
iterator_impl& operator++() {
my_pos = my_pos->my_next_node;
return *this;
}
iterator_impl operator++( int ) {
iterator_impl it(*this);
++*this;
return it;
}
iterator_impl& operator--() {
my_pos = my_pos->my_prev_node;
return *this;
}
iterator_impl operator--( int ) {
iterator_impl it(*this);
--*this;
return it;
}
bool operator==( const iterator_impl& rhs ) const {
return my_pos == rhs.my_pos;
}
bool operator!=( const iterator_impl& rhs ) const {
return my_pos != rhs.my_pos;
}
DereferenceType& operator*() const {
return intrusive_list_base::item(my_pos);
}
DereferenceType* operator->() const {
return &intrusive_list_base::item(my_pos);
}
private:
// Node the iterator points to at the moment
pointer_type my_pos;
}; // class iterator_impl
void assert_ok () const {
__TBB_ASSERT( (my_head.my_prev_node == &my_head && !my_size) ||
(my_head.my_next_node != &my_head && my_size >0), "intrusive_list_base corrupted" );
#if TBB_USE_ASSERT >= 2
std::size_t i = 0;
for ( intrusive_list_node *n = my_head.my_next_node; n != &my_head; n = n->my_next_node )
++i;
__TBB_ASSERT( my_size == i, "Wrong size" );
#endif /* TBB_USE_ASSERT >= 2 */
}
public:
using iterator = iterator_impl<T>;
using const_iterator = iterator_impl<const T>;
intrusive_list_base () : my_size(0) {
my_head.my_prev_node = &my_head;
my_head.my_next_node = &my_head;
}
bool empty () const { return my_head.my_next_node == &my_head; }
std::size_t size () const { return my_size; }
iterator begin () { return iterator(my_head.my_next_node); }
iterator end () { return iterator(&my_head); }
const_iterator begin () const { return const_iterator(my_head.my_next_node); }
const_iterator end () const { return const_iterator(&my_head); }
void push_front ( T& val ) {
__TBB_ASSERT( node(val).my_prev_node == &node(val) && node(val).my_next_node == &node(val),
"Object with intrusive list node can be part of only one intrusive list simultaneously" );
// An object can be part of only one intrusive list at the given moment via the given node member
node(val).my_prev_node = &my_head;
node(val).my_next_node = my_head.my_next_node;
my_head.my_next_node->my_prev_node = &node(val);
my_head.my_next_node = &node(val);
++my_size;
assert_ok();
}
void remove( T& val ) {
__TBB_ASSERT( node(val).my_prev_node != &node(val) && node(val).my_next_node != &node(val), "Element to remove is not in the list" );
__TBB_ASSERT( node(val).my_prev_node->my_next_node == &node(val) && node(val).my_next_node->my_prev_node == &node(val), "Element to remove is not in the list" );
--my_size;
node(val).my_next_node->my_prev_node = node(val).my_prev_node;
node(val).my_prev_node->my_next_node = node(val).my_next_node;
#if TBB_USE_ASSERT
node(val).my_prev_node = node(val).my_next_node = &node(val);
#endif
assert_ok();
}
iterator erase ( iterator it ) {
T& val = *it;
++it;
remove( val );
return it;
}
}; // intrusive_list_base
#if __TBB_TODO
// With standard compliant compilers memptr_intrusive_list could be named simply intrusive_list,
// and inheritance based intrusive_list version would become its partial specialization.
// Here are the corresponding declarations:
struct dummy_intrusive_list_item { intrusive_list_node my_node; };
template <class T, class U = dummy_intrusive_list_item, intrusive_list_node U::*NodePtr = &dummy_intrusive_list_item::my_node>
class intrusive_list : public intrusive_list_base<intrusive_list<T, U, NodePtr>, T>;
template <class T>
class intrusive_list<T, dummy_intrusive_list_item, &dummy_intrusive_list_item::my_node>
: public intrusive_list_base<intrusive_list<T>, T>;
#endif /* __TBB_TODO */
//! Double linked list of items of type T containing a member of type intrusive_list_node.
/** NodePtr is a member pointer to the node data field. Class U is either T or
a base class of T containing the node member. Default values exist for the sake
of a partial specialization working with the inheritance case.
The list does not have ownership of its items. Its purpose is to avoid dynamic
memory allocation when forming lists of existing objects.
The class is not thread safe. **/
template <class T, class U, intrusive_list_node U::*NodePtr>
class memptr_intrusive_list : public intrusive_list_base<memptr_intrusive_list<T, U, NodePtr>, T>
{
friend class intrusive_list_base<memptr_intrusive_list<T, U, NodePtr>, T>;
static intrusive_list_node& node ( T& val ) { return val.*NodePtr; }
static T& item ( intrusive_list_node* node ) {
// Cannot use __TBB_offsetof (and consequently __TBB_get_object_ref) macro
// with *NodePtr argument because gcc refuses to interpret pasted "->" and "*"
// as member pointer dereferencing operator, and explicit usage of ## in
// __TBB_offsetof implementation breaks operations with normal member names.
return *reinterpret_cast<T*>((char*)node - ((ptrdiff_t)&(reinterpret_cast<T*>(0x1000)->*NodePtr) - 0x1000));
}
static const T& item( const intrusive_list_node* node ) {
return item(const_cast<intrusive_list_node*>(node));
}
}; // intrusive_list<T, U, NodePtr>
//! Double linked list of items of type T that is derived from intrusive_list_node class.
/** The list does not have ownership of its items. Its purpose is to avoid dynamic
memory allocation when forming lists of existing objects.
The class is not thread safe. **/
template <class T>
class intrusive_list : public intrusive_list_base<intrusive_list<T>, T>
{
friend class intrusive_list_base<intrusive_list<T>, T>;
static intrusive_list_node& node ( T& val ) { return val; }
static T& item ( intrusive_list_node* node ) { return *static_cast<T*>(node); }
static const T& item( const intrusive_list_node* node ) { return *static_cast<const T*>(node); }
}; // intrusive_list<T>
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* _TBB_intrusive_list_H */
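A usage sketch for the inheritance-based intrusive_list above (internal r1 API, shown only to illustrate the no-allocation design; items are owned by the caller and must be unlinked before they are destroyed):
#include "third_party/tbb/intrusive_list.hh"
#include <cstdio>
struct item : tbb::detail::r1::intrusive_list_node {
    int value;
    explicit item(int v) : value(v) {}
};
int main() {
    item a{1}, b{2};                              // storage lives on the caller's stack
    tbb::detail::r1::intrusive_list<item> list;   // the list itself never allocates
    list.push_front(a);
    list.push_front(b);
    for (item& it : list) std::printf("%d\n", it.value);
    list.remove(b);
    list.remove(a);
    return 0;
}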

70
third_party/tbb/itt_notify.cc vendored Normal file
View file

@@ -0,0 +1,70 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if __TBB_USE_ITT_NOTIFY
#if _WIN32||_WIN64
#ifndef UNICODE
#define UNICODE
#endif
#else
#pragma weak dlopen
#pragma weak dlsym
#pragma weak dlerror
#endif /* WIN */
#if __TBB_BUILD
extern "C" void ITT_DoOneTimeInitialization();
#define __itt_init_ittlib_name(x,y) (ITT_DoOneTimeInitialization(), true)
#elif __TBBMALLOC_BUILD
extern "C" void MallocInitializeITT();
#define __itt_init_ittlib_name(x,y) (MallocInitializeITT(), true)
#else
#error This file is expected to be used for either TBB or TBB allocator build.
#endif // __TBB_BUILD
// MISSING #include "tools_api/ittnotify_static.c"
namespace tbb {
namespace detail {
namespace r1 {
/** This extra proxy method is necessary since __itt_init_lib is declared as static **/
int __TBB_load_ittnotify() {
#if !(_WIN32||_WIN64)
// tool_api crashes without dlopen, check that it's present. Common case
// for lack of dlopen is static binaries, i.e. ones build with -static.
if (dlopen == nullptr)
return 0;
#endif
return __itt_init_ittlib(nullptr, // groups for:
(__itt_group_id)(__itt_group_sync // prepare/cancel/acquired/releasing
| __itt_group_thread // name threads
| __itt_group_stitch // stack stitching
| __itt_group_structure
));
}
} //namespace r1
} //namespace detail
} // namespace tbb
#endif /* __TBB_USE_ITT_NOTIFY */

118
third_party/tbb/itt_notify.hh vendored Normal file
View file

@@ -0,0 +1,118 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_ITT_NOTIFY
#define _TBB_ITT_NOTIFY
#include "third_party/tbb/detail/_config.hh"
#if __TBB_USE_ITT_NOTIFY
#if _WIN32||_WIN64
#ifndef UNICODE
#define UNICODE
#endif
#endif /* WIN */
#ifndef INTEL_ITTNOTIFY_API_PRIVATE
#define INTEL_ITTNOTIFY_API_PRIVATE
#endif
// MISSING #include "tools_api/ittnotify.h"
// MISSING #include "tools_api/legacy/ittnotify.h"
extern "C" void __itt_fini_ittlib(void);
extern "C" void __itt_release_resources(void);
#if _WIN32||_WIN64
#undef _T
#endif /* WIN */
#endif /* __TBB_USE_ITT_NOTIFY */
#if !ITT_CALLER_NULL
#define ITT_CALLER_NULL ((__itt_caller)0)
#endif
namespace tbb {
namespace detail {
namespace r1 {
//! Unicode support
#if (_WIN32||_WIN64)
//! Unicode character type. Always wchar_t on Windows.
/** We do not use typedefs from the Windows TCHAR family to keep consistency with the TBB coding style. **/
using tchar = wchar_t;
//! Standard Windows macro to markup the string literals.
#define _T(string_literal) L ## string_literal
#else /* !WIN */
using tchar = char;
//! Standard Windows style macro to markup the string literals.
#define _T(string_literal) string_literal
#endif /* !WIN */
//! Display names of internal synchronization types
extern const tchar
*SyncType_Scheduler;
//! Display names of internal synchronization components/scenarios
extern const tchar
*SyncObj_ContextsList
;
#if __TBB_USE_ITT_NOTIFY
// const_cast<void*>() is necessary to cast off volatility
#define ITT_NOTIFY(name,obj) __itt_##name(const_cast<void*>(static_cast<volatile void*>(obj)))
#define ITT_THREAD_SET_NAME(name) __itt_thread_set_name(name)
#define ITT_FINI_ITTLIB() __itt_fini_ittlib()
#define ITT_RELEASE_RESOURCES() __itt_release_resources()
#define ITT_SYNC_CREATE(obj, type, name) __itt_sync_create((void*)(obj), type, name, 2)
#define ITT_STACK_CREATE(obj) obj = __itt_stack_caller_create()
#define ITT_STACK_DESTROY(obj) (obj!=nullptr) ? __itt_stack_caller_destroy(static_cast<__itt_caller>(obj)) : ((void)0)
#define ITT_CALLEE_ENTER(cond, t, obj) if(cond) {\
__itt_stack_callee_enter(static_cast<__itt_caller>(obj));\
__itt_sync_acquired(t);\
}
#define ITT_CALLEE_LEAVE(cond, obj) (cond) ? __itt_stack_callee_leave(static_cast<__itt_caller>(obj)) : ((void)0)
#define ITT_TASK_GROUP(obj,name,parent) r1::itt_make_task_group(d1::ITT_DOMAIN_MAIN,(void*)(obj),ALGORITHM,(void*)(parent),(parent!=nullptr) ? ALGORITHM : FLOW_NULL,name)
#define ITT_TASK_BEGIN(obj,name,id) r1::itt_task_begin(d1::ITT_DOMAIN_MAIN,(void*)(id),ALGORITHM,(void*)(obj),ALGORITHM,name)
#define ITT_TASK_END r1::itt_task_end(d1::ITT_DOMAIN_MAIN)
#else /* !__TBB_USE_ITT_NOTIFY */
#define ITT_NOTIFY(name,obj) ((void)0)
#define ITT_THREAD_SET_NAME(name) ((void)0)
#define ITT_FINI_ITTLIB() ((void)0)
#define ITT_RELEASE_RESOURCES() ((void)0)
#define ITT_SYNC_CREATE(obj, type, name) ((void)0)
#define ITT_STACK_CREATE(obj) ((void)0)
#define ITT_STACK_DESTROY(obj) ((void)0)
#define ITT_CALLEE_ENTER(cond, t, obj) ((void)0)
#define ITT_CALLEE_LEAVE(cond, obj) ((void)0)
#define ITT_TASK_GROUP(type,name,parent) ((void)0)
#define ITT_TASK_BEGIN(type,name,id) ((void)0)
#define ITT_TASK_END ((void)0)
#endif /* !__TBB_USE_ITT_NOTIFY */
int __TBB_load_ittnotify();
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* _TBB_ITT_NOTIFY */

247
third_party/tbb/mailbox.hh vendored Normal file
View file

@@ -0,0 +1,247 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_mailbox_H
#define _TBB_mailbox_H
#include "third_party/tbb/cache_aligned_allocator.hh"
#include "third_party/tbb/detail/_small_object_pool.hh"
#include "third_party/tbb/scheduler_common.hh"
#include "third_party/libcxx/atomic"
namespace tbb {
namespace detail {
namespace r1 {
struct task_proxy : public d1::task {
static const intptr_t pool_bit = 1<<0;
static const intptr_t mailbox_bit = 1<<1;
static const intptr_t location_mask = pool_bit | mailbox_bit;
/* All but two low-order bits represent a (task*).
Two low-order bits mean:
1 = proxy is/was/will be in task pool
2 = proxy is/was/will be in mailbox */
std::atomic<intptr_t> task_and_tag;
//! Pointer to next task_proxy in a mailbox
std::atomic<task_proxy*> next_in_mailbox;
//! Mailbox to which this was mailed.
mail_outbox* outbox;
//! Task affinity id which is referenced
d1::slot_id slot;
d1::small_object_allocator allocator;
//! True if the proxy is stored both in its sender's pool and in the destination mailbox.
static bool is_shared ( intptr_t tat ) {
return (tat & location_mask) == location_mask;
}
//! Returns a pointer to the encapsulated task or nullptr.
static task* task_ptr ( intptr_t tat ) {
return (task*)(tat & ~location_mask);
}
//! Returns a pointer to the encapsulated task or nullptr, and frees proxy if necessary.
template<intptr_t from_bit>
inline task* extract_task () {
// __TBB_ASSERT( prefix().extra_state == es_task_proxy, "Normal task misinterpreted as a proxy?" );
intptr_t tat = task_and_tag.load(std::memory_order_acquire);
__TBB_ASSERT( tat == from_bit || (is_shared(tat) && task_ptr(tat)),
"Proxy's tag cannot specify both locations if the proxy "
"was retrieved from one of its original locations" );
if ( tat != from_bit ) {
const intptr_t cleaner_bit = location_mask & ~from_bit;
// Attempt to transition the proxy to the "empty" state with
// cleaner_bit specifying entity responsible for its eventual freeing.
// Explicit cast to void* is to work around a seeming ICC 11.1 bug.
if ( task_and_tag.compare_exchange_strong(tat, cleaner_bit) ) {
// Successfully grabbed the task, and left new owner with the job of freeing the proxy
return task_ptr(tat);
}
}
// Proxied task has already been claimed from another proxy location.
__TBB_ASSERT( task_and_tag.load(std::memory_order_relaxed) == from_bit, "Empty proxy cannot contain non-zero task pointer" );
return nullptr;
}
task* execute(d1::execution_data&) override {
__TBB_ASSERT_RELEASE(false, nullptr);
return nullptr;
}
task* cancel(d1::execution_data&) override {
__TBB_ASSERT_RELEASE(false, nullptr);
return nullptr;
}
}; // struct task_proxy
//! Internal representation of mail_outbox, without padding.
class unpadded_mail_outbox {
protected:
typedef std::atomic<task_proxy*> atomic_proxy_ptr;
//! Pointer to first task_proxy in mailbox, or nullptr if box is empty.
atomic_proxy_ptr my_first;
//! Pointer to pointer that will point to next item in the queue. Never nullptr.
std::atomic<atomic_proxy_ptr*> my_last;
//! Owner of mailbox is not executing a task, and has drained its own task pool.
std::atomic<bool> my_is_idle;
};
// TODO: - consider moving to arena slot
//! Class representing where mail is put.
/** Padded to occupy a cache line. */
class mail_outbox : padded<unpadded_mail_outbox> {
task_proxy* internal_pop( isolation_type isolation ) {
task_proxy* curr = my_first.load(std::memory_order_acquire);
if ( !curr )
return nullptr;
atomic_proxy_ptr* prev_ptr = &my_first;
if ( isolation != no_isolation ) {
while ( task_accessor::isolation(*curr) != isolation ) {
prev_ptr = &curr->next_in_mailbox;
// The next_in_mailbox should be read with acquire to guarantee (*curr) consistency.
curr = curr->next_in_mailbox.load(std::memory_order_acquire);
if ( !curr )
return nullptr;
}
}
// There is a first item in the mailbox. See if there is a second.
// The next_in_mailbox should be read with acquire to guarantee (*second) consistency.
if ( task_proxy* second = curr->next_in_mailbox.load(std::memory_order_acquire) ) {
// There are at least two items, so first item can be popped easily.
prev_ptr->store(second, std::memory_order_relaxed);
} else {
// There is only one item. Some care is required to pop it.
prev_ptr->store(nullptr, std::memory_order_relaxed);
atomic_proxy_ptr* expected = &curr->next_in_mailbox;
if ( my_last.compare_exchange_strong( expected, prev_ptr ) ) {
// Successfully transitioned mailbox from having one item to having none.
__TBB_ASSERT( !curr->next_in_mailbox.load(std::memory_order_relaxed), nullptr);
} else {
// Some other thread updated my_last but has not filled in first->next_in_mailbox
// Wait until first item points to second item.
atomic_backoff backoff;
// The next_in_mailbox should be read with acquire to guarantee (*second) consistency.
while ( !(second = curr->next_in_mailbox.load(std::memory_order_acquire)) ) backoff.pause();
prev_ptr->store( second, std::memory_order_relaxed);
}
}
assert_pointer_valid(curr);
return curr;
}
public:
friend class mail_inbox;
//! Push task_proxy onto the mailbox queue of another thread.
/** Implementation is wait-free. */
void push( task_proxy* t ) {
assert_pointer_valid(t);
t->next_in_mailbox.store(nullptr, std::memory_order_relaxed);
atomic_proxy_ptr* const link = my_last.exchange(&t->next_in_mailbox);
// Logically, the release fence is not required because the exchange above provides the
// release-acquire semantic that guarantees that (*t) will be consistent when another thread
// loads the link atomic. However, the C++11 memory model guarantees consistency of (*t) only
// when the same atomic is used for synchronization.
link->store(t, std::memory_order_release);
}
//! Return true if mailbox is empty
bool empty() {
return my_first.load(std::memory_order_relaxed) == nullptr;
}
//! Construct *this as a mailbox from zeroed memory.
/** Raise assertion if *this is not previously zeroed, or sizeof(*this) is wrong.
This method is provided instead of a full constructor since we know the object
will be constructed in zeroed memory. */
void construct() {
__TBB_ASSERT( sizeof(*this)==max_nfs_size, nullptr );
__TBB_ASSERT( !my_first.load(std::memory_order_relaxed), nullptr );
__TBB_ASSERT( !my_last.load(std::memory_order_relaxed), nullptr );
__TBB_ASSERT( !my_is_idle.load(std::memory_order_relaxed), nullptr );
my_last = &my_first;
suppress_unused_warning(pad);
}
//! Drain the mailbox
void drain() {
// No fences here because other threads have already quit.
for( ; task_proxy* t = my_first; ) {
my_first.store(t->next_in_mailbox, std::memory_order_relaxed);
t->allocator.delete_object(t);
}
}
//! True if thread that owns this mailbox is looking for work.
bool recipient_is_idle() {
return my_is_idle.load(std::memory_order_relaxed);
}
}; // class mail_outbox
//! Class representing source of mail.
class mail_inbox {
//! Corresponding sink where mail that we receive will be put.
mail_outbox* my_putter;
public:
//! Construct unattached inbox
mail_inbox() : my_putter(nullptr) {}
//! Attach inbox to a corresponding outbox.
void attach( mail_outbox& putter ) {
my_putter = &putter;
}
//! Detach inbox from its outbox
void detach() {
__TBB_ASSERT(my_putter,"not attached");
my_putter = nullptr;
}
//! Get next piece of mail, or nullptr if mailbox is empty.
task_proxy* pop( isolation_type isolation ) {
return my_putter->internal_pop( isolation );
}
//! Return true if mailbox is empty
bool empty() {
return my_putter->empty();
}
//! Indicate whether thread that reads this mailbox is idle.
/** Raises assertion failure if mailbox is redundantly marked as not idle. */
void set_is_idle( bool value ) {
if( my_putter ) {
__TBB_ASSERT( my_putter->my_is_idle.load(std::memory_order_relaxed) || value, "attempt to redundantly mark mailbox as not idle" );
my_putter->my_is_idle.store(value, std::memory_order_relaxed);
}
}
//! Indicate whether thread that reads this mailbox is idle.
bool is_idle_state ( bool value ) const {
return !my_putter || my_putter->my_is_idle.load(std::memory_order_relaxed) == value;
}
}; // class mail_inbox
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* _TBB_mailbox_H */
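A standalone sketch of the tagged-pointer encoding used by task_proxy::task_and_tag above, where the two low-order bits record whether the proxy sits in the task pool, the mailbox, or both (illustration only):
#include <cstdint>
#include <cstdio>
int main() {
    alignas(4) int task_storage = 0;    // stands in for a d1::task; 4-byte alignment frees two low bits
    const std::intptr_t pool_bit = 1 << 0, mailbox_bit = 1 << 1;
    const std::intptr_t location_mask = pool_bit | mailbox_bit;
    std::intptr_t tat = reinterpret_cast<std::intptr_t>(&task_storage) | location_mask;
    bool shared = (tat & location_mask) == location_mask;           // in pool AND mailbox
    void* task_ptr = reinterpret_cast<void*>(tat & ~location_mask); // strip the tag bits
    std::printf("shared=%d task=%p\n", int(shared), task_ptr);
    return 0;
}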

172
third_party/tbb/main.cc vendored Normal file
View file

@@ -0,0 +1,172 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/main.hh"
#include "third_party/tbb/governor.hh"
#include "third_party/tbb/threading_control.hh"
#include "third_party/tbb/environment.hh"
#include "third_party/tbb/market.hh"
#include "third_party/tbb/misc.hh"
#include "third_party/tbb/itt_notify.hh"
namespace tbb {
namespace detail {
namespace r1 {
//------------------------------------------------------------------------
// Begin shared data layout.
// The following global data items are mostly read-only after initialization.
//------------------------------------------------------------------------
//------------------------------------------------------------------------
// governor data
basic_tls<thread_data*> governor::theTLS;
rml::tbb_factory governor::theRMLServerFactory;
bool governor::UsePrivateRML;
bool governor::is_rethrow_broken;
//------------------------------------------------------------------------
// threading_control data
threading_control* threading_control::g_threading_control;
threading_control::global_mutex_type threading_control::g_threading_control_mutex;
//------------------------------------------------------------------------
// context propagation data
context_state_propagation_mutex_type the_context_state_propagation_mutex;
std::atomic<uintptr_t> the_context_state_propagation_epoch{};
//------------------------------------------------------------------------
// One time initialization data
//! Counter of references to global shared resources such as TLS.
std::atomic<int> __TBB_InitOnce::count{};
std::atomic_flag __TBB_InitOnce::InitializationLock = ATOMIC_FLAG_INIT;
//! Flag that is set to true after one-time initializations are done.
std::atomic<bool> __TBB_InitOnce::InitializationDone{};
#if __TBB_USE_ITT_NOTIFY
//! Defined in profiling.cpp
extern bool ITT_Present;
void ITT_DoUnsafeOneTimeInitialization();
#endif
#if !(_WIN32||_WIN64) || __TBB_SOURCE_DIRECTLY_INCLUDED
static __TBB_InitOnce __TBB_InitOnceHiddenInstance;
#endif
#if TBB_USE_ASSERT
std::atomic<int> the_observer_proxy_count;
struct check_observer_proxy_count {
~check_observer_proxy_count() {
if (the_observer_proxy_count != 0) {
runtime_warning("Leaked %ld observer_proxy objects\n", long(the_observer_proxy_count));
}
}
};
// The proxy count checker shall be defined after __TBB_InitOnceHiddenInstance to check the count
// after auto termination.
static check_observer_proxy_count the_check_observer_proxy_count;
#endif /* TBB_USE_ASSERT */
//------------------------------------------------------------------------
// __TBB_InitOnce
//------------------------------------------------------------------------
void __TBB_InitOnce::add_ref() {
if( ++count==1 )
governor::acquire_resources();
}
void __TBB_InitOnce::remove_ref() {
int k = --count;
__TBB_ASSERT(k>=0,"removed __TBB_InitOnce ref that was not added?");
if( k==0 ) {
governor::release_resources();
ITT_FINI_ITTLIB();
ITT_RELEASE_RESOURCES();
}
}
//------------------------------------------------------------------------
// One-time Initializations
//------------------------------------------------------------------------
//! Defined in cache_aligned_allocator.cpp
void initialize_cache_aligned_allocator();
//! Performs thread-safe lazy one-time general TBB initialization.
void DoOneTimeInitialization() {
__TBB_InitOnce::lock();
// No fence required for load of InitializationDone, because we are inside a critical section.
if( !__TBB_InitOnce::InitializationDone ) {
__TBB_InitOnce::add_ref();
if( GetBoolEnvironmentVariable("TBB_VERSION") )
PrintVersion();
bool itt_present = false;
#if __TBB_USE_ITT_NOTIFY
ITT_DoUnsafeOneTimeInitialization();
itt_present = ITT_Present;
#endif /* __TBB_USE_ITT_NOTIFY */
initialize_cache_aligned_allocator();
governor::initialize_rml_factory();
// Force processor groups support detection
governor::default_num_threads();
// Force OS regular page size detection
governor::default_page_size();
PrintExtraVersionInfo( "TOOLS SUPPORT", itt_present ? "enabled" : "disabled" );
__TBB_InitOnce::InitializationDone = true;
}
__TBB_InitOnce::unlock();
}
#if (_WIN32||_WIN64) && !__TBB_SOURCE_DIRECTLY_INCLUDED
//! Windows "DllMain" that handles startup and shutdown of dynamic library.
extern "C" bool WINAPI DllMain( HANDLE /*hinstDLL*/, DWORD reason, LPVOID lpvReserved ) {
switch( reason ) {
case DLL_PROCESS_ATTACH:
__TBB_InitOnce::add_ref();
break;
case DLL_PROCESS_DETACH:
// Since THREAD_DETACH is not called for the main thread, call auto-termination
// here as well - but not during process shutdown (due to risk of a deadlock).
if ( lpvReserved == nullptr ) { // library unload
governor::terminate_external_thread();
}
__TBB_InitOnce::remove_ref();
// It is assumed that InitializationDone is not set after DLL_PROCESS_DETACH,
// and thus no race on InitializationDone is possible.
if ( __TBB_InitOnce::initialization_done() ) {
// Remove reference that we added in DoOneTimeInitialization.
__TBB_InitOnce::remove_ref();
}
break;
case DLL_THREAD_DETACH:
governor::terminate_external_thread();
break;
}
return true;
}
#endif /* (_WIN32||_WIN64) && !__TBB_SOURCE_DIRECTLY_INCLUDED */
} // namespace r1
} // namespace detail
} // namespace tbb
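DoOneTimeInitialization() above pairs a spin lock with a done flag so that initialization runs exactly once; a reduced standalone sketch of that pattern:
#include <atomic>
#include <cstdio>
static std::atomic_flag init_lock = ATOMIC_FLAG_INIT;
static std::atomic<bool> init_done{false};
void do_once() {
    while (init_lock.test_and_set(std::memory_order_acquire)) { /* spin */ }
    if (!init_done.load(std::memory_order_relaxed)) {   // relaxed is safe: the lock is held
        std::puts("one-time init");
        init_done.store(true, std::memory_order_release);
    }
    init_lock.clear(std::memory_order_release);
}
int main() { do_once(); do_once(); return 0; }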

100
third_party/tbb/main.hh vendored Normal file
View file

@@ -0,0 +1,100 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_main_H
#define _TBB_main_H
#include "third_party/tbb/governor.hh"
#include "third_party/libcxx/atomic"
namespace tbb {
namespace detail {
namespace r1 {
void DoOneTimeInitialization();
//------------------------------------------------------------------------
// __TBB_InitOnce
//------------------------------------------------------------------------
// TODO (TBB_REVAMP_TODO): consider better names
//! Class that supports TBB initialization.
/** It handles acquisition and release of global resources (e.g. TLS) during startup and shutdown,
as well as synchronization for DoOneTimeInitialization. */
class __TBB_InitOnce {
friend void DoOneTimeInitialization();
friend void ITT_DoUnsafeOneTimeInitialization();
static std::atomic<int> count;
//! Platform specific code to acquire resources.
static void acquire_resources();
//! Platform specific code to release resources.
static void release_resources();
//! Specifies whether the one-time initialization has been done.
static std::atomic<bool> InitializationDone;
//! Global initialization lock
/** Scenarios are possible when tools interop has to be initialized before the
TBB itself. This imposes a requirement that the global initialization lock
has to support valid static initialization, and does not issue any tool
notifications in any build mode. **/
static std::atomic_flag InitializationLock;
public:
static void lock() {
tbb::detail::atomic_backoff backoff;
while( InitializationLock.test_and_set() ) backoff.pause();
}
static void unlock() { InitializationLock.clear(std::memory_order_release); }
static bool initialization_done() { return InitializationDone.load(std::memory_order_acquire); }
//! Add initial reference to resources.
/** We assume that dynamic loading of the library prevents any other threads
from entering the library until this constructor has finished running. **/
__TBB_InitOnce() { add_ref(); }
//! Remove the initial reference to resources.
/** This is not necessarily the last reference if other threads are still running. **/
~__TBB_InitOnce() {
governor::terminate_external_thread(); // TLS dtor not called for the main thread
remove_ref();
// We assume that InitializationDone is not set after file-scope destructors
// start running, and thus no race on InitializationDone is possible.
if ( initialization_done() ) {
// Remove an extra reference that was added in DoOneTimeInitialization.
remove_ref();
}
}
//! Add reference to resources. If first reference added, acquire the resources.
static void add_ref();
//! Remove reference to resources. If last reference removed, release the resources.
static void remove_ref();
}; // class __TBB_InitOnce
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* _TBB_main_H */
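The lock()/unlock() pair above is a statically initializable spin lock built from std::atomic_flag plus backoff. Below is a minimal standalone sketch of the same idiom; the names and the yield-based backoff are illustrative stand-ins, not TBB's own code.

#include <atomic>
#include <thread>

// Statically initializable: no constructor has to run before first use.
static std::atomic_flag init_lock = ATOMIC_FLAG_INIT;

void lock_init() {
    int pauses = 1;
    while (init_lock.test_and_set(std::memory_order_acquire)) {
        // Stand-in for tbb::detail::atomic_backoff: yield a growing number of times.
        for (int i = 0; i < pauses; ++i) std::this_thread::yield();
        if (pauses < 16) pauses *= 2;
    }
}

void unlock_init() { init_lock.clear(std::memory_order_release); }

int main() {
    lock_init();
    // ... one-time initialization would go here ...
    unlock_init();
    return 0;
}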

140
third_party/tbb/market.cc vendored Normal file

@@ -0,0 +1,140 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/tbb/arena.hh"
#include "third_party/tbb/market.hh"
#include "third_party/libcxx/algorithm" // std::find
namespace tbb {
namespace detail {
namespace r1 {
class tbb_permit_manager_client : public pm_client {
public:
tbb_permit_manager_client(arena& a) : pm_client(a) {}
void set_allotment(unsigned allotment) {
my_arena.set_allotment(allotment);
}
};
//------------------------------------------------------------------------
// market
//------------------------------------------------------------------------
market::market(unsigned workers_soft_limit)
: my_num_workers_soft_limit(workers_soft_limit)
{}
pm_client* market::create_client(arena& a) {
return new (cache_aligned_allocate(sizeof(tbb_permit_manager_client))) tbb_permit_manager_client(a);
}
void market::register_client(pm_client* c) {
mutex_type::scoped_lock lock(my_mutex);
my_clients[c->priority_level()].push_back(c);
}
void market::unregister_and_destroy_client(pm_client& c) {
{
mutex_type::scoped_lock lock(my_mutex);
auto& clients = my_clients[c.priority_level()];
auto it = std::find(clients.begin(), clients.end(), &c);
__TBB_ASSERT(it != clients.end(), "Destroying an unregistered client");
clients.erase(it);
}
auto client = static_cast<tbb_permit_manager_client*>(&c);
client->~tbb_permit_manager_client();
cache_aligned_deallocate(client);
}
void market::update_allotment() {
int effective_soft_limit = my_mandatory_num_requested > 0 && my_num_workers_soft_limit == 0 ? 1 : my_num_workers_soft_limit;
int max_workers = min(my_total_demand, effective_soft_limit);
__TBB_ASSERT(max_workers >= 0, nullptr);
int unassigned_workers = max_workers;
int assigned = 0;
int carry = 0;
unsigned max_priority_level = num_priority_levels;
for (unsigned list_idx = 0; list_idx < num_priority_levels; ++list_idx ) {
int assigned_per_priority = min(my_priority_level_demand[list_idx], unassigned_workers);
unassigned_workers -= assigned_per_priority;
// We iterate in reverse here so that the most recently added clients are served first
for (auto it = my_clients[list_idx].rbegin(); it != my_clients[list_idx].rend(); ++it) {
tbb_permit_manager_client& client = static_cast<tbb_permit_manager_client&>(**it);
if (client.max_workers() == 0) {
client.set_allotment(0);
continue;
}
if (max_priority_level == num_priority_levels) {
max_priority_level = list_idx;
}
int allotted = 0;
if (my_num_workers_soft_limit == 0) {
__TBB_ASSERT(max_workers == 0 || max_workers == 1, nullptr);
allotted = client.min_workers() > 0 && assigned < max_workers ? 1 : 0;
} else {
int tmp = client.max_workers() * assigned_per_priority + carry;
allotted = tmp / my_priority_level_demand[list_idx];
carry = tmp % my_priority_level_demand[list_idx];
__TBB_ASSERT(allotted <= client.max_workers(), nullptr);
}
client.set_allotment(allotted);
client.set_top_priority(list_idx == max_priority_level);
assigned += allotted;
}
}
__TBB_ASSERT(assigned == max_workers, nullptr);
}
void market::set_active_num_workers(int soft_limit) {
mutex_type::scoped_lock lock(my_mutex);
if (my_num_workers_soft_limit != soft_limit) {
my_num_workers_soft_limit = soft_limit;
update_allotment();
}
}
void market::adjust_demand(pm_client& c, int mandatory_delta, int workers_delta) {
__TBB_ASSERT(-1 <= mandatory_delta && mandatory_delta <= 1, nullptr);
int delta{};
{
mutex_type::scoped_lock lock(my_mutex);
// Update client's state
delta = c.update_request(mandatory_delta, workers_delta);
// Update market's state
my_total_demand += delta;
my_priority_level_demand[c.priority_level()] += delta;
my_mandatory_num_requested += mandatory_delta;
update_allotment();
}
notify_thread_request(delta);
}
} // namespace r1
} // namespace detail
} // namespace tbb
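update_allotment above splits the workers granted to a priority level proportionally to each client's demand, carrying the division remainder from one client to the next so the integer shares add up exactly. A standalone arithmetic sketch of that carry-based split, using plain ints and hypothetical demands (not TBB code):

#include <cstdio>
#include <vector>

int main() {
    std::vector<int> demand = {5, 3, 2};  // hypothetical per-client max_workers()
    int level_demand = 5 + 3 + 2;         // my_priority_level_demand for this level
    int assigned_per_priority = 6;        // workers granted to this priority level
    int carry = 0;
    for (int d : demand) {
        int tmp = d * assigned_per_priority + carry;
        int allotted = tmp / level_demand;  // proportional integer share
        carry = tmp % level_demand;         // remainder flows to the next client
        std::printf("demand %d -> allotted %d\n", d, allotted);
    }
    // Prints 3, 1, 2: the shares sum exactly to the 6 granted workers.
    return 0;
}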

79
third_party/tbb/market.hh vendored Normal file

@@ -0,0 +1,79 @@
// clang-format off
/*
Copyright (c) 2005-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_market_H
#define _TBB_market_H
#include "third_party/tbb/rw_mutex.hh"
#include "third_party/tbb/tbb_allocator.hh"
#include "third_party/tbb/task_arena.hh"
#include "third_party/tbb/permit_manager.hh"
#include "third_party/tbb/pm_client.hh"
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/vector"
namespace tbb {
namespace detail {
namespace r1 {
class market : public permit_manager {
public:
market(unsigned soft_limit);
pm_client* create_client(arena& a) override;
void register_client(pm_client* client) override;
void unregister_and_destroy_client(pm_client& c) override;
//! Request an adjustment of the arena's demand for workers.
void adjust_demand(pm_client&, int mandatory_delta, int workers_delta) override;
//! Set number of active workers
void set_active_num_workers(int soft_limit) override;
private:
//! Recalculates the number of workers assigned to each arena in the list.
void update_allotment();
//! Keys for the arena map array. The lower the value, the higher the priority of the arena list.
static constexpr unsigned num_priority_levels = d1::num_priority_levels;
using mutex_type = d1::rw_mutex;
mutex_type my_mutex;
//! Current application-imposed limit on the number of workers
int my_num_workers_soft_limit;
//! Number of workers that were requested by all arenas on all priority levels
int my_total_demand{0};
//! Number of workers that were requested by arenas per single priority list item
int my_priority_level_demand[num_priority_levels] = {0};
//! How many times mandatory concurrency was requested from the market
int my_mandatory_num_requested{0};
//! Per-priority lists of registered clients
using clients_container_type = std::vector<pm_client*, tbb::tbb_allocator<pm_client*>>;
clients_container_type my_clients[num_priority_levels];
}; // class market
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* _TBB_market_H */

273
third_party/tbb/memory_pool.hh vendored Normal file

@@ -0,0 +1,273 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_memory_pool_H
#define __TBB_memory_pool_H
#if !TBB_PREVIEW_MEMORY_POOL
#error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h
#endif
/** @file */
#include "third_party/tbb/scalable_allocator.hh"
#include "third_party/libcxx/new" // std::bad_alloc
#include "third_party/libcxx/stdexcept" // std::runtime_error, std::invalid_argument
#include "third_party/libcxx/utility" // std::forward
#if __TBB_EXTRA_DEBUG
#define __TBBMALLOC_ASSERT ASSERT
#else
#define __TBBMALLOC_ASSERT(a,b) ((void)0)
#endif
namespace tbb {
namespace detail {
namespace d1 {
//! Base of thread-safe pool allocator for variable-size requests
class pool_base : no_copy {
// The pool interface is separate from the standard allocator classes because it has
// to maintain internal state; copy and assignment are disallowed, while move and swap are possible.
public:
//! Reset pool to reuse its memory (free all objects at once)
void recycle() { rml::pool_reset(my_pool); }
//! The "malloc" analogue to allocate block of memory of size bytes
void *malloc(size_t size) { return rml::pool_malloc(my_pool, size); }
//! The "free" analogue to discard a previously allocated piece of memory.
void free(void* ptr) { rml::pool_free(my_pool, ptr); }
//! The "realloc" analogue complementing pool_malloc.
// Enables some low-level optimization possibilities
void *realloc(void* ptr, size_t size) {
return rml::pool_realloc(my_pool, ptr, size);
}
protected:
//! destroy pool - must be called in a child class
void destroy() { rml::pool_destroy(my_pool); }
rml::MemoryPool *my_pool;
};
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Workaround for erroneous "unreferenced parameter" warning in method destroy.
#pragma warning (push)
#pragma warning (disable: 4100)
#endif
//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
/** @ingroup memory_allocation */
template<typename T, typename P = pool_base>
class memory_pool_allocator {
protected:
typedef P pool_type;
pool_type *my_pool;
template<typename U, typename R>
friend class memory_pool_allocator;
template<typename V, typename U, typename R>
friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
template<typename V, typename U, typename R>
friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
public:
typedef T value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
template<typename U> struct rebind {
typedef memory_pool_allocator<U, P> other;
};
explicit memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {}
memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {}
template<typename U>
memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {}
pointer address(reference x) const { return &x; }
const_pointer address(const_reference x) const { return &x; }
//! Allocate space for n objects.
pointer allocate( size_type n, const void* /*hint*/ = nullptr) {
pointer p = static_cast<pointer>( my_pool->malloc( n*sizeof(value_type) ) );
if (!p)
throw_exception(std::bad_alloc());
return p;
}
//! Free previously allocated block of memory.
void deallocate( pointer p, size_type ) {
my_pool->free(p);
}
//! Largest value for which method allocate might succeed.
size_type max_size() const throw() {
size_type max = static_cast<size_type>(-1) / sizeof (value_type);
return (max > 0 ? max : 1);
}
//! Copy-construct value at location pointed to by p.
template<typename U, typename... Args>
void construct(U *p, Args&&... args)
{ ::new((void *)p) U(std::forward<Args>(args)...); }
//! Destroy value at location pointed to by p.
void destroy( pointer p ) { p->~value_type(); }
};
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4100 is back
//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
/** @ingroup memory_allocation */
template<typename P>
class memory_pool_allocator<void, P> {
public:
typedef P pool_type;
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
template<typename U> struct rebind {
typedef memory_pool_allocator<U, P> other;
};
explicit memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {}
memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {}
template<typename U>
memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {}
protected:
pool_type *my_pool;
template<typename U, typename R>
friend class memory_pool_allocator;
template<typename V, typename U, typename R>
friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
template<typename V, typename U, typename R>
friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
};
template<typename T, typename U, typename P>
inline bool operator==( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool==b.my_pool;}
template<typename T, typename U, typename P>
inline bool operator!=( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool!=b.my_pool;}
//! Thread-safe growable pool allocator for variable-size requests
template <typename Alloc>
class memory_pool : public pool_base {
Alloc my_alloc; // TODO: base-class optimization
static void *allocate_request(intptr_t pool_id, size_t & bytes);
static int deallocate_request(intptr_t pool_id, void*, size_t raw_bytes);
public:
//! construct pool with underlying allocator
explicit memory_pool(const Alloc &src = Alloc());
//! destroy pool
~memory_pool() { destroy(); } // call the callbacks first and destroy my_alloc later
};
class fixed_pool : public pool_base {
void *my_buffer;
size_t my_size;
inline static void *allocate_request(intptr_t pool_id, size_t & bytes);
public:
//! construct pool with underlying allocator
inline fixed_pool(void *buf, size_t size);
//! destroy pool
~fixed_pool() { destroy(); }
};
//////////////// Implementation ///////////////
template <typename Alloc>
memory_pool<Alloc>::memory_pool(const Alloc &src) : my_alloc(src) {
rml::MemPoolPolicy args(allocate_request, deallocate_request,
sizeof(typename Alloc::value_type));
rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool);
if (res!=rml::POOL_OK)
throw_exception(std::runtime_error("Can't create pool"));
}
template <typename Alloc>
void *memory_pool<Alloc>::allocate_request(intptr_t pool_id, size_t & bytes) {
memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id);
const size_t unit_size = sizeof(typename Alloc::value_type);
__TBBMALLOC_ASSERT( 0 == bytes%unit_size, nullptr);
void *ptr;
#if TBB_USE_EXCEPTIONS
try {
#endif
ptr = self.my_alloc.allocate( bytes/unit_size );
#if TBB_USE_EXCEPTIONS
} catch(...) {
return nullptr;
}
#endif
return ptr;
}
#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
// Workaround for erroneous "unreachable code" warning in the template below.
// Specific for VC++ 17-18 compiler
#pragma warning (push)
#pragma warning (disable: 4702)
#endif
template <typename Alloc>
int memory_pool<Alloc>::deallocate_request(intptr_t pool_id, void* raw_ptr, size_t raw_bytes) {
memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id);
const size_t unit_size = sizeof(typename Alloc::value_type);
__TBBMALLOC_ASSERT( 0 == raw_bytes%unit_size, nullptr);
self.my_alloc.deallocate( static_cast<typename Alloc::value_type*>(raw_ptr), raw_bytes/unit_size );
return 0;
}
#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
#pragma warning (pop)
#endif
inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(size) {
if (!buf || !size)
// TODO: improve support for mode with exceptions disabled
throw_exception(std::invalid_argument("Zero in parameter is invalid"));
rml::MemPoolPolicy args(allocate_request, nullptr, size, /*fixedPool=*/true);
rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool);
if (res!=rml::POOL_OK)
throw_exception(std::runtime_error("Can't create pool"));
}
inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) {
fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id);
__TBBMALLOC_ASSERT(0 != self.my_size, "The buffer must not be used twice.");
bytes = self.my_size;
self.my_size = 0; // remember that buffer has been used
return self.my_buffer;
}
} // namespace d1
} // namespace detail
inline namespace v1 {
using detail::d1::memory_pool_allocator;
using detail::d1::memory_pool;
using detail::d1::fixed_pool;
} // inline namespace v1
} // namespace tbb
#undef __TBBMALLOC_ASSERT
#endif // __TBB_memory_pool_H
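For orientation, here is a hedged usage sketch of the allocator defined above, in the style of the stock oneTBB memory-pool example: a standard container routes its allocations through a growable pool. The vendored include path and the assumption that the preview macro behaves as upstream are mine, not part of this diff.

#define TBB_PREVIEW_MEMORY_POOL 1
#include "third_party/tbb/memory_pool.hh"  // vendored path (assumption)
#include <cstdio>
#include <list>
#include <memory>

int main() {
    // Growable pool whose backing memory comes from std::allocator<char>.
    tbb::memory_pool<std::allocator<char>> pool;
    // Bind an allocator to the pool and hand it to a standard container.
    tbb::memory_pool_allocator<int> alloc(pool);
    std::list<int, tbb::memory_pool_allocator<int>> l(alloc);
    for (int i = 0; i < 8; ++i) l.push_back(i);
    std::printf("%zu nodes allocated from the pool\n", l.size());
    return 0;
}   // the list is destroyed first; the pool then releases all of its memory at once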

176
third_party/tbb/misc.cc vendored Normal file

@@ -0,0 +1,176 @@
// clang-format off
/*
Copyright (c) 2005-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Source file for miscellaneous entities that are infrequently referenced by
// an executing program.
#include "third_party/tbb/detail/_exception.hh"
#include "third_party/tbb/detail/_machine.hh"
#include "third_party/tbb/version.hh"
#include "third_party/tbb/misc.hh"
#include "third_party/tbb/governor.hh"
#include "third_party/tbb/assert_impl.hh" // Out-of-line TBB assertion handling routines are instantiated here.
#include "third_party/tbb/concurrent_monitor_mutex.hh"
#include "third_party/libcxx/cstdio"
#include "third_party/libcxx/cstdlib"
#include "third_party/libcxx/stdexcept"
#include "third_party/libcxx/cstring"
#include "third_party/libcxx/cstdarg"
#if _WIN32||_WIN64
#include "libc/nt/accounting.h"
#include "libc/nt/automation.h"
#include "libc/nt/console.h"
#include "libc/nt/debug.h"
#include "libc/nt/dll.h"
#include "libc/nt/enum/keyaccess.h"
#include "libc/nt/enum/regtype.h"
#include "libc/nt/errors.h"
#include "libc/nt/events.h"
#include "libc/nt/files.h"
#include "libc/nt/ipc.h"
#include "libc/nt/memory.h"
#include "libc/nt/paint.h"
#include "libc/nt/process.h"
#include "libc/nt/registry.h"
#include "libc/nt/synchronization.h"
#include "libc/nt/thread.h"
#include "libc/nt/windows.h"
#include "libc/nt/winsock.h"
#endif
#if !_WIN32
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/pathconf.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fileno.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/ok.h"
#include "libc/time/time.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/lockf.h" // sysconf(_SC_PAGESIZE)
#endif
namespace tbb {
namespace detail {
namespace r1 {
//------------------------------------------------------------------------
// governor data
//------------------------------------------------------------------------
cpu_features_type governor::cpu_features;
//------------------------------------------------------------------------
// concurrent_monitor_mutex data
//------------------------------------------------------------------------
#if !__TBB_USE_FUTEX
std::mutex concurrent_monitor_mutex::my_init_mutex;
#endif
size_t DefaultSystemPageSize() {
#if _WIN32
SYSTEM_INFO si;
GetSystemInfo(&si);
return si.dwPageSize;
#else
return sysconf(_SC_PAGESIZE);
#endif
}
/** The leading "\0" is here so that applying "strings" to the binary delivers a clean result. */
static const char VersionString[] = "\0" TBB_VERSION_STRINGS;
static bool PrintVersionFlag = false;
void PrintVersion() {
PrintVersionFlag = true;
std::fputs(VersionString+1,stderr);
}
void PrintExtraVersionInfo( const char* category, const char* format, ... ) {
if( PrintVersionFlag ) {
char str[1024]; std::memset(str, 0, 1024);
va_list args; va_start(args, format);
// Note: correct vsnprintf definition obtained from tbb_assert_impl.h
std::vsnprintf( str, 1024-1, format, args);
va_end(args);
std::fprintf(stderr, "oneTBB: %s\t%s\n", category, str );
}
}
//! check for transaction support.
#if _MSC_VER
// MISSING #include <intrin.h> // for __cpuid
#endif
#if __TBB_x86_32 || __TBB_x86_64
void check_cpuid(int leaf, int sub_leaf, int registers[4]) {
#if _MSC_VER
__cpuidex(registers, leaf, sub_leaf);
#else
int reg_eax = 0;
int reg_ebx = 0;
int reg_ecx = 0;
int reg_edx = 0;
#if __TBB_x86_32 && __PIC__
// With position-independent code on 32-bit systems, GCC reserves EBX for the GOT pointer,
// so we save it in ESI around the cpuid instruction and restore it afterwards.
__asm__("mov %%ebx, %%esi\n\t"
"cpuid\n\t"
"xchg %%ebx, %%esi"
: "=a"(reg_eax), "=S"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx)
: "0"(leaf), "2"(sub_leaf) // read value from eax and ecx
);
#else
__asm__("cpuid"
: "=a"(reg_eax), "=b"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx)
: "0"(leaf), "2"(sub_leaf) // read value from eax and ecx
);
#endif
registers[0] = reg_eax;
registers[1] = reg_ebx;
registers[2] = reg_ecx;
registers[3] = reg_edx;
#endif
}
#endif
void detect_cpu_features(cpu_features_type& cpu_features) {
suppress_unused_warning(cpu_features);
#if __TBB_x86_32 || __TBB_x86_64
const int rtm_ebx_mask = 1 << 11;
const int waitpkg_ecx_mask = 1 << 5;
int registers[4] = {0};
// Check RTM and WAITPKG
check_cpuid(7, 0, registers);
cpu_features.rtm_enabled = (registers[1] & rtm_ebx_mask) != 0;
cpu_features.waitpkg_enabled = (registers[2] & waitpkg_ecx_mask) != 0;
#endif /* (__TBB_x86_32 || __TBB_x86_64) */
}
} // namespace r1
} // namespace detail
} // namespace tbb
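detect_cpu_features probes CPUID leaf 7, sub-leaf 0: RTM is bit 11 of EBX and WAITPKG is bit 5 of ECX. A standalone x86-only sketch of the same probe, using GCC/Clang's <cpuid.h> helper instead of the hand-written assembly above (the toolchain header is an assumption, not part of this diff):

#include <cpuid.h>
#include <cstdio>

int main() {
    unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
    // __get_cpuid_count returns 0 if leaf 7 is not supported by the CPU.
    if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
        std::printf("rtm=%u waitpkg=%u\n", (ebx >> 11) & 1u, (ecx >> 5) & 1u);
    }
    return 0;
}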

298
third_party/tbb/misc.hh vendored Normal file

@@ -0,0 +1,298 @@
// clang-format off
/*
Copyright (c) 2005-2022 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _TBB_tbb_misc_H
#define _TBB_tbb_misc_H
#include "third_party/tbb/detail/_config.hh"
#include "third_party/tbb/detail/_assert.hh"
#include "third_party/tbb/detail/_utils.hh"
#if __TBB_ARENA_BINDING
#include "third_party/tbb/info.hh"
#endif /*__TBB_ARENA_BINDING*/
#if __unix__
#include "libc/intrin/newbie.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/rlimit.h"
#include "libc/calls/struct/rusage.h"
#include "libc/calls/sysparam.h"
#include "libc/calls/weirdtypes.h"
#include "libc/limits.h"
#include "libc/sysv/consts/endian.h"
#include "libc/sysv/consts/prio.h"
#include "libc/sysv/consts/rlim.h"
#include "libc/sysv/consts/rlimit.h"
#include "libc/sysv/consts/rusage.h" // __FreeBSD_version
#if __FreeBSD_version >= 701000
// MISSING #include <sys/cpuset.h>
#endif
#endif
#include "third_party/libcxx/atomic"
// Does the operating system have a system call to pin a thread to a set of OS processors?
#define __TBB_OS_AFFINITY_SYSCALL_PRESENT ((__linux__ && !__ANDROID__) || (__FreeBSD_version >= 701000))
// On IBM* Blue Gene* CNK nodes, the affinity API has restrictions that prevent its usability for TBB,
// and also sysconf(_SC_NPROCESSORS_ONLN) already takes process affinity into account.
#define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__)
namespace tbb {
namespace detail {
namespace r1 {
void runtime_warning(const char* format, ... );
#if __TBB_ARENA_BINDING
class task_arena;
class task_scheduler_observer;
#endif /*__TBB_ARENA_BINDING*/
const std::size_t MByte = 1024*1024;
#if __TBB_USE_WINAPI
// The Microsoft Documentation about Thread Stack Size states that
// "The default stack reservation size used by the linker is 1 MB"
const std::size_t ThreadStackSize = 1*MByte;
#else
const std::size_t ThreadStackSize = (sizeof(uintptr_t) <= 4 ? 2 : 4 )*MByte;
#endif
#ifndef __TBB_HardwareConcurrency
//! Returns maximal parallelism level supported by the current OS configuration.
int AvailableHwConcurrency();
#else
inline int AvailableHwConcurrency() {
int n = __TBB_HardwareConcurrency();
return n > 0 ? n : 1; // Fail safety strap
}
#endif /* __TBB_HardwareConcurrency */
//! Returns OS regular memory page size
size_t DefaultSystemPageSize();
//! Returns number of processor groups in the current OS configuration.
/** AvailableHwConcurrency must be called at least once before calling this method. **/
int NumberOfProcessorGroups();
#if _WIN32||_WIN64
//! Retrieves index of processor group containing processor with the given index
int FindProcessorGroupIndex ( int processorIndex );
//! Affinitizes the thread to the specified processor group
void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex );
#endif /* _WIN32||_WIN64 */
//! Prints TBB version information on stderr
void PrintVersion();
//! Prints arbitrary extra TBB version information on stderr
void PrintExtraVersionInfo( const char* category, const char* format, ... );
//! A callback routine to print RML version information on stderr
void PrintRMLVersionInfo( void* arg, const char* server_info );
// For TBB compilation only; not to be used in public headers
#if defined(min) || defined(max)
#undef min
#undef max
#endif
//! Utility template function returning lesser of the two values.
/** Provided here to avoid including the not strictly safe <algorithm> header.\n
In case the operands cause signed/unsigned or size mismatch warnings, it is the caller's
responsibility to do the appropriate cast before calling the function. **/
template<typename T>
T min ( const T& val1, const T& val2 ) {
return val1 < val2 ? val1 : val2;
}
//! Utility template function returning greater of the two values.
/** Provided here to avoid including the not strictly safe <algorithm> header.\n
In case the operands cause signed/unsigned or size mismatch warnings, it is the caller's
responsibility to do the appropriate cast before calling the function. **/
template<typename T>
T max ( const T& val1, const T& val2 ) {
return val1 < val2 ? val2 : val1;
}
//! Utility helper structure to ease overload resolution
template<int > struct int_to_type {};
//------------------------------------------------------------------------
// FastRandom
//------------------------------------------------------------------------
//! A fast random number generator.
/** Uses linear congruential method. */
class FastRandom {
private:
unsigned x, c;
static const unsigned a = 0x9e3779b1; // a big prime number
public:
//! Get a random number.
unsigned short get() {
return get(x);
}
//! Get a random number for the given seed; update the seed for next use.
unsigned short get( unsigned& seed ) {
unsigned short r = (unsigned short)(seed>>16);
__TBB_ASSERT(c&1, "c must be odd for big rng period");
seed = seed*a+c;
return r;
}
//! Construct a random number generator.
FastRandom( void* unique_ptr ) { init(uintptr_t(unique_ptr)); }
template <typename T>
void init( T seed ) {
init(seed,int_to_type<sizeof(seed)>());
}
void init( uint64_t seed , int_to_type<8> ) {
init(uint32_t((seed>>32)+seed), int_to_type<4>());
}
void init( uint32_t seed, int_to_type<4> ) {
// threads use different seeds for unique sequences
c = (seed|1)*0xba5703f5; // c must be odd, shuffle by a prime number
x = c^(seed>>1); // also shuffle x for the first get() invocation
}
};
//------------------------------------------------------------------------
// Atomic extensions
//------------------------------------------------------------------------
//! Atomically replaces the value of dst with newValue if the pair satisfies the compare predicate
/** Return value semantics are the same as for CAS. **/
template<typename T1, class Pred>
T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) {
T1 oldValue = dst.load(std::memory_order_acquire);
while ( compare(oldValue, newValue) ) {
if ( dst.compare_exchange_strong(oldValue, newValue) )
break;
}
return oldValue;
}
#if __TBB_USE_OS_AFFINITY_SYSCALL
#if __linux__
typedef cpu_set_t basic_mask_t;
#elif __FreeBSD_version >= 701000
typedef cpuset_t basic_mask_t;
#else
#error affinity_helper is not implemented in this OS
#endif
class affinity_helper : no_copy {
basic_mask_t* threadMask;
int is_changed;
public:
affinity_helper() : threadMask(nullptr), is_changed(0) {}
~affinity_helper();
void protect_affinity_mask( bool restore_process_mask );
void dismiss();
};
void destroy_process_mask();
#else
class affinity_helper : no_copy {
public:
void protect_affinity_mask( bool ) {}
};
inline void destroy_process_mask(){}
#endif /* __TBB_USE_OS_AFFINITY_SYSCALL */
struct cpu_features_type {
bool rtm_enabled{false};
bool waitpkg_enabled{false};
};
void detect_cpu_features(cpu_features_type& cpu_features);
#if __TBB_ARENA_BINDING
class binding_handler;
binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core);
void destroy_binding_handler(binding_handler* handler_ptr);
void apply_affinity_mask(binding_handler* handler_ptr, int slot_num);
void restore_affinity_mask(binding_handler* handler_ptr, int slot_num);
#endif /*__TBB_ARENA_BINDING*/
// RTM specific section
// abort code for mutexes that detect a conflict with another thread.
enum {
speculation_not_supported = 0x00,
speculation_transaction_aborted = 0x01,
speculation_can_retry = 0x02,
speculation_memadd_conflict = 0x04,
speculation_buffer_overflow = 0x08,
speculation_breakpoint_hit = 0x10,
speculation_nested_abort = 0x20,
speculation_xabort_mask = 0xFF000000,
speculation_xabort_shift = 24,
speculation_xabort_not_free = 0xFF, // The value (0xFF) below comes from the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free
speculation_successful_begin = 0xFFFFFFFF,
speculation_retry = speculation_transaction_aborted
| speculation_can_retry
| speculation_memadd_conflict
};
// We suppose that successful transactions are sequentially ordered and
// do not require additional memory fences around them.
// Technically this can be achieved only if xbegin has implicit
// acquire memory semantics and xend/xabort have release memory semantics at both the compiler and hardware level.
// See the article: https://arxiv.org/pdf/1710.04839.pdf
static inline unsigned int begin_transaction() {
#if __TBB_TSX_INTRINSICS_PRESENT
return _xbegin();
#else
return speculation_not_supported; // return unsuccessful code
#endif
}
static inline void end_transaction() {
#if __TBB_TSX_INTRINSICS_PRESENT
_xend();
#endif
}
static inline void abort_transaction() {
#if __TBB_TSX_INTRINSICS_PRESENT
_xabort(speculation_xabort_not_free);
#endif
}
#if TBB_USE_ASSERT
static inline unsigned char is_in_transaction() {
#if __TBB_TSX_INTRINSICS_PRESENT
return _xtest();
#else
return 0;
#endif
}
#endif // TBB_USE_ASSERT
} // namespace r1
} // namespace detail
} // namespace tbb
#endif /* _TBB_tbb_misc_H */
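The atomic_update helper declared in this header is a compare-and-swap loop that stores the new value only while the predicate holds. A self-contained sketch follows; the helper is copied locally so the example compiles on its own, and the high-water-mark use case is illustrative, not taken from TBB.

#include <atomic>
#include <cstdio>

// Local copy of the helper above, for a standalone example.
template <typename T, class Pred>
T atomic_update(std::atomic<T>& dst, T newValue, Pred compare) {
    T oldValue = dst.load(std::memory_order_acquire);
    while (compare(oldValue, newValue)) {
        // On failure, compare_exchange_strong refreshes oldValue and the predicate is re-checked.
        if (dst.compare_exchange_strong(oldValue, newValue)) break;
    }
    return oldValue;
}

int main() {
    std::atomic<int> high_water{10};
    // Raise the shared high-water mark only if the candidate is larger.
    atomic_update(high_water, 42, [](int current, int candidate) { return current < candidate; });
    std::printf("%d\n", high_water.load());  // prints 42
    return 0;
}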

Some files were not shown because too many files have changed in this diff.