diff --git a/CL/Utils/Context.h b/CL/Utils/Context.h new file mode 100644 index 000000000..cebb8b9eb --- /dev/null +++ b/CL/Utils/Context.h @@ -0,0 +1,50 @@ +#pragma once + +// OpenCL Utils includes +#include "OpenCLUtils_Export.h" + +// OpenCL includes +#include + +// STL includes +#include + +UTILS_EXPORT +cl_context cl_util_get_context(const cl_uint plat_id, const cl_uint dev_id, + const cl_device_type type, cl_int* const error); +UTILS_EXPORT +cl_device_id cl_util_get_device(const cl_uint plat_id, const cl_uint dev_id, + const cl_device_type type, cl_int* const error); + +UTILS_EXPORT +cl_int cl_util_print_device_info(const cl_device_id device); + +UTILS_EXPORT +char* cl_util_get_device_info(const cl_device_id device, + const cl_device_info info, cl_int* const error); +UTILS_EXPORT +char* cl_util_get_platform_info(const cl_platform_id platform, + const cl_platform_info info, + cl_int* const error); + +// build program and show log if build is not successful +UTILS_EXPORT +cl_int cl_util_build_program(const cl_program pr, const cl_device_id dev, + const char* const opt); + +#define GET_CURRENT_TIMER(time) \ + struct timespec time; \ + timespec_get(&time, TIME_UTC); \ + { \ + } + +#define TIMER_DIFFERENCE(dt, time1, time2) \ + { \ + dt = (time2.tv_sec - time1.tv_sec) * 1000000000 \ + + (time2.tv_nsec - time1.tv_nsec); \ + } + +#define START_TIMER GET_CURRENT_TIMER(start_timer1) +#define STOP_TIMER(dt) \ + GET_CURRENT_TIMER(stop_timer2) \ + TIMER_DIFFERENCE(dt, start_timer1, stop_timer2) diff --git a/CL/Utils/Context.hpp b/CL/Utils/Context.hpp new file mode 100644 index 000000000..bd1110c34 --- /dev/null +++ b/CL/Utils/Context.hpp @@ -0,0 +1,17 @@ +#pragma once + +// OpenCL SDK includes +#include "OpenCLUtilsCpp_Export.h" + +#include + +// OpenCL includes +#include + +namespace cl { +namespace util { + Context UTILSCPP_EXPORT get_context(cl_uint plat_id, cl_uint dev_id, + cl_device_type type, + cl_int* error = nullptr); +} +} diff --git a/CL/Utils/Detail.hpp b/CL/Utils/Detail.hpp new file mode 100644 index 000000000..49cccd02c --- /dev/null +++ b/CL/Utils/Detail.hpp @@ -0,0 +1,84 @@ +#pragma once + +// STL includes +#include +#include // std::forward, std::integer_sequence +#include // std::tuple, std::get +#include // std::initializer_list + +namespace cl { +namespace util { + namespace detail { + // Borrowed from: + // https://www.fluentcpp.com/2019/03/05/for_each_arg-applying-a-function-to-each-argument-of-a-function-in-cpp/ + template F for_each_arg(F f, Args&&... args) + { + (void)std::initializer_list{ ( + (void)f(std::forward(args)), 0)... }; + return f; + } + + namespace impl { + // Borrowed from: https://stackoverflow.com/a/16387374/1476661 + template + void for_each_in_tuple(T&& t, F&& f, + std::integer_sequence) + { + auto l = { + (std::forward(f)(std::get(std::forward(t))), 0)... + }; + (void)l; + } + } + template + void for_each_in_tuple(std::tuple const& t, F&& f) + { + impl::for_each_in_tuple( + t, std::forward(f), + std::make_integer_sequence()); + } + + namespace impl { + // Borrowed from + // https://codereview.stackexchange.com/questions/193420/apply-a-function-to-each-element-of-a-tuple-map-a-tuple + template + auto transform_tuple(Tuple&& t, F&& f, std::index_sequence) + { + return std::make_tuple(std::forward(f)(std::get(t))...); + } + } + template + auto transform_tuple(const std::tuple& t, F&& f) + { + return impl::transform_tuple( + t, std::forward(f), + std::make_index_sequence{}); + } + + namespace impl { + // Borrowed from + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3658.html + // with modifications of Casey Carter at + // https://stackoverflow.com/a/51365112/1476661 + template + auto apply(F&& f, Tuple&& args, std::index_sequence) + -> decltype(std::forward(f)( + std::get(std::forward(args))...)) + { + return std::forward(f)( + std::get(std::forward(args))...); + } + } + template >::value>> + auto apply(F&& f, Tuple&& args) + -> decltype(impl::apply(std::forward(f), + std::forward(args), Indices())) + { + return impl::apply(std::forward(f), std::forward(args), + Indices()); + } + } +} +} diff --git a/CL/Utils/Device.hpp b/CL/Utils/Device.hpp new file mode 100644 index 000000000..0a8aed8b6 --- /dev/null +++ b/CL/Utils/Device.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include "OpenCLUtilsCpp_Export.h" +#include + +#include + +namespace cl { +namespace util { + bool UTILSCPP_EXPORT opencl_c_version_contains( + const cl::Device& device, const cl::string& version_fragment); + + bool UTILSCPP_EXPORT supports_extension(const cl::Device& device, + const cl::string& extension); + +#ifdef CL_VERSION_3_0 + bool UTILSCPP_EXPORT supports_feature(const cl::Device& device, + const cl::string& feature_name); +#endif +} +} diff --git a/CL/Utils/Error.h b/CL/Utils/Error.h new file mode 100644 index 000000000..bf6e24bf5 --- /dev/null +++ b/CL/Utils/Error.h @@ -0,0 +1,88 @@ +#pragma once + +// OpenCL Utils includes +#include "OpenCLUtils_Export.h" + +// OpenCL Utils includes +#include + +// STL includes +#include // fprintf + +// OpenCL includes +#include + +// RET = function returns error code +// PAR = functions sets error code in the paremeter + +#ifdef _DEBUG + +#define OCLERROR_RET(func, err, label) \ + do \ + { \ + err = func; \ + if (err != CL_SUCCESS) \ + { \ + cl_util_print_error(err); \ + fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \ + __FILE__, #func); \ + goto label; \ + } \ + } while (0) + +#define OCLERROR_PAR(func, err, label) \ + do \ + { \ + func; \ + if (err != CL_SUCCESS) \ + { \ + cl_util_print_error(err); \ + fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \ + __FILE__, #func); \ + goto label; \ + } \ + } while (0) + +#define MEM_CHECK(func, err, label) \ + do \ + { \ + if ((func) == NULL) \ + { \ + err = CL_OUT_OF_HOST_MEMORY; \ + cl_util_print_error(err); \ + fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \ + __FILE__, #func); \ + goto label; \ + } \ + } while (0) + +#else + +#define OCLERROR_RET(func, err, label) \ + do \ + { \ + err = func; \ + if (err != CL_SUCCESS) goto label; \ + } while (0) + +#define OCLERROR_PAR(func, err, label) \ + do \ + { \ + func; \ + if (err != CL_SUCCESS) goto label; \ + } while (0) + +#define MEM_CHECK(func, err, label) \ + do \ + { \ + if ((func) == NULL) \ + { \ + err = CL_OUT_OF_HOST_MEMORY; \ + goto label; \ + } \ + } while (0) + +#endif + +UTILS_EXPORT +void cl_util_print_error(cl_int error); diff --git a/CL/Utils/Error.hpp b/CL/Utils/Error.hpp new file mode 100644 index 000000000..50df2f7b3 --- /dev/null +++ b/CL/Utils/Error.hpp @@ -0,0 +1,70 @@ +#pragma once + +// OpenCL Utils includes +#include "OpenCLUtilsCpp_Export.h" + +// OpenCL Utils includes +#include + +// OpenCL includes +#include + +namespace cl { +namespace util { +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + /*! \brief Exception class + * + * This may be thrown by SDK utility functions when + * CL_HPP_ENABLE_EXCEPTIONS is defined. + */ + class Error : public std::exception { + private: + int err_; + const char* errStr_; + + public: + /*! \brief Create a new SDK error exception for a given error code + * and corresponding message. + * + * \param err error code value. + * + * \param errStr a descriptive string that must remain in scope until + * handling of the exception has concluded. If set, it + * will be returned by what(). + */ + Error(cl_int err, const char* errStr = NULL): err_(err), errStr_(errStr) + {} + + ~Error() throw() {} + + /*! \brief Get error string associated with exception + * + * \return A memory pointer to the error message string. + */ + virtual const char* what() const throw() + { + if (errStr_ == NULL) + { + return "empty"; + } + else + { + return errStr_; + } + } + + /*! \brief Get error code associated with exception + * + * \return The error code. + */ + cl_int err(void) const { return err_; } + }; +#endif + + namespace detail { + UTILSCPP_EXPORT cl_int errHandler(cl_int err, cl_int* errPtr, + const char* errStr = nullptr); + } + +} +} diff --git a/CL/Utils/ErrorCodes.h b/CL/Utils/ErrorCodes.h new file mode 100644 index 000000000..a1f9d4a8e --- /dev/null +++ b/CL/Utils/ErrorCodes.h @@ -0,0 +1,5 @@ +#pragma once + +#define CL_UTIL_INDEX_OUT_OF_RANGE -2000 +#define CL_UTIL_DEVICE_NOT_INTEROPERABLE -2001 +#define CL_UTIL_FILE_OPERATION_ERROR -2002 diff --git a/CL/Utils/Event.h b/CL/Utils/Event.h new file mode 100644 index 000000000..f144e2157 --- /dev/null +++ b/CL/Utils/Event.h @@ -0,0 +1,13 @@ +#pragma once + +// OpenCL Utils includes +#include "OpenCLUtils_Export.h" + +// OpenCL includes +#include + +UTILS_EXPORT +cl_ulong cl_util_get_event_duration(const cl_event event, + const cl_profiling_info start, + const cl_profiling_info end, + cl_int* const error); \ No newline at end of file diff --git a/CL/Utils/Event.hpp b/CL/Utils/Event.hpp new file mode 100644 index 000000000..e5a6577eb --- /dev/null +++ b/CL/Utils/Event.hpp @@ -0,0 +1,21 @@ +#pragma once + +// OpenCL SDK includes +#include "OpenCLUtilsCpp_Export.h" + +// STL includes +#include + +// OpenCL includes +#include + +namespace cl { +namespace util { + template + auto get_duration(cl::Event& ev) + { + return std::chrono::duration_cast(std::chrono::nanoseconds{ + ev.getProfilingInfo() - ev.getProfilingInfo() }); + } +} +} diff --git a/CL/Utils/File.h b/CL/Utils/File.h new file mode 100644 index 000000000..62c8e95a7 --- /dev/null +++ b/CL/Utils/File.h @@ -0,0 +1,42 @@ +#pragma once + +// OpenCL Utils includes +#include "OpenCLUtils_Export.h" + +// OpenCL includes +#include + +// read all the text file contents securely in ANSI C89 +// return pointer to C-string with file contents +// can handle streams with no known size and no support for fseek +// based on https://stackoverflow.com/questions/14002954/ by Nominal Animal +UTILS_EXPORT +char* cl_util_read_text_file(const char* const filename, size_t* const length, + cl_int* const error); + +// read all the binary file contents securely in ANSI C89 +// return pointer to file contents +// can handle streams with no known size and no support for fseek +// based on https://stackoverflow.com/questions/14002954/ by Nominal Animal +UTILS_EXPORT +unsigned char* cl_util_read_binary_file(const char* const filename, + size_t* const length, + cl_int* const error); + +// write binaries of OpenCL compiled program +// binaries are written as separate files for each device +// with file name "(program_file_name)_(name of device).bin" +// based on variant of Logan +// http://logan.tw/posts/2014/11/22/pre-compile-the-opencl-kernel-program-part-2/ +UTILS_EXPORT +cl_int cl_util_write_binaries(const cl_program program, + const char* const program_file_name); + +// read binaries of OpenCL compiled program +// from files of file names "(program_file_name)_(name of device).bin" +UTILS_EXPORT +cl_program cl_util_read_binaries(const cl_context context, + const cl_device_id* const devices, + const cl_uint num_devices, + const char* const program_file_name, + cl_int* const error); diff --git a/CL/Utils/File.hpp b/CL/Utils/File.hpp new file mode 100644 index 000000000..984a32388 --- /dev/null +++ b/CL/Utils/File.hpp @@ -0,0 +1,49 @@ +#pragma once + +// OpenCL SDK includes +#include + +// STL includes +#include +#include + +namespace cl { +namespace util { + // Scott Meyers, Effective STL, Addison-Wesley Professional, 2001, Item 29 + // with error handling + UTILSCPP_EXPORT + std::string read_text_file(const char* const filename, cl_int* const error) + { + std::ifstream in(filename); + if (in.good()) + { + try + { + std::string red((std::istreambuf_iterator(in)), + std::istreambuf_iterator()); + if (in.good() && in.eof()) + { + if (error != nullptr) *error = CL_SUCCESS; + return red; + } + else + { + detail::errHandler(CL_UTIL_FILE_OPERATION_ERROR, error, + "File read error!"); + return std::string(); + } + } catch (std::bad_alloc& ex) + { + detail::errHandler(CL_OUT_OF_RESOURCES, error, + "Bad allocation!"); + return std::string(); + } + } + else + { + detail::errHandler(CL_INVALID_VALUE, error, "No file!"); + return std::string(); + } + } +} +} diff --git a/CL/Utils/InteropContext.hpp b/CL/Utils/InteropContext.hpp new file mode 100644 index 000000000..2d8fdbfdc --- /dev/null +++ b/CL/Utils/InteropContext.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include "OpenCLUtilsCpp_Export.h" +#include + +#include + +namespace cl { +namespace util { + vector + UTILSCPP_EXPORT get_interop_context_properties(const cl::Device& plat, + cl_int* error = nullptr); + + Context UTILSCPP_EXPORT get_interop_context(int plat_id, int dev_id, + cl_device_type type, + cl_int* error = nullptr); +} +} diff --git a/CL/Utils/OpenCLUtilsCpp_Export.h b/CL/Utils/OpenCLUtilsCpp_Export.h new file mode 100644 index 000000000..b063c9fe1 --- /dev/null +++ b/CL/Utils/OpenCLUtilsCpp_Export.h @@ -0,0 +1,42 @@ + +#ifndef UTILSCPP_EXPORT_H +#define UTILSCPP_EXPORT_H + +#ifdef OPENCLUTILSCPP_STATIC_DEFINE +# define UTILSCPP_EXPORT +# define OPENCLUTILSCPP_NO_EXPORT +#else +# ifndef UTILSCPP_EXPORT +# ifdef OpenCLUtilsCpp_EXPORTS + /* We are building this library */ +# define UTILSCPP_EXPORT +# else + /* We are using this library */ +# define UTILSCPP_EXPORT +# endif +# endif + +# ifndef OPENCLUTILSCPP_NO_EXPORT +# define OPENCLUTILSCPP_NO_EXPORT +# endif +#endif + +#ifndef OPENCLUTILSCPP_DEPRECATED +# define OPENCLUTILSCPP_DEPRECATED __declspec(deprecated) +#endif + +#ifndef OPENCLUTILSCPP_DEPRECATED_EXPORT +# define OPENCLUTILSCPP_DEPRECATED_EXPORT UTILSCPP_EXPORT OPENCLUTILSCPP_DEPRECATED +#endif + +#ifndef OPENCLUTILSCPP_DEPRECATED_NO_EXPORT +# define OPENCLUTILSCPP_DEPRECATED_NO_EXPORT OPENCLUTILSCPP_NO_EXPORT OPENCLUTILSCPP_DEPRECATED +#endif + +#if 0 /* DEFINE_NO_DEPRECATED */ +# ifndef OPENCLUTILSCPP_NO_DEPRECATED +# define OPENCLUTILSCPP_NO_DEPRECATED +# endif +#endif + +#endif /* UTILSCPP_EXPORT_H */ diff --git a/CL/Utils/OpenCLUtils_Export.h b/CL/Utils/OpenCLUtils_Export.h new file mode 100644 index 000000000..2db857ec0 --- /dev/null +++ b/CL/Utils/OpenCLUtils_Export.h @@ -0,0 +1,42 @@ + +#ifndef UTILS_EXPORT_H +#define UTILS_EXPORT_H + +#ifdef OPENCLUTILS_STATIC_DEFINE +# define UTILS_EXPORT +# define OPENCLUTILS_NO_EXPORT +#else +# ifndef UTILS_EXPORT +# ifdef OpenCLUtils_EXPORTS + /* We are building this library */ +# define UTILS_EXPORT +# else + /* We are using this library */ +# define UTILS_EXPORT +# endif +# endif + +# ifndef OPENCLUTILS_NO_EXPORT +# define OPENCLUTILS_NO_EXPORT +# endif +#endif + +#ifndef OPENCLUTILS_DEPRECATED +# define OPENCLUTILS_DEPRECATED __declspec(deprecated) +#endif + +#ifndef OPENCLUTILS_DEPRECATED_EXPORT +# define OPENCLUTILS_DEPRECATED_EXPORT UTILS_EXPORT OPENCLUTILS_DEPRECATED +#endif + +#ifndef OPENCLUTILS_DEPRECATED_NO_EXPORT +# define OPENCLUTILS_DEPRECATED_NO_EXPORT OPENCLUTILS_NO_EXPORT OPENCLUTILS_DEPRECATED +#endif + +#if 0 /* DEFINE_NO_DEPRECATED */ +# ifndef OPENCLUTILS_NO_DEPRECATED +# define OPENCLUTILS_NO_DEPRECATED +# endif +#endif + +#endif /* UTILS_EXPORT_H */ diff --git a/CL/Utils/Platform.hpp b/CL/Utils/Platform.hpp new file mode 100644 index 000000000..9957d76ba --- /dev/null +++ b/CL/Utils/Platform.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include "OpenCLUtilsCpp_Export.h" +#include + +#include + +namespace cl { +namespace util { + bool UTILSCPP_EXPORT supports_extension(const cl::Platform& platform, + const cl::string& extension); + + bool UTILSCPP_EXPORT platform_version_contains( + const cl::Platform& platform, const cl::string& version_fragment); +} +} diff --git a/CL/Utils/Utils.h b/CL/Utils/Utils.h new file mode 100644 index 000000000..9d1c2471f --- /dev/null +++ b/CL/Utils/Utils.h @@ -0,0 +1,11 @@ +#pragma once + +// OpenCL Utils includes +#include "OpenCLUtils_Export.h" + +#include +#include +#include + +// OpenCL includes +#include diff --git a/CL/Utils/Utils.hpp b/CL/Utils/Utils.hpp new file mode 100644 index 000000000..40dd8d795 --- /dev/null +++ b/CL/Utils/Utils.hpp @@ -0,0 +1,14 @@ +#pragma once + +// OpenCL Utils includes +#include "OpenCLUtils_Export.h" + +#include +#include +#include +#include +#include +#include + +// OpenCL includes +#include diff --git a/CL/cl.h b/CL/cl.h new file mode 100644 index 000000000..6c700ab17 --- /dev/null +++ b/CL/cl.h @@ -0,0 +1,1936 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_H +#define __OPENCL_CL_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_ulong cl_properties; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_device_svm_capabilities; +#endif +typedef cl_bitfield cl_command_queue_properties; +#ifdef CL_VERSION_1_2 +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; +#endif + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +#ifdef CL_VERSION_2_0 +typedef cl_properties cl_queue_properties; +#endif +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_svm_mem_flags; +#endif +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +#ifdef CL_VERSION_1_2 +typedef cl_bitfield cl_mem_migration_flags; +#endif +typedef cl_uint cl_image_info; +#ifdef CL_VERSION_1_1 +typedef cl_uint cl_buffer_create_type; +#endif +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +#ifdef CL_VERSION_2_0 +typedef intptr_t cl_pipe_properties; +typedef cl_uint cl_pipe_info; +#endif +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +#ifdef CL_VERSION_1_2 +typedef cl_uint cl_program_binary_type; +#endif +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +#ifdef CL_VERSION_1_2 +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +#endif +typedef cl_uint cl_kernel_work_group_info; +#ifdef CL_VERSION_2_1 +typedef cl_uint cl_kernel_sub_group_info; +#endif +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; +#ifdef CL_VERSION_2_0 +typedef cl_properties cl_sampler_properties; +typedef cl_uint cl_kernel_exec_info; +#endif +#ifdef CL_VERSION_3_0 +typedef cl_bitfield cl_device_atomic_capabilities; +typedef cl_bitfield cl_device_device_enqueue_capabilities; +typedef cl_uint cl_khronos_vendor_id; +typedef cl_properties cl_mem_properties; +typedef cl_uint cl_version; +#endif + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +#ifdef CL_VERSION_1_2 + +typedef struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; +#ifdef CL_VERSION_2_0 +#if defined(__GNUC__) + __extension__ /* Prevents warnings about anonymous union in -pedantic builds */ +#endif +#if defined(_MSC_VER) && !defined(__STDC__) +#pragma warning( push ) +#pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 builds */ +#endif +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc11-extensions" /* Prevents warning about nameless union being C11 extension*/ +#endif +#if defined(_MSC_VER) && defined(__STDC__) + /* Anonymous unions are not supported in /Za builds */ +#else + union { +#endif +#endif + cl_mem buffer; +#ifdef CL_VERSION_2_0 +#if defined(_MSC_VER) && defined(__STDC__) + /* Anonymous unions are not supported in /Za builds */ +#else + cl_mem mem_object; + }; +#endif +#if defined(_MSC_VER) && !defined(__STDC__) +#pragma warning( pop ) +#endif +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +#endif +} cl_image_desc; + +#endif + +#ifdef CL_VERSION_1_1 + +typedef struct _cl_buffer_region { + size_t origin; + size_t size; +} cl_buffer_region; + +#endif + +#ifdef CL_VERSION_3_0 + +#define CL_NAME_VERSION_MAX_NAME_SIZE 64 + +typedef struct _cl_name_version { + cl_version version; + char name[CL_NAME_VERSION_MAX_NAME_SIZE]; +} cl_name_version; + +#endif + +/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#ifdef CL_VERSION_1_1 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 +#endif + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#ifdef CL_VERSION_1_1 +#define CL_INVALID_PROPERTY -64 +#endif +#ifdef CL_VERSION_1_2 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 +#endif +#ifdef CL_VERSION_2_0 +#define CL_INVALID_PIPE_SIZE -69 +#define CL_INVALID_DEVICE_QUEUE -70 +#endif +#ifdef CL_VERSION_2_2 +#define CL_INVALID_SPEC_ID -71 +#define CL_MAX_SIZE_RESTRICTION_EXCEEDED -72 +#endif + + +/* cl_bool */ +#define CL_FALSE 0 +#define CL_TRUE 1 +#ifdef CL_VERSION_1_2 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE +#endif + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 +#ifdef CL_VERSION_2_1 +#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 +#endif +#ifdef CL_VERSION_3_0 +#define CL_PLATFORM_NUMERIC_VERSION 0x0906 +#define CL_PLATFORM_EXTENSIONS_WITH_VERSION 0x0907 +#endif + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#endif +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */ +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A +#endif +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif +/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */ +#ifdef CL_VERSION_1_1 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#endif +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B +#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C +#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D +#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E +#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F +#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 +#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 +#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 +#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 +#define CL_DEVICE_MAX_PIPE_ARGS 0x1055 +#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 +#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 +#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 +#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A +#endif +#ifdef CL_VERSION_2_1 +#define CL_DEVICE_IL_VERSION 0x105B +#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C +#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D +#endif +#ifdef CL_VERSION_3_0 +#define CL_DEVICE_NUMERIC_VERSION 0x105E +#define CL_DEVICE_EXTENSIONS_WITH_VERSION 0x1060 +#define CL_DEVICE_ILS_WITH_VERSION 0x1061 +#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION 0x1062 +#define CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES 0x1063 +#define CL_DEVICE_ATOMIC_FENCE_CAPABILITIES 0x1064 +#define CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT 0x1065 +#define CL_DEVICE_OPENCL_C_ALL_VERSIONS 0x1066 +#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x1067 +#define CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT 0x1068 +#define CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT 0x1069 +/* 0x106A to 0x106E - Reserved for upcoming KHR extension */ +#define CL_DEVICE_OPENCL_C_FEATURES 0x106F +#define CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES 0x1070 +#define CL_DEVICE_PIPE_SUPPORT 0x1071 +#define CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED 0x1072 +#endif + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#ifdef CL_VERSION_1_1 +#define CL_FP_SOFT_FLOAT (1 << 6) +#endif +#ifdef CL_VERSION_1_2 +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) +#endif + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_ON_DEVICE (1 << 2) +#define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3) +#endif + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#ifdef CL_VERSION_1_1 +#define CL_CONTEXT_NUM_DEVICES 0x1083 +#endif + +/* cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 +#ifdef CL_VERSION_1_2 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) + +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_device_svm_capabilities */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) + +#endif + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_SIZE 0x1094 +#endif +#ifdef CL_VERSION_2_1 +#define CL_QUEUE_DEVICE_DEFAULT 0x1095 +#endif +#ifdef CL_VERSION_3_0 +#define CL_QUEUE_PROPERTIES_ARRAY 0x1098 +#endif + +/* cl_mem_flags and cl_svm_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +/* reserved (1 << 6) */ +#ifdef CL_VERSION_1_2 +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */ +#define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */ +#define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12) +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +#endif + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#ifdef CL_VERSION_1_1 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE +#endif +#ifdef CL_VERSION_2_0 +#define CL_sRGB 0x10BF +#define CL_sRGBx 0x10C0 +#define CL_sRGBA 0x10C1 +#define CL_sBGRA 0x10C2 +#define CL_ABGR 0x10C3 +#endif + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#ifdef CL_VERSION_1_2 +#define CL_UNORM_INT24 0x10DF +#endif +#ifdef CL_VERSION_2_1 +#define CL_UNORM_INT_101010_2 0x10E0 +#endif + +/* cl_mem_object_type */ +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#ifdef CL_VERSION_1_2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_OBJECT_PIPE 0x10F7 +#endif + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#ifdef CL_VERSION_1_1 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_USES_SVM_POINTER 0x1109 +#endif +#ifdef CL_VERSION_3_0 +#define CL_MEM_PROPERTIES 0x110A +#endif + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#ifdef CL_VERSION_1_2 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A +#endif + + +/* cl_pipe_info */ +#ifdef CL_VERSION_2_0 +#define CL_PIPE_PACKET_SIZE 0x1120 +#define CL_PIPE_MAX_PACKETS 0x1121 +#endif +#ifdef CL_VERSION_3_0 +#define CL_PIPE_PROPERTIES 0x1122 +#endif + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#ifdef CL_VERSION_1_1 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 +#endif + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 +#ifdef CL_VERSION_2_0 +/* These enumerants are for the cl_khr_mipmap_image extension. + They have since been added to cl_ext.h with an appropriate + KHR suffix, but are left here for backwards compatibility. */ +#define CL_SAMPLER_MIP_FILTER_MODE 0x1155 +#define CL_SAMPLER_LOD_MIN 0x1156 +#define CL_SAMPLER_LOD_MAX 0x1157 +#endif +#ifdef CL_VERSION_3_0 +#define CL_SAMPLER_PROPERTIES 0x1158 +#endif + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#ifdef CL_VERSION_1_2 +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) +#endif + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 +#endif +#ifdef CL_VERSION_2_1 +#define CL_PROGRAM_IL 0x1169 +#endif +#ifdef CL_VERSION_2_2 +#define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT 0x116A +#define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT 0x116B +#endif + +/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_BINARY_TYPE 0x1184 +#endif +#ifdef CL_VERSION_2_0 +#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +#endif + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_ATTRIBUTES 0x1195 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_type_qualifier */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) +#ifdef CL_VERSION_2_0 +#define CL_KERNEL_ARG_TYPE_PIPE (1 << 3) +#endif + +#endif + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 +#endif + +#ifdef CL_VERSION_2_1 + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 0x2034 +#define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT 0x11B8 +#define CL_KERNEL_MAX_NUM_SUB_GROUPS 0x11B9 +#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS 0x11BA + +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7 + +#endif + +/* cl_event_info */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#ifdef CL_VERSION_1_1 +#define CL_EVENT_CONTEXT 0x11D4 +#endif + +/* cl_command_type */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#ifdef CL_VERSION_1_1 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 +#endif +#ifdef CL_VERSION_2_0 +#define CL_COMMAND_SVM_FREE 0x1209 +#define CL_COMMAND_SVM_MEMCPY 0x120A +#define CL_COMMAND_SVM_MEMFILL 0x120B +#define CL_COMMAND_SVM_MAP 0x120C +#define CL_COMMAND_SVM_UNMAP 0x120D +#endif +#ifdef CL_VERSION_3_0 +#define CL_COMMAND_SVM_MIGRATE_MEM 0x120E +#endif + +/* command execution status */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +/* cl_buffer_create_type */ +#ifdef CL_VERSION_1_1 +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 +#endif + +/* cl_profiling_info */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 +#ifdef CL_VERSION_2_0 +#define CL_PROFILING_COMMAND_COMPLETE 0x1284 +#endif + +/* cl_device_atomic_capabilities - bitfield */ +#ifdef CL_VERSION_3_0 +#define CL_DEVICE_ATOMIC_ORDER_RELAXED (1 << 0) +#define CL_DEVICE_ATOMIC_ORDER_ACQ_REL (1 << 1) +#define CL_DEVICE_ATOMIC_ORDER_SEQ_CST (1 << 2) +#define CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM (1 << 3) +#define CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP (1 << 4) +#define CL_DEVICE_ATOMIC_SCOPE_DEVICE (1 << 5) +#define CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES (1 << 6) +#endif + +/* cl_device_device_enqueue_capabilities - bitfield */ +#ifdef CL_VERSION_3_0 +#define CL_DEVICE_QUEUE_SUPPORTED (1 << 0) +#define CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT (1 << 1) +#endif + +/* cl_khronos_vendor_id */ +#define CL_KHRONOS_VENDOR_ID_CODEPLAY 0x10004 + +#ifdef CL_VERSION_3_0 + +/* cl_version */ +#define CL_VERSION_MAJOR_BITS (10) +#define CL_VERSION_MINOR_BITS (10) +#define CL_VERSION_PATCH_BITS (12) + +#define CL_VERSION_MAJOR_MASK ((1 << CL_VERSION_MAJOR_BITS) - 1) +#define CL_VERSION_MINOR_MASK ((1 << CL_VERSION_MINOR_BITS) - 1) +#define CL_VERSION_PATCH_MASK ((1 << CL_VERSION_PATCH_BITS) - 1) + +#define CL_VERSION_MAJOR(version) \ + ((version) >> (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) + +#define CL_VERSION_MINOR(version) \ + (((version) >> CL_VERSION_PATCH_BITS) & CL_VERSION_MINOR_MASK) + +#define CL_VERSION_PATCH(version) ((version) & CL_VERSION_PATCH_MASK) + +#define CL_MAKE_VERSION(major, minor, patch) \ + ((((major) & CL_VERSION_MAJOR_MASK) \ + << (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) | \ + (((minor) & CL_VERSION_MINOR_MASK) << CL_VERSION_PATCH_BITS) | \ + ((patch) & CL_VERSION_PATCH_MASK)) + +#endif + +/********************************************************************************************************/ + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id in_device, + const cl_device_partition_property * properties, + cl_uint num_devices, + cl_device_id * out_devices, + cl_uint * num_devices_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetDefaultDeviceCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceAndHostTimer(cl_device_id device, + cl_ulong* device_timestamp, + cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetHostTimer(cl_device_id device, + cl_ulong * host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * properties, + cl_uint num_devices, + const cl_device_id * devices, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * properties, + cl_device_type device_type, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context context, + cl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_3_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetContextDestructorCallback(cl_context context, + void (CL_CALLBACK* pfn_notify)(cl_context context, + void* user_data), + void* user_data) CL_API_SUFFIX__VERSION_3_0; + +#endif + +/* Command Queue APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithProperties(cl_context context, + cl_device_id device, + const cl_queue_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context context, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreatePipe(cl_context context, + cl_mem_flags flags, + cl_uint pipe_packet_size, + cl_uint pipe_max_packets, + const cl_pipe_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_3_0 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBufferWithProperties(cl_context context, + const cl_mem_properties * properties, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_3_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImageWithProperties(cl_context context, + const cl_mem_properties * properties, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_3_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format * image_formats, + cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPipeInfo(cl_mem pipe, + cl_pipe_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback(cl_mem memobj, + void (CL_CALLBACK * pfn_notify)(cl_mem memobj, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* SVM Allocation APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY void * CL_API_CALL +clSVMAlloc(cl_context context, + cl_svm_mem_flags flags, + size_t size, + cl_uint alignment) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFree(cl_context context, + void * svm_pointer) CL_API_SUFFIX__VERSION_2_0; + +#endif + +/* Sampler APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSamplerWithProperties(cl_context context, + const cl_sampler_properties * sampler_properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler sampler, + cl_sampler_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const size_t * lengths, + const unsigned char ** binaries, + cl_int * binary_status, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * kernel_names, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithIL(cl_context context, + const void* il, + size_t length, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_headers, + const cl_program * input_headers, + const char ** header_include_names, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_programs, + const cl_program * input_programs, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_2 + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL +clSetProgramReleaseCallback(cl_program program, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_2_2_DEPRECATED; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramSpecializationConstant(cl_program program, + cl_uint spec_id, + size_t spec_size, + const void* spec_value) CL_API_SUFFIX__VERSION_2_2; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program program, + const char * kernel_name, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program program, + cl_uint num_kernels, + cl_kernel * kernels, + cl_uint * num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCloneKernel(cl_kernel source_kernel, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void * arg_value) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointer(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfo(cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void * param_value) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint num_events, + const cl_event * event_list) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event event, + cl_event_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context context, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event event, + cl_int execution_status) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback(cl_event event, + cl_int command_exec_callback_type, + void (CL_CALLBACK * pfn_notify)(cl_event event, + cl_int event_command_status, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event event, + cl_profiling_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t size, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t * buffer_origin, + const size_t * host_origin, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t size, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t * buffer_origin, + const size_t * host_origin, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue command_queue, + cl_mem buffer, + const void * pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t * origin, + const size_t * region, + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t * origin, + const size_t * region, + size_t input_row_pitch, + size_t input_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue command_queue, + cl_mem image, + const void * fill_color, + const size_t * origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * region, + size_t dst_offset, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t * origin, + const size_t * region, + size_t * image_row_pitch, + size_t * image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue command_queue, + cl_mem memobj, + void * mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue command_queue, + void (CL_CALLBACK * user_func)(void *), + void * args, + size_t cb_args, + cl_uint num_mem_objects, + const cl_mem * mem_list, + const void ** args_mem_loc, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrierWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFree(cl_command_queue command_queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void * user_data), + void * user_data, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpy(cl_command_queue command_queue, + cl_bool blocking_copy, + void * dst_ptr, + const void * src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFill(cl_command_queue command_queue, + void * svm_ptr, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMap(cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void * svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmap(cl_command_queue command_queue, + void * svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMigrateMem(cl_command_queue command_queue, + cl_uint num_svm_pointers, + const void ** svm_pointers, + const size_t * sizes, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_1; + +#endif + +#ifdef CL_VERSION_1_2 + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. + */ +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, + const char * func_name) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + /* + * WARNING: + * This API introduces mutable state into the OpenCL implementation. It has been REMOVED + * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the + * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. + * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. + * + * Software developers previously relying on this API are instructed to set the command queue + * properties when creating the queue, instead. + */ + extern CL_API_ENTRY cl_int CL_API_CALL + clSetCommandQueueProperty(cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties * old_properties) CL_API_SUFFIX__VERSION_1_0_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage2D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage3D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue command_queue, + cl_event * event) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue command_queue, + cl_uint num_events, + const cl_event * event_list) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +/* Deprecated OpenCL 2.0 APIs */ +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL +clCreateSampler(cl_context context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL +clEnqueueTask(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ diff --git a/CL/cl2.hpp b/CL/cl2.hpp new file mode 100644 index 000000000..a332962a8 --- /dev/null +++ b/CL/cl2.hpp @@ -0,0 +1,18 @@ +// +// Copyright (c) 2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#pragma message("cl2.hpp has been renamed to opencl.hpp to make it clear that it supports all versions of OpenCL. Please include opencl.hpp directly.") diff --git a/CL/cl_d3d10.h b/CL/cl_d3d10.h new file mode 100644 index 000000000..0d9950bed --- /dev/null +++ b/CL/cl_d3d10.h @@ -0,0 +1,154 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_D3D10_H +#define __OPENCL_CL_D3D10_H + +#if defined(_MSC_VER) +#if _MSC_VER >=1500 +#pragma warning( push ) +#pragma warning( disable : 4201 ) +#pragma warning( disable : 5105 ) +#endif +#endif +#include +#if defined(_MSC_VER) +#if _MSC_VER >=1500 +#pragma warning( pop ) +#endif +#endif +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * cl_khr_d3d10_sharing */ +#define cl_khr_d3d10_sharing 1 + +typedef cl_uint cl_d3d10_device_source_khr; +typedef cl_uint cl_d3d10_device_set_khr; + +/******************************************************************************/ + +/* Error Codes */ +#define CL_INVALID_D3D10_DEVICE_KHR -1002 +#define CL_INVALID_D3D10_RESOURCE_KHR -1003 +#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004 +#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005 + +/* cl_d3d10_device_source_nv */ +#define CL_D3D10_DEVICE_KHR 0x4010 +#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011 + +/* cl_d3d10_device_set_nv */ +#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012 +#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013 + +/* cl_context_info */ +#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 +#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C + +/* cl_mem_info */ +#define CL_MEM_D3D10_RESOURCE_KHR 0x4015 + +/* cl_image_info */ +#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017 +#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018 + +/******************************************************************************/ + +typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Buffer * resource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Texture2D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Texture3D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +/*************************************************************** +* cl_intel_sharing_format_query_d3d10 +***************************************************************/ +#define cl_intel_sharing_format_query_d3d10 1 + +/* when cl_khr_d3d10_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedD3D10TextureFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + DXGI_FORMAT* d3d10_formats, + cl_uint* num_texture_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedD3D10TextureFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + DXGI_FORMAT* d3d10_formats, + cl_uint* num_texture_formats) ; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_D3D10_H */ + diff --git a/CL/cl_d3d11.h b/CL/cl_d3d11.h new file mode 100644 index 000000000..9393e5c84 --- /dev/null +++ b/CL/cl_d3d11.h @@ -0,0 +1,156 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_D3D11_H +#define __OPENCL_CL_D3D11_H + +#if defined(_MSC_VER) +#if _MSC_VER >=1500 +#pragma warning( push ) +#pragma warning( disable : 4201 ) +#pragma warning( disable : 5105 ) +#endif +#endif +#include +#if defined(_MSC_VER) +#if _MSC_VER >=1500 +#pragma warning( pop ) +#endif +#endif +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * cl_khr_d3d11_sharing */ +#define cl_khr_d3d11_sharing 1 + +typedef cl_uint cl_d3d11_device_source_khr; +typedef cl_uint cl_d3d11_device_set_khr; + +/******************************************************************************/ + +/* Error Codes */ +#define CL_INVALID_D3D11_DEVICE_KHR -1006 +#define CL_INVALID_D3D11_RESOURCE_KHR -1007 +#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008 +#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009 + +/* cl_d3d11_device_source */ +#define CL_D3D11_DEVICE_KHR 0x4019 +#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A + +/* cl_d3d11_device_set */ +#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B +#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C + +/* cl_context_info */ +#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D +#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D + +/* cl_mem_info */ +#define CL_MEM_D3D11_RESOURCE_KHR 0x401E + +/* cl_image_info */ +#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020 +#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021 + +/******************************************************************************/ + +typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( + cl_platform_id platform, + cl_d3d11_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D11Buffer * resource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D11Texture2D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D11Texture3D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +/*************************************************************** +* cl_intel_sharing_format_query_d3d11 +***************************************************************/ +#define cl_intel_sharing_format_query_d3d11 1 + +/* when cl_khr_d3d11_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedD3D11TextureFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + DXGI_FORMAT* d3d11_formats, + cl_uint* num_texture_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedD3D11TextureFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + DXGI_FORMAT* d3d11_formats, + cl_uint* num_texture_formats) ; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_D3D11_H */ + diff --git a/CL/cl_dx9_media_sharing.h b/CL/cl_dx9_media_sharing.h new file mode 100644 index 000000000..fd03bbdc2 --- /dev/null +++ b/CL/cl_dx9_media_sharing.h @@ -0,0 +1,268 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H +#define __OPENCL_CL_DX9_MEDIA_SHARING_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ +/* cl_khr_dx9_media_sharing */ +#define cl_khr_dx9_media_sharing 1 + +typedef cl_uint cl_dx9_media_adapter_type_khr; +typedef cl_uint cl_dx9_media_adapter_set_khr; + +#if defined(_WIN32) +#if defined(_MSC_VER) +#if _MSC_VER >=1500 +#pragma warning( push ) +#pragma warning( disable : 4201 ) +#pragma warning( disable : 5105 ) +#endif +#endif +#include +#if defined(_MSC_VER) +#if _MSC_VER >=1500 +#pragma warning( pop ) +#endif +#endif +typedef struct _cl_dx9_surface_info_khr +{ + IDirect3DSurface9 *resource; + HANDLE shared_handle; +} cl_dx9_surface_info_khr; +#endif + + +/******************************************************************************/ + +/* Error Codes */ +#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010 +#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011 +#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012 +#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013 + +/* cl_media_adapter_type_khr */ +#define CL_ADAPTER_D3D9_KHR 0x2020 +#define CL_ADAPTER_D3D9EX_KHR 0x2021 +#define CL_ADAPTER_DXVA_KHR 0x2022 + +/* cl_media_adapter_set_khr */ +#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023 +#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024 + +/* cl_context_info */ +#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025 +#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026 +#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027 + +/* cl_mem_info */ +#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028 +#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029 + +/* cl_image_info */ +#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B +#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C + +/******************************************************************************/ + +typedef cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( + cl_platform_id platform, + cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr * media_adapter_type, + void * media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( + cl_context context, + cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, + void * surface_info, + cl_uint plane, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +/*************************************** +* cl_intel_dx9_media_sharing extension * +****************************************/ + +#define cl_intel_dx9_media_sharing 1 + +typedef cl_uint cl_dx9_device_source_intel; +typedef cl_uint cl_dx9_device_set_intel; + +/* error codes */ +#define CL_INVALID_DX9_DEVICE_INTEL -1010 +#define CL_INVALID_DX9_RESOURCE_INTEL -1011 +#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012 +#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013 + +/* cl_dx9_device_source_intel */ +#define CL_D3D9_DEVICE_INTEL 0x4022 +#define CL_D3D9EX_DEVICE_INTEL 0x4070 +#define CL_DXVA_DEVICE_INTEL 0x4071 + +/* cl_dx9_device_set_intel */ +#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024 +#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025 + +/* cl_context_info */ +#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026 +#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072 +#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073 + +/* cl_mem_info */ +#define CL_MEM_DX9_RESOURCE_INTEL 0x4027 +#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074 + +/* cl_image_info */ +#define CL_IMAGE_DX9_PLANE_INTEL 0x4075 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A +#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B +/******************************************************************************/ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDsFromDX9INTEL( + cl_platform_id platform, + cl_dx9_device_source_intel dx9_device_source, + void* dx9_object, + cl_dx9_device_set_intel dx9_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)( + cl_platform_id platform, + cl_dx9_device_source_intel dx9_device_source, + void* dx9_object, + cl_dx9_device_set_intel dx9_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromDX9MediaSurfaceINTEL( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +/*************************************************************** +* cl_intel_sharing_format_query_dx9 +***************************************************************/ +#define cl_intel_sharing_format_query_dx9 1 + +/* when cl_khr_dx9_media_sharing or cl_intel_dx9_media_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedDX9MediaSurfaceFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + D3DFORMAT* dx9_formats, + cl_uint* num_surface_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedDX9MediaSurfaceFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + D3DFORMAT* dx9_formats, + cl_uint* num_surface_formats) ; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */ + diff --git a/CL/cl_dx9_media_sharing_intel.h b/CL/cl_dx9_media_sharing_intel.h new file mode 100644 index 000000000..f6518d7f6 --- /dev/null +++ b/CL/cl_dx9_media_sharing_intel.h @@ -0,0 +1,18 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#include +#pragma message("The Intel DX9 media sharing extensions have been moved into cl_dx9_media_sharing.h. Please include cl_dx9_media_sharing.h directly.") diff --git a/CL/cl_egl.h b/CL/cl_egl.h new file mode 100644 index 000000000..357a37c02 --- /dev/null +++ b/CL/cl_egl.h @@ -0,0 +1,120 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_EGL_H +#define __OPENCL_CL_EGL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */ +#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F +#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D +#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E + +/* Error type for clCreateFromEGLImageKHR */ +#define CL_INVALID_EGL_OBJECT_KHR -1093 +#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092 + +/* CLeglImageKHR is an opaque handle to an EGLImage */ +typedef void* CLeglImageKHR; + +/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */ +typedef void* CLeglDisplayKHR; + +/* CLeglSyncKHR is an opaque handle to an EGLSync object */ +typedef void* CLeglSyncKHR; + +/* properties passed to clCreateFromEGLImageKHR */ +typedef intptr_t cl_egl_image_properties_khr; + + +#define cl_khr_egl_image 1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromEGLImageKHR(cl_context context, + CLeglDisplayKHR egldisplay, + CLeglImageKHR eglimage, + cl_mem_flags flags, + const cl_egl_image_properties_khr * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( + cl_context context, + CLeglDisplayKHR egldisplay, + CLeglImageKHR eglimage, + cl_mem_flags flags, + const cl_egl_image_properties_khr * properties, + cl_int * errcode_ret); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +#define cl_khr_egl_event 1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromEGLSyncKHR(cl_context context, + CLeglSyncKHR sync, + CLeglDisplayKHR display, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( + cl_context context, + CLeglSyncKHR sync, + CLeglDisplayKHR display, + cl_int * errcode_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_EGL_H */ diff --git a/CL/cl_ext.h b/CL/cl_ext.h new file mode 100644 index 000000000..3eba7ed1b --- /dev/null +++ b/CL/cl_ext.h @@ -0,0 +1,2634 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +/* cl_ext.h contains OpenCL extensions which don't have external */ +/* (OpenGL, D3D) dependencies. */ + +#ifndef __CL_EXT_H +#define __CL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/*************************************************************** +* cl_khr_command_buffer +***************************************************************/ +#define cl_khr_command_buffer 1 +#define CL_KHR_COMMAND_BUFFER_EXTENSION_NAME \ + "cl_khr_command_buffer" + +typedef cl_bitfield cl_device_command_buffer_capabilities_khr; +typedef struct _cl_command_buffer_khr* cl_command_buffer_khr; +typedef cl_uint cl_sync_point_khr; +typedef cl_uint cl_command_buffer_info_khr; +typedef cl_uint cl_command_buffer_state_khr; +typedef cl_properties cl_command_buffer_properties_khr; +typedef cl_bitfield cl_command_buffer_flags_khr; +typedef cl_properties cl_ndrange_kernel_command_properties_khr; +typedef struct _cl_mutable_command_khr* cl_mutable_command_khr; + +/* cl_device_info */ +#define CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR 0x12A9 +#define CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR 0x12AA + +/* cl_device_command_buffer_capabilities_khr - bitfield */ +#define CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR (1 << 0) +#define CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR (1 << 1) +#define CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR (1 << 2) +#define CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR (1 << 3) + +/* cl_command_buffer_properties_khr */ +#define CL_COMMAND_BUFFER_FLAGS_KHR 0x1293 + +/* cl_command_buffer_flags_khr */ +#define CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR (1 << 0) + +/* Error codes */ +#define CL_INVALID_COMMAND_BUFFER_KHR -1138 +#define CL_INVALID_SYNC_POINT_WAIT_LIST_KHR -1139 +#define CL_INCOMPATIBLE_COMMAND_QUEUE_KHR -1140 + +/* cl_command_buffer_info_khr */ +#define CL_COMMAND_BUFFER_QUEUES_KHR 0x1294 +#define CL_COMMAND_BUFFER_NUM_QUEUES_KHR 0x1295 +#define CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR 0x1296 +#define CL_COMMAND_BUFFER_STATE_KHR 0x1297 +#define CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR 0x1298 + +/* cl_command_buffer_state_khr */ +#define CL_COMMAND_BUFFER_STATE_RECORDING_KHR 0 +#define CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR 1 +#define CL_COMMAND_BUFFER_STATE_PENDING_KHR 2 +#define CL_COMMAND_BUFFER_STATE_INVALID_KHR 3 + +/* cl_command_type */ +#define CL_COMMAND_COMMAND_BUFFER_KHR 0x12A8 + + +typedef cl_command_buffer_khr (CL_API_CALL * +clCreateCommandBufferKHR_fn)( + cl_uint num_queues, + const cl_command_queue* queues, + const cl_command_buffer_properties_khr* properties, + cl_int* errcode_ret) ; + +typedef cl_int (CL_API_CALL * +clFinalizeCommandBufferKHR_fn)( + cl_command_buffer_khr command_buffer) ; + +typedef cl_int (CL_API_CALL * +clRetainCommandBufferKHR_fn)( + cl_command_buffer_khr command_buffer) ; + +typedef cl_int (CL_API_CALL * +clReleaseCommandBufferKHR_fn)( + cl_command_buffer_khr command_buffer) ; + +typedef cl_int (CL_API_CALL * +clEnqueueCommandBufferKHR_fn)( + cl_uint num_queues, + cl_command_queue* queues, + cl_command_buffer_khr command_buffer, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +typedef cl_int (CL_API_CALL * +clCommandBarrierWithWaitListKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyBufferKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyBufferRectKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyBufferToImageKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyImageKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyImageToBufferKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* region, + size_t dst_offset, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandFillBufferKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const void* pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandFillImageKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const void* fill_color, + const size_t* origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandNDRangeKernelKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + const cl_ndrange_kernel_command_properties_khr* properties, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clGetCommandBufferInfoKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_buffer_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_command_buffer_khr CL_API_CALL +clCreateCommandBufferKHR( + cl_uint num_queues, + const cl_command_queue* queues, + const cl_command_buffer_properties_khr* properties, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinalizeCommandBufferKHR( + cl_command_buffer_khr command_buffer) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandBufferKHR( + cl_command_buffer_khr command_buffer) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandBufferKHR( + cl_command_buffer_khr command_buffer) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCommandBufferKHR( + cl_uint num_queues, + cl_command_queue* queues, + cl_command_buffer_khr command_buffer, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandBarrierWithWaitListKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyBufferKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyBufferRectKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyBufferToImageKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyImageKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyImageToBufferKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* region, + size_t dst_offset, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandFillBufferKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const void* pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandFillImageKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const void* fill_color, + const size_t* origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandNDRangeKernelKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + const cl_ndrange_kernel_command_properties_khr* properties, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandBufferInfoKHR( + cl_command_buffer_khr command_buffer, + cl_command_buffer_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#endif /* CL_NO_PROTOTYPES */ + +/*************************************************************** +* cl_khr_command_buffer_mutable_dispatch +***************************************************************/ +#define cl_khr_command_buffer_mutable_dispatch 1 +#define CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME \ + "cl_khr_command_buffer_mutable_dispatch" + +typedef cl_uint cl_command_buffer_structure_type_khr; +typedef cl_bitfield cl_mutable_dispatch_fields_khr; +typedef cl_uint cl_mutable_command_info_khr; +typedef struct _cl_mutable_dispatch_arg_khr { + cl_uint arg_index; + size_t arg_size; + const void* arg_value; +} cl_mutable_dispatch_arg_khr; +typedef struct _cl_mutable_dispatch_exec_info_khr { + cl_uint param_name; + size_t param_value_size; + const void* param_value; +} cl_mutable_dispatch_exec_info_khr; +typedef struct _cl_mutable_dispatch_config_khr { + cl_command_buffer_structure_type_khr type; + const void* next; + cl_mutable_command_khr command; + cl_uint num_args; + cl_uint num_svm_args; + cl_uint num_exec_infos; + cl_uint work_dim; + const cl_mutable_dispatch_arg_khr* arg_list; + const cl_mutable_dispatch_arg_khr* arg_svm_list; + const cl_mutable_dispatch_exec_info_khr* exec_info_list; + const size_t* global_work_offset; + const size_t* global_work_size; + const size_t* local_work_size; +} cl_mutable_dispatch_config_khr; +typedef struct _cl_mutable_base_config_khr { + cl_command_buffer_structure_type_khr type; + const void* next; + cl_uint num_mutable_dispatch; + const cl_mutable_dispatch_config_khr* mutable_dispatch_list; +} cl_mutable_base_config_khr; + +/* cl_command_buffer_flags_khr - bitfield */ +#define CL_COMMAND_BUFFER_MUTABLE_KHR (1 << 1) + +/* Error codes */ +#define CL_INVALID_MUTABLE_COMMAND_KHR -1141 + +/* cl_device_info */ +#define CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 + +/* cl_ndrange_kernel_command_properties_khr */ +#define CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 + +/* cl_mutable_dispatch_fields_khr - bitfield */ +#define CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR (1 << 0) +#define CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR (1 << 1) +#define CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR (1 << 2) +#define CL_MUTABLE_DISPATCH_ARGUMENTS_KHR (1 << 3) +#define CL_MUTABLE_DISPATCH_EXEC_INFO_KHR (1 << 4) + +/* cl_mutable_command_info_khr */ +#define CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR 0x12A0 +#define CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR 0x12A1 +#define CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR 0x12AD +#define CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR 0x12A2 +#define CL_MUTABLE_DISPATCH_KERNEL_KHR 0x12A3 +#define CL_MUTABLE_DISPATCH_DIMENSIONS_KHR 0x12A4 +#define CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR 0x12A5 +#define CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR 0x12A6 +#define CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR 0x12A7 + +/* cl_command_buffer_structure_type_khr */ +#define CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR 0 +#define CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR 1 + + +typedef cl_int (CL_API_CALL * +clUpdateMutableCommandsKHR_fn)( + cl_command_buffer_khr command_buffer, + const cl_mutable_base_config_khr* mutable_config) ; + +typedef cl_int (CL_API_CALL * +clGetMutableCommandInfoKHR_fn)( + cl_mutable_command_khr command, + cl_mutable_command_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clUpdateMutableCommandsKHR( + cl_command_buffer_khr command_buffer, + const cl_mutable_base_config_khr* mutable_config) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMutableCommandInfoKHR( + cl_mutable_command_khr command, + cl_mutable_command_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#endif /* CL_NO_PROTOTYPES */ + +/* cl_khr_fp64 extension - no extension #define since it has no functions */ +/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */ + +#if CL_TARGET_OPENCL_VERSION <= 110 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif + +/* cl_khr_fp16 extension - no extension #define since it has no functions */ +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 + +/* Memory object destruction + * + * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR + * + * Registers a user callback function that will be called when the memory object is deleted and its resources + * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback + * stack associated with memobj. The registered user callback functions are called in the reverse order in + * which they were registered. The user callback functions are called and then the memory object is deleted + * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be + * notified when the memory referenced by host_ptr, specified when the memory object is created and used as + * the storage bits for the memory object, can be reused or freed. + * + * The application may not call CL api's with the cl_mem object passed to the pfn_notify. + * + * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + */ +#define cl_APPLE_SetMemObjectDestructor 1 +extern CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorAPPLE( cl_mem memobj, + void (* pfn_notify)(cl_mem memobj, void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_0; + + +/* Context Logging Functions + * + * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). + * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + * + * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger + */ +#define cl_APPLE_ContextLoggingFunctions 1 +extern CL_API_ENTRY void CL_API_CALL clLogMessagesToSystemLogAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_API_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ +extern CL_API_ENTRY void CL_API_CALL clLogMessagesToStdoutAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_API_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ +extern CL_API_ENTRY void CL_API_CALL clLogMessagesToStderrAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_API_SUFFIX__VERSION_1_0; + + +/************************ +* cl_khr_icd extension * +************************/ +#define cl_khr_icd 1 + +/* cl_platform_info */ +#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 + +/* Additional Error Codes */ +#define CL_PLATFORM_NOT_FOUND_KHR -1001 + +extern CL_API_ENTRY cl_int CL_API_CALL +clIcdGetPlatformIDsKHR(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms); + +typedef cl_int +(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms); + + +/******************************* + * cl_khr_il_program extension * + *******************************/ +#define cl_khr_il_program 1 + +/* New property to clGetDeviceInfo for retrieving supported intermediate + * languages + */ +#define CL_DEVICE_IL_VERSION_KHR 0x105B + +/* New property to clGetProgramInfo for retrieving for retrieving the IL of a + * program + */ +#define CL_PROGRAM_IL_KHR 0x1169 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithILKHR(cl_context context, + const void * il, + size_t length, + cl_int * errcode_ret); + +typedef cl_program +(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context, + const void * il, + size_t length, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +/* Extension: cl_khr_image2d_from_buffer + * + * This extension allows a 2D image to be created from a cl_mem buffer without + * a copy. The type associated with a 2D image created from a buffer in an + * OpenCL program is image2d_t. Both the sampler and sampler-less read_image + * built-in functions are supported for 2D images and 2D images created from + * a buffer. Similarly, the write_image built-ins are also supported for 2D + * images created from a buffer. + * + * When the 2D image from buffer is created, the client must specify the + * width, height, image format (i.e. channel order and channel data type) + * and optionally the row pitch. + * + * The pitch specified must be a multiple of + * CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels. + * The base address of the buffer must be aligned to + * CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels. + */ + +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B + + +/************************************** + * cl_khr_initialize_memory extension * + **************************************/ + +#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030 + + +/************************************** + * cl_khr_terminate_context extension * + **************************************/ + +#define CL_CONTEXT_TERMINATED_KHR -1121 + +#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031 +#define CL_CONTEXT_TERMINATE_KHR 0x2032 + +#define cl_khr_terminate_context 1 +extern CL_API_ENTRY cl_int CL_API_CALL +clTerminateContextKHR(cl_context context) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int +(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_API_SUFFIX__VERSION_1_2; + + +/* + * Extension: cl_khr_spir + * + * This extension adds support to create an OpenCL program object from a + * Standard Portable Intermediate Representation (SPIR) instance + */ + +#define CL_DEVICE_SPIR_VERSIONS 0x40E0 +#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 + + +/***************************************** + * cl_khr_create_command_queue extension * + *****************************************/ +#define cl_khr_create_command_queue 1 + +typedef cl_properties cl_queue_properties_khr; + +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithPropertiesKHR(cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_command_queue +(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + + +/****************************************** +* cl_nv_device_attribute_query extension * +******************************************/ + +/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ +#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 +#define CL_DEVICE_WARP_SIZE_NV 0x4003 +#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 + + +/********************************* +* cl_amd_device_attribute_query * +*********************************/ + +#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#define CL_DEVICE_BOARD_NAME_AMD 0x4038 +#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 +#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 +#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 +#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 +#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 +#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 +#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 +#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 +#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 +#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A +#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B +#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C +#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031 +#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033 +#define CL_DEVICE_PCIE_ID_AMD 0x4034 + + +/********************************* +* cl_arm_printf extension +*********************************/ + +#define CL_PRINTF_CALLBACK_ARM 0x40B0 +#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1 + + +/*********************************** +* cl_ext_device_fission extension +***********************************/ +#define cl_ext_device_fission 1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDeviceEXT(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int +(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDeviceEXT(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int +(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_ulong cl_device_partition_property_ext; +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevicesEXT(cl_device_id in_device, + const cl_device_partition_property_ext * properties, + cl_uint num_entries, + cl_device_id * out_devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int +(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device, + const cl_device_partition_property_ext * properties, + cl_uint num_entries, + cl_device_id * out_devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_1; + +/* cl_device_partition_property_ext */ +#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 +#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 +#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 + +/* clDeviceGetInfo selectors */ +#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 +#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 +#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 +#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 +#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 + +/* error codes */ +#define CL_DEVICE_PARTITION_FAILED_EXT -1057 +#define CL_INVALID_PARTITION_COUNT_EXT -1058 +#define CL_INVALID_PARTITION_NAME_EXT -1059 + +/* CL_AFFINITY_DOMAINs */ +#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 +#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 +#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 +#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 +#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 +#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 + +/* cl_device_partition_property_ext list terminators */ +#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) +#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) +#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) + + +/*********************************** + * cl_ext_migrate_memobject extension definitions + ***********************************/ +#define cl_ext_migrate_memobject 1 + +typedef cl_bitfield cl_mem_migration_flags_ext; + +#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1 + +#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags_ext flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + +typedef cl_int +(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags_ext flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +/********************************* +* cl_ext_cxx_for_opencl extension +*********************************/ +#define cl_ext_cxx_for_opencl 1 + +#define CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT 0x4230 + +/********************************* +* cl_qcom_ext_host_ptr extension +*********************************/ +#define cl_qcom_ext_host_ptr 1 + +#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) + +#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 +#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 +#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 +#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 +#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 +#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 +#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 +#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 + +typedef cl_uint cl_image_pitch_info_qcom; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceImageInfoQCOM(cl_device_id device, + size_t image_width, + size_t image_height, + const cl_image_format *image_format, + cl_image_pitch_info_qcom param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +typedef struct _cl_mem_ext_host_ptr +{ + /* Type of external memory allocation. */ + /* Legal values will be defined in layered extensions. */ + cl_uint allocation_type; + + /* Host cache policy for this external memory allocation. */ + cl_uint host_cache_policy; + +} cl_mem_ext_host_ptr; + + +/******************************************* +* cl_qcom_ext_host_ptr_iocoherent extension +********************************************/ + +/* Cache policy specifying io-coherence */ +#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9 + + +/********************************* +* cl_qcom_ion_host_ptr extension +*********************************/ + +#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 + +typedef struct _cl_mem_ion_host_ptr +{ + /* Type of external memory allocation. */ + /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */ + cl_mem_ext_host_ptr ext_host_ptr; + + /* ION file descriptor */ + int ion_filedesc; + + /* Host pointer to the ION allocated memory */ + void* ion_hostptr; + +} cl_mem_ion_host_ptr; + + +/********************************* +* cl_qcom_android_native_buffer_host_ptr extension +*********************************/ + +#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6 + +typedef struct _cl_mem_android_native_buffer_host_ptr +{ + /* Type of external memory allocation. */ + /* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */ + cl_mem_ext_host_ptr ext_host_ptr; + + /* Virtual pointer to the android native buffer */ + void* anb_ptr; + +} cl_mem_android_native_buffer_host_ptr; + + +/****************************************** + * cl_img_yuv_image extension * + ******************************************/ + +/* Image formats used in clCreateImage */ +#define CL_NV21_IMG 0x40D0 +#define CL_YV12_IMG 0x40D1 + + +/****************************************** + * cl_img_cached_allocations extension * + ******************************************/ + +/* Flag values used by clCreateBuffer */ +#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26) +#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27) + + +/****************************************** + * cl_img_use_gralloc_ptr extension * + ******************************************/ +#define cl_img_use_gralloc_ptr 1 + +/* Flag values used by clCreateBuffer */ +#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28) + +/* To be used by clGetEventInfo: */ +#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2 +#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3 + +/* Error codes from clEnqueueAcquireGrallocObjectsIMG and clEnqueueReleaseGrallocObjectsIMG */ +#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4 +#define CL_INVALID_GRALLOC_OBJECT_IMG 0x40D5 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +/****************************************** + * cl_img_generate_mipmap extension * + ******************************************/ +#define cl_img_generate_mipmap 1 + +typedef cl_uint cl_mipmap_filter_mode_img; + +/* To be used by clEnqueueGenerateMipmapIMG */ +#define CL_MIPMAP_FILTER_ANY_IMG 0x0 +#define CL_MIPMAP_FILTER_BOX_IMG 0x1 + +/* To be used by clGetEventInfo */ +#define CL_COMMAND_GENERATE_MIPMAP_IMG 0x40D6 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueGenerateMipmapIMG(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + cl_mipmap_filter_mode_img mipmap_filter_mode, + const size_t *array_region, + const size_t *mip_region, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +/****************************************** + * cl_img_mem_properties extension * + ******************************************/ +#define cl_img_mem_properties 1 + +/* To be used by clCreateBufferWithProperties */ +#define CL_MEM_ALLOC_FLAGS_IMG 0x40D7 + +/* To be used wiith the CL_MEM_ALLOC_FLAGS_IMG property */ +typedef cl_bitfield cl_mem_alloc_flags_img; + +/* To be used with cl_mem_alloc_flags_img */ +#define CL_MEM_ALLOC_RELAX_REQUIREMENTS_IMG (1 << 0) + +/********************************* +* cl_khr_subgroups extension +*********************************/ +#define cl_khr_subgroups 1 + +#if !defined(CL_VERSION_2_1) +/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h. + In hindsight, there should have been a khr suffix on this type for + the extension, but keeping it un-suffixed to maintain backwards + compatibility. */ +typedef cl_uint cl_kernel_sub_group_info; +#endif + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfoKHR(cl_kernel in_kernel, + cl_device_id in_device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void * input_value, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0_DEPRECATED; + +typedef cl_int +(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel, + cl_device_id in_device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void * input_value, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0_DEPRECATED; + + +/********************************* +* cl_khr_mipmap_image extension +*********************************/ + +/* cl_sampler_properties */ +#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155 +#define CL_SAMPLER_LOD_MIN_KHR 0x1156 +#define CL_SAMPLER_LOD_MAX_KHR 0x1157 + + +/********************************* +* cl_khr_priority_hints extension +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. */ +#define cl_khr_priority_hints 1 + +typedef cl_uint cl_queue_priority_khr; + +/* cl_command_queue_properties */ +#define CL_QUEUE_PRIORITY_KHR 0x1096 + +/* cl_queue_priority_khr */ +#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0) +#define CL_QUEUE_PRIORITY_MED_KHR (1<<1) +#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2) + + +/********************************* +* cl_khr_throttle_hints extension +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. */ +#define cl_khr_throttle_hints 1 + +typedef cl_uint cl_queue_throttle_khr; + +/* cl_command_queue_properties */ +#define CL_QUEUE_THROTTLE_KHR 0x1097 + +/* cl_queue_throttle_khr */ +#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0) +#define CL_QUEUE_THROTTLE_MED_KHR (1<<1) +#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2) + + +/********************************* +* cl_khr_subgroup_named_barrier +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. */ +#define cl_khr_subgroup_named_barrier 1 + +/* cl_device_info */ +#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035 + + +/********************************* +* cl_khr_extended_versioning +*********************************/ + +#define cl_khr_extended_versioning 1 + +#define CL_VERSION_MAJOR_BITS_KHR (10) +#define CL_VERSION_MINOR_BITS_KHR (10) +#define CL_VERSION_PATCH_BITS_KHR (12) + +#define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) +#define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) +#define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1) + +#define CL_VERSION_MAJOR_KHR(version) ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) +#define CL_VERSION_MINOR_KHR(version) (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR) +#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR) + +#define CL_MAKE_VERSION_KHR(major, minor, patch) \ + ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \ + (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ + ((patch) & CL_VERSION_PATCH_MASK_KHR)) + +typedef cl_uint cl_version_khr; + +#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64 + +typedef struct _cl_name_version_khr +{ + cl_version_khr version; + char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR]; +} cl_name_version_khr; + +/* cl_platform_info */ +#define CL_PLATFORM_NUMERIC_VERSION_KHR 0x0906 +#define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR 0x0907 + +/* cl_device_info */ +#define CL_DEVICE_NUMERIC_VERSION_KHR 0x105E +#define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR 0x105F +#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR 0x1060 +#define CL_DEVICE_ILS_WITH_VERSION_KHR 0x1061 +#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR 0x1062 + + +/********************************* +* cl_khr_device_uuid extension +*********************************/ +#define cl_khr_device_uuid 1 + +#define CL_UUID_SIZE_KHR 16 +#define CL_LUID_SIZE_KHR 8 + +#define CL_DEVICE_UUID_KHR 0x106A +#define CL_DRIVER_UUID_KHR 0x106B +#define CL_DEVICE_LUID_VALID_KHR 0x106C +#define CL_DEVICE_LUID_KHR 0x106D +#define CL_DEVICE_NODE_MASK_KHR 0x106E + + +/*************************************************************** +* cl_khr_pci_bus_info +***************************************************************/ +#define cl_khr_pci_bus_info 1 + +typedef struct _cl_device_pci_bus_info_khr { + cl_uint pci_domain; + cl_uint pci_bus; + cl_uint pci_device; + cl_uint pci_function; +} cl_device_pci_bus_info_khr; + +/* cl_device_info */ +#define CL_DEVICE_PCI_BUS_INFO_KHR 0x410F + + +/*************************************************************** +* cl_khr_suggested_local_work_size +***************************************************************/ +#define cl_khr_suggested_local_work_size 1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSuggestedLocalWorkSizeKHR( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_int (CL_API_CALL * +clGetKernelSuggestedLocalWorkSizeKHR_fn)( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0; + + +/*************************************************************** +* cl_khr_integer_dot_product +***************************************************************/ +#define cl_khr_integer_dot_product 1 + +typedef cl_bitfield cl_device_integer_dot_product_capabilities_khr; + +/* cl_device_integer_dot_product_capabilities_khr */ +#define CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR (1 << 0) +#define CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR (1 << 1) + +typedef struct _cl_device_integer_dot_product_acceleration_properties_khr { + cl_bool signed_accelerated; + cl_bool unsigned_accelerated; + cl_bool mixed_signedness_accelerated; + cl_bool accumulating_saturating_signed_accelerated; + cl_bool accumulating_saturating_unsigned_accelerated; + cl_bool accumulating_saturating_mixed_signedness_accelerated; +} cl_device_integer_dot_product_acceleration_properties_khr; + +/* cl_device_info */ +#define CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR 0x1073 +#define CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR 0x1074 +#define CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR 0x1075 + + +/*************************************************************** +* cl_khr_external_memory +***************************************************************/ +#define cl_khr_external_memory 1 + +typedef cl_uint cl_external_memory_handle_type_khr; + +/* cl_platform_info */ +#define CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x2044 + +/* cl_device_info */ +#define CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x204F + +/* cl_mem_properties */ +#define CL_DEVICE_HANDLE_LIST_KHR 0x2051 +#define CL_DEVICE_HANDLE_LIST_END_KHR 0 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR 0x2047 +#define CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR 0x2048 + + +typedef cl_int (CL_API_CALL * +clEnqueueAcquireExternalMemObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_int (CL_API_CALL * +clEnqueueReleaseExternalMemObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireExternalMemObjectsKHR( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseExternalMemObjectsKHR( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +/*************************************************************** +* cl_khr_external_memory_dma_buf +***************************************************************/ +#define cl_khr_external_memory_dma_buf 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR 0x2067 + +/*************************************************************** +* cl_khr_external_memory_dx +***************************************************************/ +#define cl_khr_external_memory_dx 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR 0x2063 +#define CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR 0x2064 +#define CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR 0x2065 +#define CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR 0x2066 + +/*************************************************************** +* cl_khr_external_memory_opaque_fd +***************************************************************/ +#define cl_khr_external_memory_opaque_fd 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR 0x2060 + +/*************************************************************** +* cl_khr_external_memory_win32 +***************************************************************/ +#define cl_khr_external_memory_win32 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR 0x2061 +#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2062 + +/*************************************************************** +* cl_khr_external_semaphore +***************************************************************/ +#define cl_khr_external_semaphore 1 + +typedef struct _cl_semaphore_khr * cl_semaphore_khr; +typedef cl_uint cl_external_semaphore_handle_type_khr; + +/* cl_platform_info */ +#define CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x2037 +#define CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x2038 + +/* cl_device_info */ +#define CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x204D +#define CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x204E + +/* cl_semaphore_properties_khr */ +#define CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x203F +#define CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR 0 + + +typedef cl_int (CL_API_CALL * +clGetSemaphoreHandleForTypeKHR_fn)( + cl_semaphore_khr sema_object, + cl_device_id device, + cl_external_semaphore_handle_type_khr handle_type, + size_t handle_size, + void* handle_ptr, + size_t* handle_size_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSemaphoreHandleForTypeKHR( + cl_semaphore_khr sema_object, + cl_device_id device, + cl_external_semaphore_handle_type_khr handle_type, + size_t handle_size, + void* handle_ptr, + size_t* handle_size_ret) CL_API_SUFFIX__VERSION_1_2; + +/*************************************************************** +* cl_khr_external_semaphore_dx_fence +***************************************************************/ +#define cl_khr_external_semaphore_dx_fence 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR 0x2059 + +/*************************************************************** +* cl_khr_external_semaphore_opaque_fd +***************************************************************/ +#define cl_khr_external_semaphore_opaque_fd 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR 0x2055 + +/*************************************************************** +* cl_khr_external_semaphore_sync_fd +***************************************************************/ +#define cl_khr_external_semaphore_sync_fd 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_SYNC_FD_KHR 0x2058 + +/*************************************************************** +* cl_khr_external_semaphore_win32 +***************************************************************/ +#define cl_khr_external_semaphore_win32 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR 0x2056 +#define CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2057 + +/*************************************************************** +* cl_khr_semaphore +***************************************************************/ +#define cl_khr_semaphore 1 + +/* type cl_semaphore_khr */ +typedef cl_properties cl_semaphore_properties_khr; +typedef cl_uint cl_semaphore_info_khr; +typedef cl_uint cl_semaphore_type_khr; +typedef cl_ulong cl_semaphore_payload_khr; + +/* cl_semaphore_type */ +#define CL_SEMAPHORE_TYPE_BINARY_KHR 1 + +/* cl_platform_info */ +#define CL_PLATFORM_SEMAPHORE_TYPES_KHR 0x2036 + +/* cl_device_info */ +#define CL_DEVICE_SEMAPHORE_TYPES_KHR 0x204C + +/* cl_semaphore_info_khr */ +#define CL_SEMAPHORE_CONTEXT_KHR 0x2039 +#define CL_SEMAPHORE_REFERENCE_COUNT_KHR 0x203A +#define CL_SEMAPHORE_PROPERTIES_KHR 0x203B +#define CL_SEMAPHORE_PAYLOAD_KHR 0x203C + +/* cl_semaphore_info_khr or cl_semaphore_properties_khr */ +#define CL_SEMAPHORE_TYPE_KHR 0x203D +/* enum CL_DEVICE_HANDLE_LIST_KHR */ +/* enum CL_DEVICE_HANDLE_LIST_END_KHR */ + +/* cl_command_type */ +#define CL_COMMAND_SEMAPHORE_WAIT_KHR 0x2042 +#define CL_COMMAND_SEMAPHORE_SIGNAL_KHR 0x2043 + +/* Error codes */ +#define CL_INVALID_SEMAPHORE_KHR -1142 + + +typedef cl_semaphore_khr (CL_API_CALL * +clCreateSemaphoreWithPropertiesKHR_fn)( + cl_context context, + const cl_semaphore_properties_khr* sema_props, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clEnqueueWaitSemaphoresKHR_fn)( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clEnqueueSignalSemaphoresKHR_fn)( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clGetSemaphoreInfoKHR_fn)( + cl_semaphore_khr sema_object, + cl_semaphore_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clReleaseSemaphoreKHR_fn)( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clRetainSemaphoreKHR_fn)( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_semaphore_khr CL_API_CALL +clCreateSemaphoreWithPropertiesKHR( + cl_context context, + const cl_semaphore_properties_khr* sema_props, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWaitSemaphoresKHR( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSignalSemaphoresKHR( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSemaphoreInfoKHR( + cl_semaphore_khr sema_object, + cl_semaphore_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSemaphoreKHR( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSemaphoreKHR( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + +/********************************** + * cl_arm_import_memory extension * + **********************************/ +#define cl_arm_import_memory 1 + +typedef intptr_t cl_import_properties_arm; + +/* Default and valid proporties name for cl_arm_import_memory */ +#define CL_IMPORT_TYPE_ARM 0x40B2 + +/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_HOST_ARM 0x40B3 + +/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4 + +/* Protected memory property */ +#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5 + +/* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_ANDROID_HARDWARE_BUFFER_ARM 0x41E2 + +/* Data consistency with host property */ +#define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3 + +/* Index of plane in a multiplanar hardware buffer */ +#define CL_IMPORT_ANDROID_HARDWARE_BUFFER_PLANE_INDEX_ARM 0x41EF + +/* Index of layer in a multilayer hardware buffer */ +#define CL_IMPORT_ANDROID_HARDWARE_BUFFER_LAYER_INDEX_ARM 0x41F0 + +/* Import memory size value to indicate a size for the whole buffer */ +#define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX + +/* This extension adds a new function that allows for direct memory import into + * OpenCL via the clImportMemoryARM function. + * + * Memory imported through this interface will be mapped into the device's page + * tables directly, providing zero copy access. It will never fall back to copy + * operations and aliased buffers. + * + * Types of memory supported for import are specified as additional extension + * strings. + * + * This extension produces cl_mem allocations which are compatible with all other + * users of cl_mem in the standard API. + * + * This extension maps pages with the same properties as the normal buffer creation + * function clCreateBuffer. + */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clImportMemoryARM( cl_context context, + cl_mem_flags flags, + const cl_import_properties_arm *properties, + void *memory, + size_t size, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + + +/****************************************** + * cl_arm_shared_virtual_memory extension * + ******************************************/ +#define cl_arm_shared_virtual_memory 1 + +/* Used by clGetDeviceInfo */ +#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6 + +/* Used by clGetMemObjectInfo */ +#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7 + +/* Used by clSetKernelExecInfoARM: */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9 + +/* To be used by clGetEventInfo: */ +#define CL_COMMAND_SVM_FREE_ARM 0x40BA +#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB +#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC +#define CL_COMMAND_SVM_MAP_ARM 0x40BD +#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE + +/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3) + +/* Flag values used by clSVMAllocARM: */ +#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10) +#define CL_MEM_SVM_ATOMICS_ARM (1 << 11) + +typedef cl_bitfield cl_svm_mem_flags_arm; +typedef cl_uint cl_kernel_exec_info_arm; +typedef cl_bitfield cl_device_svm_capabilities_arm; + +extern CL_API_ENTRY void * CL_API_CALL +clSVMAllocARM(cl_context context, + cl_svm_mem_flags_arm flags, + size_t size, + cl_uint alignment) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFreeARM(cl_context context, + void * svm_pointer) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFreeARM(cl_command_queue command_queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void * user_data), + void * user_data, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpyARM(cl_command_queue command_queue, + cl_bool blocking_copy, + void * dst_ptr, + const void * src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFillARM(cl_command_queue command_queue, + void * svm_ptr, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMapARM(cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void * svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmapARM(cl_command_queue command_queue, + void * svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointerARM(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfoARM(cl_kernel kernel, + cl_kernel_exec_info_arm param_name, + size_t param_value_size, + const void * param_value) CL_API_SUFFIX__VERSION_1_2; + +/******************************** + * cl_arm_get_core_id extension * + ********************************/ + +#ifdef CL_VERSION_1_2 + +#define cl_arm_get_core_id 1 + +/* Device info property for bitfield of cores present */ +#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF + +#endif /* CL_VERSION_1_2 */ + +/********************************* +* cl_arm_job_slot_selection +*********************************/ + +#define cl_arm_job_slot_selection 1 + +/* cl_device_info */ +#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0 + +/* cl_command_queue_properties */ +#define CL_QUEUE_JOB_SLOT_ARM 0x41E1 + +/********************************* +* cl_arm_scheduling_controls +*********************************/ + +#define cl_arm_scheduling_controls 1 + +typedef cl_bitfield cl_device_scheduling_controls_capabilities_arm; + +/* cl_device_info */ +#define CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM 0x41E4 + +#define CL_DEVICE_SCHEDULING_KERNEL_BATCHING_ARM (1 << 0) +#define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_ARM (1 << 1) +#define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_MODIFIER_ARM (1 << 2) +#define CL_DEVICE_SCHEDULING_DEFERRED_FLUSH_ARM (1 << 3) +#define CL_DEVICE_SCHEDULING_REGISTER_ALLOCATION_ARM (1 << 4) +#define CL_DEVICE_SCHEDULING_WARP_THROTTLING_ARM (1 << 5) +#define CL_DEVICE_SCHEDULING_COMPUTE_UNIT_BATCH_QUEUE_SIZE_ARM (1 << 6) + +#define CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM 0x41EB +#define CL_DEVICE_MAX_WARP_COUNT_ARM 0x41EA + +/* cl_kernel_info */ +#define CL_KERNEL_MAX_WARP_COUNT_ARM 0x41E9 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM 0x41E5 +#define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM 0x41E6 +#define CL_KERNEL_EXEC_INFO_WARP_COUNT_LIMIT_ARM 0x41E8 +#define CL_KERNEL_EXEC_INFO_COMPUTE_UNIT_MAX_QUEUED_BATCHES_ARM 0x41F1 + +/* cl_queue_properties */ +#define CL_QUEUE_KERNEL_BATCHING_ARM 0x41E7 +#define CL_QUEUE_DEFERRED_FLUSH_ARM 0x41EC + +/************************************** +* cl_arm_controlled_kernel_termination +***************************************/ + +#define cl_arm_controlled_kernel_termination 1 + +/* Error code to indicate kernel terminated with failure */ +#define CL_COMMAND_TERMINATED_ITSELF_WITH_FAILURE_ARM -1108 + +/* cl_device_info */ +#define CL_DEVICE_CONTROLLED_TERMINATION_CAPABILITIES_ARM 0x41EE + +/* Bit fields for controlled termination feature query */ +typedef cl_bitfield cl_device_controlled_termination_capabilities_arm; + +#define CL_DEVICE_CONTROLLED_TERMINATION_SUCCESS_ARM (1 << 0) +#define CL_DEVICE_CONTROLLED_TERMINATION_FAILURE_ARM (1 << 1) +#define CL_DEVICE_CONTROLLED_TERMINATION_QUERY_ARM (1 << 2) + +/* cl_event_info */ +#define CL_EVENT_COMMAND_TERMINATION_REASON_ARM 0x41ED + +/* Values returned for event termination reason query */ +typedef cl_uint cl_command_termination_reason_arm; + +#define CL_COMMAND_TERMINATION_COMPLETION_ARM 0 +#define CL_COMMAND_TERMINATION_CONTROLLED_SUCCESS_ARM 1 +#define CL_COMMAND_TERMINATION_CONTROLLED_FAILURE_ARM 2 +#define CL_COMMAND_TERMINATION_ERROR_ARM 3 + +/************************************* +* cl_arm_protected_memory_allocation * +*************************************/ + +#define cl_arm_protected_memory_allocation 1 + +#define CL_MEM_PROTECTED_ALLOC_ARM (1ULL << 36) + +/****************************************** +* cl_intel_exec_by_local_thread extension * +******************************************/ + +#define cl_intel_exec_by_local_thread 1 + +#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31) + +/*************************************************************** +* cl_intel_device_attribute_query +***************************************************************/ + +#define cl_intel_device_attribute_query 1 + +typedef cl_bitfield cl_device_feature_capabilities_intel; + +/* cl_device_feature_capabilities_intel */ +#define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0) +#define CL_DEVICE_FEATURE_FLAG_DPAS_INTEL (1 << 1) + +/* cl_device_info */ +#define CL_DEVICE_IP_VERSION_INTEL 0x4250 +#define CL_DEVICE_ID_INTEL 0x4251 +#define CL_DEVICE_NUM_SLICES_INTEL 0x4252 +#define CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL 0x4253 +#define CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL 0x4254 +#define CL_DEVICE_NUM_THREADS_PER_EU_INTEL 0x4255 +#define CL_DEVICE_FEATURE_CAPABILITIES_INTEL 0x4256 + +/*********************************************** +* cl_intel_device_partition_by_names extension * +************************************************/ + +#define cl_intel_device_partition_by_names 1 + +#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 +#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1 + +/************************************************ +* cl_intel_accelerator extension * +* cl_intel_motion_estimation extension * +* cl_intel_advanced_motion_estimation extension * +*************************************************/ + +#define cl_intel_accelerator 1 +#define cl_intel_motion_estimation 1 +#define cl_intel_advanced_motion_estimation 1 + +typedef struct _cl_accelerator_intel* cl_accelerator_intel; +typedef cl_uint cl_accelerator_type_intel; +typedef cl_uint cl_accelerator_info_intel; + +typedef struct _cl_motion_estimation_desc_intel { + cl_uint mb_block_type; + cl_uint subpixel_mode; + cl_uint sad_adjust_mode; + cl_uint search_path_type; +} cl_motion_estimation_desc_intel; + +/* error codes */ +#define CL_INVALID_ACCELERATOR_INTEL -1094 +#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 +#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096 +#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 + +/* cl_accelerator_type_intel */ +#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 + +/* cl_accelerator_info_intel */ +#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 +#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091 +#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092 +#define CL_ACCELERATOR_TYPE_INTEL 0x4093 + +/* cl_motion_detect_desc_intel flags */ +#define CL_ME_MB_TYPE_16x16_INTEL 0x0 +#define CL_ME_MB_TYPE_8x8_INTEL 0x1 +#define CL_ME_MB_TYPE_4x4_INTEL 0x2 + +#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 + +#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 +#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 + +#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 +#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 +#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 + +#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0 +#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1 +#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2 +#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4 + +#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1 +#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2 +#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3 + +#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16 +#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21 +#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32 +#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43 +#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48 + +#define CL_ME_COST_PENALTY_NONE_INTEL 0x0 +#define CL_ME_COST_PENALTY_LOW_INTEL 0x1 +#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2 +#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3 + +#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0 +#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1 +#define CL_ME_COST_PRECISION_PEL_INTEL 0x2 +#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3 + +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 +#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 + +#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 +#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 + +#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 +#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 +#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 + +/* cl_device_info */ +#define CL_DEVICE_ME_VERSION_INTEL 0x407E + +#define CL_ME_VERSION_LEGACY_INTEL 0x0 +#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1 +#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2 + +extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL +clCreateAcceleratorINTEL( + cl_context context, + cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)( + cl_context context, + cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetAcceleratorInfoINTEL( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainAcceleratorINTEL( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseAcceleratorINTEL( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +/****************************************** +* cl_intel_simultaneous_sharing extension * +*******************************************/ + +#define cl_intel_simultaneous_sharing 1 + +#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 +#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 + +/*********************************** +* cl_intel_egl_image_yuv extension * +************************************/ + +#define cl_intel_egl_image_yuv 1 + +#define CL_EGL_YUV_PLANE_INTEL 0x4107 + +/******************************** +* cl_intel_packed_yuv extension * +*********************************/ + +#define cl_intel_packed_yuv 1 + +#define CL_YUYV_INTEL 0x4076 +#define CL_UYVY_INTEL 0x4077 +#define CL_YVYU_INTEL 0x4078 +#define CL_VYUY_INTEL 0x4079 + +/******************************************** +* cl_intel_required_subgroup_size extension * +*********************************************/ + +#define cl_intel_required_subgroup_size 1 + +#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 +#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109 +#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A + +/**************************************** +* cl_intel_driver_diagnostics extension * +*****************************************/ + +#define cl_intel_driver_diagnostics 1 + +typedef cl_uint cl_diagnostics_verbose_level; + +#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106 + +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 ) + +/******************************** +* cl_intel_planar_yuv extension * +*********************************/ + +#define CL_NV12_INTEL 0x410E + +#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 ) +#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 ) + +#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E +#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F + +/******************************************************* +* cl_intel_device_side_avc_motion_estimation extension * +********************************************************/ + +#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B +#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C +#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D + +#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */ +#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */ + +#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0 +#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1 +#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2 +#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3 + +#define CL_AVC_ME_MINOR_8x8_INTEL 0x0 +#define CL_AVC_ME_MINOR_8x4_INTEL 0x1 +#define CL_AVC_ME_MINOR_4x8_INTEL 0x2 +#define CL_AVC_ME_MINOR_4x4_INTEL 0x3 + +#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0 +#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 +#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 + +#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 +#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E +#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D +#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B +#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 +#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F +#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F +#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F + +#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 +#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 +#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 +#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 +#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 +#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 +#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 +#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 +#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 +#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9 +#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2 +#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa + +#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 +#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 + +#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 + +#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 +#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 +#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 +#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 + +#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 +#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 +#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 +#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B +#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 + +#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 +#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 +#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 +#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 + +#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 +#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 + +#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 ) + +#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 +#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 + +#define CL_AVC_ME_INTRA_16x16_INTEL 0x0 +#define CL_AVC_ME_INTRA_8x8_INTEL 0x1 +#define CL_AVC_ME_INTRA_4x4_INTEL 0x2 + +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 + +#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 + +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 + +#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1 +#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2 +#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3 + +#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 +#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 +#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 + +#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 +#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 + +/******************************************* +* cl_intel_unified_shared_memory extension * +********************************************/ +#define cl_intel_unified_shared_memory 1 + +typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel; +typedef cl_properties cl_mem_properties_intel; +typedef cl_bitfield cl_mem_alloc_flags_intel; +typedef cl_uint cl_mem_info_intel; +typedef cl_uint cl_unified_shared_memory_type_intel; +typedef cl_uint cl_mem_advice_intel; + +/* cl_device_info */ +#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190 +#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191 +#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192 +#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193 +#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194 + +/* cl_device_unified_shared_memory_capabilities_intel - bitfield */ +#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0) +#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1) +#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2) +#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3) + +/* cl_mem_properties_intel */ +#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195 + +/* cl_mem_alloc_flags_intel - bitfield */ +#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0) +#define CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL (1 << 1) +#define CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL (1 << 2) + +/* cl_mem_alloc_info_intel */ +#define CL_MEM_ALLOC_TYPE_INTEL 0x419A +#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B +#define CL_MEM_ALLOC_SIZE_INTEL 0x419C +#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D + +/* cl_unified_shared_memory_type_intel */ +#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196 +#define CL_MEM_TYPE_HOST_INTEL 0x4197 +#define CL_MEM_TYPE_DEVICE_INTEL 0x4198 +#define CL_MEM_TYPE_SHARED_INTEL 0x4199 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200 +#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201 +#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202 +#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203 + +/* cl_command_type */ +#define CL_COMMAND_MEMFILL_INTEL 0x4204 +#define CL_COMMAND_MEMCPY_INTEL 0x4205 +#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206 +#define CL_COMMAND_MEMADVISE_INTEL 0x4207 + + +typedef void* (CL_API_CALL * +clHostMemAllocINTEL_fn)( + cl_context context, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +typedef void* (CL_API_CALL * +clDeviceMemAllocINTEL_fn)( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +typedef void* (CL_API_CALL * +clSharedMemAllocINTEL_fn)( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +typedef cl_int (CL_API_CALL * +clMemFreeINTEL_fn)( + cl_context context, + void* ptr) ; + +typedef cl_int (CL_API_CALL * +clMemBlockingFreeINTEL_fn)( + cl_context context, + void* ptr) ; + +typedef cl_int (CL_API_CALL * +clGetMemAllocInfoINTEL_fn)( + cl_context context, + const void* ptr, + cl_mem_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +typedef cl_int (CL_API_CALL * +clSetKernelArgMemPointerINTEL_fn)( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value) ; + +typedef cl_int (CL_API_CALL * +clEnqueueMemFillINTEL_fn)( + cl_command_queue command_queue, + void* dst_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +typedef cl_int (CL_API_CALL * +clEnqueueMemcpyINTEL_fn)( + cl_command_queue command_queue, + cl_bool blocking, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +typedef cl_int (CL_API_CALL * +clEnqueueMemAdviseINTEL_fn)( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_advice_intel advice, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY void* CL_API_CALL +clHostMemAllocINTEL( + cl_context context, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY void* CL_API_CALL +clDeviceMemAllocINTEL( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY void* CL_API_CALL +clSharedMemAllocINTEL( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clMemFreeINTEL( + cl_context context, + void* ptr) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clMemBlockingFreeINTEL( + cl_context context, + void* ptr) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemAllocInfoINTEL( + cl_context context, + const void* ptr, + cl_mem_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgMemPointerINTEL( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemFillINTEL( + cl_command_queue command_queue, + void* dst_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemcpyINTEL( + cl_command_queue command_queue, + cl_bool blocking, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemAdviseINTEL( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_advice_intel advice, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#endif /* CL_NO_PROTOTYPES */ + +#if defined(CL_VERSION_1_2) +/* Requires OpenCL 1.2 for cl_mem_migration_flags: */ + +typedef cl_int (CL_API_CALL * +clEnqueueMigrateMemINTEL_fn)( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemINTEL( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#endif /* CL_NO_PROTOTYPES */ + +#endif /* defined(CL_VERSION_1_2) */ + +/* deprecated, use clEnqueueMemFillINTEL instead */ + +typedef cl_int (CL_API_CALL * +clEnqueueMemsetINTEL_fn)( + cl_command_queue command_queue, + void* dst_ptr, + cl_int value, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemsetINTEL( + cl_command_queue command_queue, + void* dst_ptr, + cl_int value, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#endif /* CL_NO_PROTOTYPES */ + +/*************************************************************** +* cl_intel_mem_alloc_buffer_location +***************************************************************/ +#define cl_intel_mem_alloc_buffer_location 1 +#define CL_INTEL_MEM_ALLOC_BUFFER_LOCATION_EXTENSION_NAME \ + "cl_intel_mem_alloc_buffer_location" + +/* cl_mem_properties_intel */ +#define CL_MEM_ALLOC_BUFFER_LOCATION_INTEL 0x419E + +/* cl_mem_alloc_info_intel */ +/* enum CL_MEM_ALLOC_BUFFER_LOCATION_INTEL */ + +/*************************************************** +* cl_intel_create_buffer_with_properties extension * +****************************************************/ + +#define cl_intel_create_buffer_with_properties 1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBufferWithPropertiesINTEL( + cl_context context, + const cl_mem_properties_intel* properties, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem (CL_API_CALL * +clCreateBufferWithPropertiesINTEL_fn)( + cl_context context, + const cl_mem_properties_intel* properties, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +/****************************************** +* cl_intel_mem_channel_property extension * +*******************************************/ + +#define CL_MEM_CHANNEL_INTEL 0x4213 + +/********************************* +* cl_intel_mem_force_host_memory * +**********************************/ + +#define cl_intel_mem_force_host_memory 1 + +/* cl_mem_flags */ +#define CL_MEM_FORCE_HOST_MEMORY_INTEL (1 << 20) + +/*************************************************************** +* cl_intel_command_queue_families +***************************************************************/ +#define cl_intel_command_queue_families 1 + +typedef cl_bitfield cl_command_queue_capabilities_intel; + +#define CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL 64 + +typedef struct _cl_queue_family_properties_intel { + cl_command_queue_properties properties; + cl_command_queue_capabilities_intel capabilities; + cl_uint count; + char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL]; +} cl_queue_family_properties_intel; + +/* cl_device_info */ +#define CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL 0x418B + +/* cl_queue_properties */ +#define CL_QUEUE_FAMILY_INTEL 0x418C +#define CL_QUEUE_INDEX_INTEL 0x418D + +/* cl_command_queue_capabilities_intel */ +#define CL_QUEUE_DEFAULT_CAPABILITIES_INTEL 0 +#define CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL (1 << 0) +#define CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL (1 << 1) +#define CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL (1 << 2) +#define CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL (1 << 3) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL (1 << 8) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL (1 << 9) +#define CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL (1 << 10) +#define CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL (1 << 11) +#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL (1 << 12) +#define CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL (1 << 13) +#define CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL (1 << 14) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL (1 << 15) +#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL (1 << 16) +#define CL_QUEUE_CAPABILITY_MARKER_INTEL (1 << 24) +#define CL_QUEUE_CAPABILITY_BARRIER_INTEL (1 << 25) +#define CL_QUEUE_CAPABILITY_KERNEL_INTEL (1 << 26) + +/*************************************************************** +* cl_intel_queue_no_sync_operations +***************************************************************/ + +#define cl_intel_queue_no_sync_operations 1 + +/* addition to cl_command_queue_properties */ +#define CL_QUEUE_NO_SYNC_OPERATIONS_INTEL (1 << 29) + +/*************************************************************** +* cl_intel_sharing_format_query +***************************************************************/ +#define cl_intel_sharing_format_query 1 + +/*************************************************************** +* cl_ext_image_requirements_info +***************************************************************/ + +#ifdef CL_VERSION_3_0 + +#define cl_ext_image_requirements_info 1 + +typedef cl_uint cl_image_requirements_info_ext; + +#define CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT 0x1290 +#define CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT 0x1292 +#define CL_IMAGE_REQUIREMENTS_SIZE_EXT 0x12B2 +#define CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT 0x12B3 +#define CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT 0x12B4 +#define CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT 0x12B5 +#define CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT 0x12B6 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageRequirementsInfoEXT( + cl_context context, + const cl_mem_properties* properties, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc* image_desc, + cl_image_requirements_info_ext param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_int (CL_API_CALL * +clGetImageRequirementsInfoEXT_fn)( + cl_context context, + const cl_mem_properties* properties, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc* image_desc, + cl_image_requirements_info_ext param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_3_0; + +#endif + +/*************************************************************** +* cl_ext_image_from_buffer +***************************************************************/ + +#ifdef CL_VERSION_3_0 + +#define cl_ext_image_from_buffer 1 + +#define CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT 0x1291 + +#endif + +#ifdef __cplusplus +} +#endif + + +#endif /* __CL_EXT_H */ diff --git a/CL/cl_ext_intel.h b/CL/cl_ext_intel.h new file mode 100644 index 000000000..a7ae87a34 --- /dev/null +++ b/CL/cl_ext_intel.h @@ -0,0 +1,19 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +#include +#pragma message("The Intel extensions have been moved into cl_ext.h. Please include cl_ext.h directly.") diff --git a/CL/cl_gl.h b/CL/cl_gl.h new file mode 100644 index 000000000..327746508 --- /dev/null +++ b/CL/cl_gl.h @@ -0,0 +1,194 @@ +/******************************************************************************* + * Copyright (c) 2008-2021 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_GL_H +#define __OPENCL_CL_GL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; +typedef struct __GLsync *cl_GLsync; + +/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 +#ifdef CL_VERSION_1_2 +#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E +#define CL_GL_OBJECT_TEXTURE1D 0x200F +#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 +#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 +#endif + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 +#ifdef CL_VERSION_1_2 +#define CL_GL_NUM_SAMPLES 0x2012 +#endif + + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLBuffer(cl_context context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLRenderbuffer(cl_context context, + cl_mem_flags flags, + cl_GLuint renderbuffer, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLObjectInfo(cl_mem memobj, + cl_gl_object_type * gl_object_type, + cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLTextureInfo(cl_mem memobj, + cl_gl_texture_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGLObjects(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGLObjects(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture2D(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture3D(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +/* cl_khr_gl_sharing extension */ + +#define cl_khr_gl_sharing 1 + +typedef cl_uint cl_gl_context_info; + +/* Additional Error Codes */ +#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 + +/* cl_gl_context_info */ +#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 +#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 + +/* Additional cl_context_properties */ +#define CL_GL_CONTEXT_KHR 0x2008 +#define CL_EGL_DISPLAY_KHR 0x2009 +#define CL_GLX_DISPLAY_KHR 0x200A +#define CL_WGL_HDC_KHR 0x200B +#define CL_CGL_SHAREGROUP_KHR 0x200C + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLContextInfoKHR(const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +/* + * cl_khr_gl_event extension + */ +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromGLsyncKHR(cl_context context, + cl_GLsync sync, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +/*************************************************************** +* cl_intel_sharing_format_query_gl +***************************************************************/ +#define cl_intel_sharing_format_query_gl 1 + +/* when cl_khr_gl_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedGLTextureFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_GLenum* gl_formats, + cl_uint* num_texture_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedGLTextureFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_GLenum* gl_formats, + cl_uint* num_texture_formats) ; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_H */ diff --git a/CL/cl_gl_ext.h b/CL/cl_gl_ext.h new file mode 100644 index 000000000..8ec818167 --- /dev/null +++ b/CL/cl_gl_ext.h @@ -0,0 +1,18 @@ +/******************************************************************************* + * Copyright (c) 2008-2021 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#include +#pragma message("All OpenGL-related extensions have been moved into cl_gl.h. Please include cl_gl.h directly.") diff --git a/CL/cl_half.h b/CL/cl_half.h new file mode 100644 index 000000000..ecc422332 --- /dev/null +++ b/CL/cl_half.h @@ -0,0 +1,440 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +/** + * This is a header-only utility library that provides OpenCL host code with + * routines for converting to/from cl_half values. + * + * Example usage: + * + * #include + * ... + * cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE); + * cl_float f = cl_half_to_float(h); + */ + +#ifndef OPENCL_CL_HALF_H +#define OPENCL_CL_HALF_H + +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Rounding mode used when converting to cl_half. + */ +typedef enum +{ + CL_HALF_RTE, // round to nearest even + CL_HALF_RTZ, // round towards zero + CL_HALF_RTP, // round towards positive infinity + CL_HALF_RTN, // round towards negative infinity +} cl_half_rounding_mode; + + +/* Private utility macros. */ +#define CL_HALF_EXP_MASK 0x7C00 +#define CL_HALF_MAX_FINITE_MAG 0x7BFF + + +/* + * Utility to deal with values that overflow when converting to half precision. + */ +static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode, + uint16_t sign) +{ + if (rounding_mode == CL_HALF_RTZ) + { + // Round overflow towards zero -> largest finite number (preserving sign) + return (sign << 15) | CL_HALF_MAX_FINITE_MAG; + } + else if (rounding_mode == CL_HALF_RTP && sign) + { + // Round negative overflow towards positive infinity -> most negative finite number + return (1 << 15) | CL_HALF_MAX_FINITE_MAG; + } + else if (rounding_mode == CL_HALF_RTN && !sign) + { + // Round positive overflow towards negative infinity -> largest finite number + return CL_HALF_MAX_FINITE_MAG; + } + + // Overflow to infinity + return (sign << 15) | CL_HALF_EXP_MASK; +} + +/* + * Utility to deal with values that underflow when converting to half precision. + */ +static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode, + uint16_t sign) +{ + if (rounding_mode == CL_HALF_RTP && !sign) + { + // Round underflow towards positive infinity -> smallest positive value + return (sign << 15) | 1; + } + else if (rounding_mode == CL_HALF_RTN && sign) + { + // Round underflow towards negative infinity -> largest negative value + return (sign << 15) | 1; + } + + // Flush to zero + return (sign << 15); +} + + +/** + * Convert a cl_float to a cl_half. + */ +static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode) +{ + // Type-punning to get direct access to underlying bits + union + { + cl_float f; + uint32_t i; + } f32; + f32.f = f; + + // Extract sign bit + uint16_t sign = f32.i >> 31; + + // Extract FP32 exponent and mantissa + uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF; + uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1); + + // Remove FP32 exponent bias + int32_t exp = f_exp - CL_FLT_MAX_EXP + 1; + + // Add FP16 exponent bias + uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1); + + // Position of the bit that will become the FP16 mantissa LSB + uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG; + + // Check for NaN / infinity + if (f_exp == 0xFF) + { + if (f_mant) + { + // NaN -> propagate mantissa and silence it + uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos); + h_mant |= 0x200; + return (sign << 15) | CL_HALF_EXP_MASK | h_mant; + } + else + { + // Infinity -> zero mantissa + return (sign << 15) | CL_HALF_EXP_MASK; + } + } + + // Check for zero + if (!f_exp && !f_mant) + { + return (sign << 15); + } + + // Check for overflow + if (exp >= CL_HALF_MAX_EXP) + { + return cl_half_handle_overflow(rounding_mode, sign); + } + + // Check for underflow + if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) + { + return cl_half_handle_underflow(rounding_mode, sign); + } + + // Check for value that will become denormal + if (exp < -14) + { + // Denormal -> include the implicit 1 from the FP32 mantissa + h_exp = 0; + f_mant |= 1 << (CL_FLT_MANT_DIG - 1); + + // Mantissa shift amount depends on exponent + lsb_pos = -exp + (CL_FLT_MANT_DIG - 25); + } + + // Generate FP16 mantissa by shifting FP32 mantissa + uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos); + + // Check whether we need to round + uint32_t halfway = 1 << (lsb_pos - 1); + uint32_t mask = (halfway << 1) - 1; + switch (rounding_mode) + { + case CL_HALF_RTE: + if ((f_mant & mask) > halfway) + { + // More than halfway -> round up + h_mant += 1; + } + else if ((f_mant & mask) == halfway) + { + // Exactly halfway -> round to nearest even + if (h_mant & 0x1) + h_mant += 1; + } + break; + case CL_HALF_RTZ: + // Mantissa has already been truncated -> do nothing + break; + case CL_HALF_RTP: + if ((f_mant & mask) && !sign) + { + // Round positive numbers up + h_mant += 1; + } + break; + case CL_HALF_RTN: + if ((f_mant & mask) && sign) + { + // Round negative numbers down + h_mant += 1; + } + break; + } + + // Check for mantissa overflow + if (h_mant & 0x400) + { + h_exp += 1; + h_mant = 0; + } + + return (sign << 15) | (h_exp << 10) | h_mant; +} + + +/** + * Convert a cl_double to a cl_half. + */ +static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode) +{ + // Type-punning to get direct access to underlying bits + union + { + cl_double d; + uint64_t i; + } f64; + f64.d = d; + + // Extract sign bit + uint16_t sign = f64.i >> 63; + + // Extract FP64 exponent and mantissa + uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF; + uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1); + + // Remove FP64 exponent bias + int64_t exp = d_exp - CL_DBL_MAX_EXP + 1; + + // Add FP16 exponent bias + uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1); + + // Position of the bit that will become the FP16 mantissa LSB + uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG; + + // Check for NaN / infinity + if (d_exp == 0x7FF) + { + if (d_mant) + { + // NaN -> propagate mantissa and silence it + uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); + h_mant |= 0x200; + return (sign << 15) | CL_HALF_EXP_MASK | h_mant; + } + else + { + // Infinity -> zero mantissa + return (sign << 15) | CL_HALF_EXP_MASK; + } + } + + // Check for zero + if (!d_exp && !d_mant) + { + return (sign << 15); + } + + // Check for overflow + if (exp >= CL_HALF_MAX_EXP) + { + return cl_half_handle_overflow(rounding_mode, sign); + } + + // Check for underflow + if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) + { + return cl_half_handle_underflow(rounding_mode, sign); + } + + // Check for value that will become denormal + if (exp < -14) + { + // Include the implicit 1 from the FP64 mantissa + h_exp = 0; + d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1); + + // Mantissa shift amount depends on exponent + lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25)); + } + + // Generate FP16 mantissa by shifting FP64 mantissa + uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); + + // Check whether we need to round + uint64_t halfway = (uint64_t)1 << (lsb_pos - 1); + uint64_t mask = (halfway << 1) - 1; + switch (rounding_mode) + { + case CL_HALF_RTE: + if ((d_mant & mask) > halfway) + { + // More than halfway -> round up + h_mant += 1; + } + else if ((d_mant & mask) == halfway) + { + // Exactly halfway -> round to nearest even + if (h_mant & 0x1) + h_mant += 1; + } + break; + case CL_HALF_RTZ: + // Mantissa has already been truncated -> do nothing + break; + case CL_HALF_RTP: + if ((d_mant & mask) && !sign) + { + // Round positive numbers up + h_mant += 1; + } + break; + case CL_HALF_RTN: + if ((d_mant & mask) && sign) + { + // Round negative numbers down + h_mant += 1; + } + break; + } + + // Check for mantissa overflow + if (h_mant & 0x400) + { + h_exp += 1; + h_mant = 0; + } + + return (sign << 15) | (h_exp << 10) | h_mant; +} + + +/** + * Convert a cl_half to a cl_float. + */ +static inline cl_float cl_half_to_float(cl_half h) +{ + // Type-punning to get direct access to underlying bits + union + { + cl_float f; + uint32_t i; + } f32; + + // Extract sign bit + uint16_t sign = h >> 15; + + // Extract FP16 exponent and mantissa + uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = h & 0x3FF; + + // Remove FP16 exponent bias + int32_t exp = h_exp - CL_HALF_MAX_EXP + 1; + + // Add FP32 exponent bias + uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1; + + // Check for NaN / infinity + if (h_exp == 0x1F) + { + if (h_mant) + { + // NaN -> propagate mantissa and silence it + uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG); + f_mant |= 0x400000; + f32.i = (sign << 31) | 0x7F800000 | f_mant; + return f32.f; + } + else + { + // Infinity -> zero mantissa + f32.i = (sign << 31) | 0x7F800000; + return f32.f; + } + } + + // Check for zero / denormal + if (h_exp == 0) + { + if (h_mant == 0) + { + // Zero -> zero exponent + f_exp = 0; + } + else + { + // Denormal -> normalize it + // - Shift mantissa to make most-significant 1 implicit + // - Adjust exponent accordingly + uint32_t shift = 0; + while ((h_mant & 0x400) == 0) + { + h_mant <<= 1; + shift++; + } + h_mant &= 0x3FF; + f_exp -= shift - 1; + } + } + + f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13); + return f32.f; +} + + +#undef CL_HALF_EXP_MASK +#undef CL_HALF_MAX_FINITE_MAG + + +#ifdef __cplusplus +} +#endif + + +#endif /* OPENCL_CL_HALF_H */ diff --git a/CL/cl_icd.h b/CL/cl_icd.h new file mode 100644 index 000000000..360b87030 --- /dev/null +++ b/CL/cl_icd.h @@ -0,0 +1,1294 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef OPENCL_CL_ICD_H +#define OPENCL_CL_ICD_H + +#include +#include +#include +#include + +#if defined(_WIN32) +#include +#include +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This file contains pointer type definitions for each of the CL API calls as + * well as a type definition for the dispatch table used by the Khronos ICD + * loader (see cl_khr_icd extension specification for background). + */ + +/* API function pointer definitions */ + +// Platform APIs +typedef cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( + cl_uint num_entries, cl_platform_id *platforms, + cl_uint *num_platforms) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( + cl_platform_id platform, cl_platform_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Device APIs +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( + cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( + cl_device_id device, cl_device_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( + cl_device_id in_device, + const cl_device_partition_property *partition_properties, + cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); + +typedef cl_int(CL_API_CALL *cl_api_clRetainDevice)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL *cl_api_clReleaseDevice)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateSubDevices; +typedef void *cl_api_clRetainDevice; +typedef void *cl_api_clReleaseDevice; + +#endif + +// Context APIs +typedef cl_context(CL_API_CALL *cl_api_clCreateContext)( + const cl_context_properties *properties, cl_uint num_devices, + const cl_device_id *devices, + void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( + const cl_context_properties *properties, cl_device_type device_type, + void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clRetainContext)( + cl_context context) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clReleaseContext)( + cl_context context) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetContextInfo)( + cl_context context, cl_context_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Command Queue APIs +typedef cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( + cl_context context, cl_device_id device, + cl_command_queue_properties properties, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef +cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueueWithProperties)( + cl_context /* context */, cl_device_id /* device */, + const cl_queue_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreateCommandQueueWithProperties; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( + cl_command_queue command_queue, cl_command_queue_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Memory Object APIs +typedef cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( + cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef cl_mem(CL_API_CALL *cl_api_clCreateImage)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + const cl_image_desc *image_desc, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateImage; + +#endif + +#ifdef CL_VERSION_3_0 + +typedef cl_mem(CL_API_CALL *cl_api_clCreateBufferWithProperties)( + cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, + size_t size, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateImageWithProperties)( + cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, + const cl_image_format *image_format, const cl_image_desc *image_desc, + void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_int(CL_API_CALL* cl_api_clSetContextDestructorCallback)( + cl_context context, + void(CL_CALLBACK* pfn_notify)(cl_context context, void* user_data), + void* user_data) CL_API_SUFFIX__VERSION_3_0; + +#else + +typedef void *cl_api_clCreateBufferWithProperties; +typedef void *cl_api_clCreateImageWithProperties; +typedef void *cl_api_clSetContextDestructorCallback; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clRetainMemObject)( + cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( + cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( + cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, + cl_uint num_entries, cl_image_format *image_formats, + cl_uint *num_image_formats) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( + cl_mem memobj, cl_mem_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetImageInfo)( + cl_mem image, cl_image_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef cl_mem(CL_API_CALL *cl_api_clCreatePipe)( + cl_context /* context */, cl_mem_flags /* flags */, + cl_uint /* pipe_packet_size */, cl_uint /* pipe_max_packets */, + const cl_pipe_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( + cl_mem /* pipe */, cl_pipe_info /* param_name */, + size_t /* param_value_size */, void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef void *(CL_API_CALL *cl_api_clSVMAlloc)( + cl_context /* context */, cl_svm_mem_flags /* flags */, size_t /* size */, + unsigned int /* alignment */)CL_API_SUFFIX__VERSION_2_0; + +typedef void(CL_API_CALL *cl_api_clSVMFree)( + cl_context /* context */, + void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreatePipe; +typedef void *cl_api_clGetPipeInfo; +typedef void *cl_api_clSVMAlloc; +typedef void *cl_api_clSVMFree; + +#endif + +// Sampler APIs +typedef cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( + cl_context context, cl_bool normalized_coords, + cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clRetainSampler)( + cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clReleaseSampler)( + cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( + cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef +cl_sampler(CL_API_CALL *cl_api_clCreateSamplerWithProperties)( + cl_context /* context */, + const cl_sampler_properties * /* sampler_properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreateSamplerWithProperties; + +#endif + +// Program Object APIs +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithSource)( + cl_context context, cl_uint count, const char **strings, + const size_t *lengths, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( + cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const size_t *lengths, const unsigned char **binaries, + cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef +cl_program(CL_API_CALL *cl_api_clCreateProgramWithBuiltInKernels)( + cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const char *kernel_names, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateProgramWithBuiltInKernels; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clRetainProgram)( + cl_program program) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clReleaseProgram)( + cl_program program) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clBuildProgram)( + cl_program program, cl_uint num_devices, const cl_device_id *device_list, + const char *options, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef cl_int(CL_API_CALL *cl_api_clCompileProgram)( + cl_program program, cl_uint num_devices, const cl_device_id *device_list, + const char *options, cl_uint num_input_headers, + const cl_program *input_headers, const char **header_include_names, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program(CL_API_CALL *cl_api_clLinkProgram)( + cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const char *options, cl_uint num_input_programs, + const cl_program *input_programs, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCompileProgram; +typedef void *cl_api_clLinkProgram; + +#endif + +#ifdef CL_VERSION_2_2 + +typedef +cl_int(CL_API_CALL *cl_api_clSetProgramSpecializationConstant)( + cl_program program, cl_uint spec_id, size_t spec_size, + const void *spec_value) CL_API_SUFFIX__VERSION_2_2; + +typedef cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( + cl_program program, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_2_2; + +#else + +typedef void *cl_api_clSetProgramSpecializationConstant; +typedef void *cl_api_clSetProgramReleaseCallback; + +#endif + +#ifdef CL_VERSION_1_2 + +typedef cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( + cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clUnloadPlatformCompiler; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( + cl_program program, cl_program_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( + cl_program program, cl_device_id device, cl_program_build_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Kernel Object APIs +typedef cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( + cl_program program, const char *kernel_name, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( + cl_program program, cl_uint num_kernels, cl_kernel *kernels, + cl_uint *num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clRetainKernel)( + cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clReleaseKernel)( + cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clSetKernelArg)( + cl_kernel kernel, cl_uint arg_index, size_t arg_size, + const void *arg_value) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( + cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( + cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clGetKernelArgInfo; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( + cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef cl_int(CL_API_CALL *cl_api_clSetKernelArgSVMPointer)( + cl_kernel /* kernel */, cl_uint /* arg_index */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( + cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, + size_t /* param_value_size */, + const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( + cl_kernel /* in_kernel */, cl_device_id /*in_device*/, + cl_kernel_sub_group_info /* param_name */, size_t /*input_value_size*/, + const void * /*input_value*/, size_t /*param_value_size*/, + void * /*param_value*/, + size_t * /*param_value_size_ret*/) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clSetKernelArgSVMPointer; +typedef void *cl_api_clSetKernelExecInfo; +typedef void *cl_api_clGetKernelSubGroupInfoKHR; + +#endif + +// Event Object APIs +typedef cl_int(CL_API_CALL *cl_api_clWaitForEvents)( + cl_uint num_events, const cl_event *event_list) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetEventInfo)( + cl_event event, cl_event_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) + CL_API_SUFFIX__VERSION_1_0; + +// Profiling APIs +typedef cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( + cl_event event, cl_profiling_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Flush and Finish APIs +typedef cl_int(CL_API_CALL *cl_api_clFlush)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clFinish)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +// Enqueued Commands APIs +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + size_t offset, size_t cb, void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + const size_t *buffer_origin, const size_t *host_origin, + const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, void *ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueReadBufferRect; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t offset, size_t cb, const void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + const size_t *buffer_origin, const size_t *host_origin, + const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueWriteBufferRect; + +#endif + +#ifdef CL_VERSION_1_2 + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( + cl_command_queue command_queue, cl_mem buffer, const void *pattern, + size_t pattern_size, size_t offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueFillBuffer; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( + cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, + size_t src_offset, size_t dst_offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( + cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, + const size_t *src_origin, const size_t *dst_origin, const size_t *region, + size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, + size_t dst_slice_pitch, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueCopyBufferRect; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, + const size_t *origin, const size_t *region, size_t row_pitch, + size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, + const size_t *origin, const size_t *region, size_t input_row_pitch, + size_t input_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( + cl_command_queue command_queue, cl_mem image, const void *fill_color, + const size_t origin[3], const size_t region[3], + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueFillImage; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( + cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, + const size_t *src_origin, const size_t *dst_origin, const size_t *region, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( + cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, + const size_t *src_origin, const size_t *region, size_t dst_offset, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( + cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, + size_t src_offset, const size_t *dst_origin, const size_t *region, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef void *(CL_API_CALL *cl_api_clEnqueueMapBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, + cl_map_flags map_flags, size_t offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; + +typedef void *(CL_API_CALL *cl_api_clEnqueueMapImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, + cl_map_flags map_flags, const size_t *origin, const size_t *region, + size_t *image_row_pitch, size_t *image_slice_pitch, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( + cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( + cl_command_queue command_queue, cl_uint num_mem_objects, + const cl_mem *mem_objects, cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueMigrateMemObjects; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueNDRangeKernel)( + cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, + const size_t *global_work_offset, const size_t *global_work_size, + const size_t *local_work_size, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueTask)( + cl_command_queue command_queue, cl_kernel kernel, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( + cl_command_queue command_queue, void(CL_CALLBACK *user_func)(void *), + void *args, size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, + const void **args_mem_loc, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( + cl_command_queue command_queue, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( + cl_command_queue command_queue, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef void *( + CL_API_CALL *cl_api_clGetExtensionFunctionAddressForPlatform)( + cl_platform_id platform, + const char *function_name)CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueMarkerWithWaitList; +typedef void *cl_api_clEnqueueBarrierWithWaitList; +typedef void *cl_api_clGetExtensionFunctionAddressForPlatform; + +#endif + +// Shared Virtual Memory APIs + +#ifdef CL_VERSION_2_0 + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( + cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, + void ** /* svm_pointers */, + void(CL_CALLBACK *pfn_free_func)(cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void ** /* svm_pointers[] */, + void * /* user_data */), + void * /* user_data */, cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemcpy)( + cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, + void * /* dst_ptr */, const void * /* src_ptr */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( + cl_command_queue /* command_queue */, void * /* svm_ptr */, + const void * /* pattern */, size_t /* pattern_size */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMap)( + cl_command_queue /* command_queue */, cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, void * /* svm_ptr */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( + cl_command_queue /* command_queue */, void * /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clEnqueueSVMFree; +typedef void *cl_api_clEnqueueSVMMemcpy; +typedef void *cl_api_clEnqueueSVMMemFill; +typedef void *cl_api_clEnqueueSVMMap; +typedef void *cl_api_clEnqueueSVMUnmap; + +#endif + +// Deprecated APIs +typedef cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( + cl_command_queue command_queue, cl_command_queue_properties properties, + cl_bool enable, cl_command_queue_properties *old_properties) + CL_API_SUFFIX__VERSION_1_0_DEPRECATED; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + size_t image_width, size_t image_height, size_t image_row_pitch, + void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + size_t image_width, size_t image_height, size_t image_depth, + size_t image_row_pitch, size_t image_slice_pitch, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) + CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( + cl_command_queue command_queue, + cl_event *event) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( + cl_command_queue command_queue, cl_uint num_events, + const cl_event *event_list) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef void *(CL_API_CALL *cl_api_clGetExtensionFunctionAddress)( + const char *function_name)CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + +// GL and other APIs +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLBuffer)( + cl_context context, cl_mem_flags flags, cl_GLuint bufobj, + int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( + cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( + cl_mem memobj, cl_gl_object_type *gl_object_type, + cl_GLuint *gl_object_name) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( + cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +/* cl_khr_gl_sharing */ +typedef cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( + const cl_context_properties *properties, cl_gl_context_info param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret); + +/* cl_khr_gl_event */ +typedef cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( + cl_context context, cl_GLsync sync, cl_int *errcode_ret); + +#if defined(_WIN32) + +/* cl_khr_d3d10_sharing */ + +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, + cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( + cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( + cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR( + cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromD3D10BufferKHR(cl_context context, cl_mem_flags flags, + ID3D10Buffer *resource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( + cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( + cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_d3d11_sharing */ +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( + cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, + cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( + cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( + cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D11ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +/* cl_khr_dx9_media_sharing */ +typedef +cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR)( + cl_platform_id platform, cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr *media_adapters_type, void *media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( + cl_context context, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, + cl_uint plane, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +/* cl_khr_d3d11_sharing */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( + cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromD3D11BufferKHR(cl_context context, cl_mem_flags flags, + ID3D11Buffer *resource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( + cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( + cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_dx9_media_sharing */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( + cl_platform_id platform, cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr *media_adapter_type, void *media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( + cl_context context, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, + cl_uint plane, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +#else + +/* cl_khr_d3d10_sharing */ +typedef void *cl_api_clGetDeviceIDsFromD3D10KHR; +typedef void *cl_api_clCreateFromD3D10BufferKHR; +typedef void *cl_api_clCreateFromD3D10Texture2DKHR; +typedef void *cl_api_clCreateFromD3D10Texture3DKHR; +typedef void *cl_api_clEnqueueAcquireD3D10ObjectsKHR; +typedef void *cl_api_clEnqueueReleaseD3D10ObjectsKHR; + +/* cl_khr_d3d11_sharing */ +typedef void *cl_api_clGetDeviceIDsFromD3D11KHR; +typedef void *cl_api_clCreateFromD3D11BufferKHR; +typedef void *cl_api_clCreateFromD3D11Texture2DKHR; +typedef void *cl_api_clCreateFromD3D11Texture3DKHR; +typedef void *cl_api_clEnqueueAcquireD3D11ObjectsKHR; +typedef void *cl_api_clEnqueueReleaseD3D11ObjectsKHR; + +/* cl_khr_dx9_media_sharing */ +typedef void *cl_api_clCreateFromDX9MediaSurfaceKHR; +typedef void *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR; +typedef void *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR; +typedef void *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR; + +#endif + +/* OpenCL 1.1 */ + +#ifdef CL_VERSION_1_1 + +typedef cl_int(CL_API_CALL *cl_api_clSetEventCallback)( + cl_event /* event */, cl_int /* command_exec_callback_type */, + void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( + cl_mem /* buffer */, cl_mem_flags /* flags */, + cl_buffer_create_type /* buffer_create_type */, + const void * /* buffer_create_info */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef +cl_int(CL_API_CALL *cl_api_clSetMemObjectDestructorCallback)( + cl_mem /* memobj */, + void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, + void * /*user_data*/), + void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( + cl_context /* context */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( + cl_event /* event */, + cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clSetEventCallback; +typedef void *cl_api_clCreateSubBuffer; +typedef void *cl_api_clSetMemObjectDestructorCallback; +typedef void *cl_api_clCreateUserEvent; +typedef void *cl_api_clSetUserEventStatus; + +#endif + +typedef cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( + cl_device_id in_device, + const cl_device_partition_property_ext *partition_properties, + cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); + +typedef cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_0; + +/* cl_khr_egl_image */ +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( + cl_context context, CLeglDisplayKHR display, CLeglImageKHR image, + cl_mem_flags flags, const cl_egl_image_properties_khr *properties, + cl_int *errcode_ret); + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_egl_event */ +typedef cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( + cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, + cl_int *errcode_ret); + +#ifdef CL_VERSION_2_1 + +typedef cl_int(CL_API_CALL *cl_api_clSetDefaultDeviceCommandQueue)( + cl_context context, cl_device_id device, + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( + cl_context context, const void *il, size_t length, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfo)( + cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, + size_t input_value_size, const void *input_value, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( + cl_kernel source_kernel, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( + cl_command_queue command_queue, cl_uint num_svm_pointers, + const void **svm_pointers, const size_t *sizes, + cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( + cl_device_id device, cl_ulong *device_timestamp, + cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int(CL_API_CALL *cl_api_clGetHostTimer)( + cl_device_id device, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +#else + +typedef void *cl_api_clSetDefaultDeviceCommandQueue; +typedef void *cl_api_clCreateProgramWithIL; +typedef void *cl_api_clGetKernelSubGroupInfo; +typedef void *cl_api_clCloneKernel; +typedef void *cl_api_clEnqueueSVMMigrateMem; +typedef void *cl_api_clGetDeviceAndHostTimer; +typedef void *cl_api_clGetHostTimer; + +#endif + +/* Vendor dispatch table structure */ + +typedef struct _cl_icd_dispatch { + /* OpenCL 1.0 */ + cl_api_clGetPlatformIDs clGetPlatformIDs; + cl_api_clGetPlatformInfo clGetPlatformInfo; + cl_api_clGetDeviceIDs clGetDeviceIDs; + cl_api_clGetDeviceInfo clGetDeviceInfo; + cl_api_clCreateContext clCreateContext; + cl_api_clCreateContextFromType clCreateContextFromType; + cl_api_clRetainContext clRetainContext; + cl_api_clReleaseContext clReleaseContext; + cl_api_clGetContextInfo clGetContextInfo; + cl_api_clCreateCommandQueue clCreateCommandQueue; + cl_api_clRetainCommandQueue clRetainCommandQueue; + cl_api_clReleaseCommandQueue clReleaseCommandQueue; + cl_api_clGetCommandQueueInfo clGetCommandQueueInfo; + cl_api_clSetCommandQueueProperty clSetCommandQueueProperty; + cl_api_clCreateBuffer clCreateBuffer; + cl_api_clCreateImage2D clCreateImage2D; + cl_api_clCreateImage3D clCreateImage3D; + cl_api_clRetainMemObject clRetainMemObject; + cl_api_clReleaseMemObject clReleaseMemObject; + cl_api_clGetSupportedImageFormats clGetSupportedImageFormats; + cl_api_clGetMemObjectInfo clGetMemObjectInfo; + cl_api_clGetImageInfo clGetImageInfo; + cl_api_clCreateSampler clCreateSampler; + cl_api_clRetainSampler clRetainSampler; + cl_api_clReleaseSampler clReleaseSampler; + cl_api_clGetSamplerInfo clGetSamplerInfo; + cl_api_clCreateProgramWithSource clCreateProgramWithSource; + cl_api_clCreateProgramWithBinary clCreateProgramWithBinary; + cl_api_clRetainProgram clRetainProgram; + cl_api_clReleaseProgram clReleaseProgram; + cl_api_clBuildProgram clBuildProgram; + cl_api_clUnloadCompiler clUnloadCompiler; + cl_api_clGetProgramInfo clGetProgramInfo; + cl_api_clGetProgramBuildInfo clGetProgramBuildInfo; + cl_api_clCreateKernel clCreateKernel; + cl_api_clCreateKernelsInProgram clCreateKernelsInProgram; + cl_api_clRetainKernel clRetainKernel; + cl_api_clReleaseKernel clReleaseKernel; + cl_api_clSetKernelArg clSetKernelArg; + cl_api_clGetKernelInfo clGetKernelInfo; + cl_api_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; + cl_api_clWaitForEvents clWaitForEvents; + cl_api_clGetEventInfo clGetEventInfo; + cl_api_clRetainEvent clRetainEvent; + cl_api_clReleaseEvent clReleaseEvent; + cl_api_clGetEventProfilingInfo clGetEventProfilingInfo; + cl_api_clFlush clFlush; + cl_api_clFinish clFinish; + cl_api_clEnqueueReadBuffer clEnqueueReadBuffer; + cl_api_clEnqueueWriteBuffer clEnqueueWriteBuffer; + cl_api_clEnqueueCopyBuffer clEnqueueCopyBuffer; + cl_api_clEnqueueReadImage clEnqueueReadImage; + cl_api_clEnqueueWriteImage clEnqueueWriteImage; + cl_api_clEnqueueCopyImage clEnqueueCopyImage; + cl_api_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; + cl_api_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; + cl_api_clEnqueueMapBuffer clEnqueueMapBuffer; + cl_api_clEnqueueMapImage clEnqueueMapImage; + cl_api_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; + cl_api_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; + cl_api_clEnqueueTask clEnqueueTask; + cl_api_clEnqueueNativeKernel clEnqueueNativeKernel; + cl_api_clEnqueueMarker clEnqueueMarker; + cl_api_clEnqueueWaitForEvents clEnqueueWaitForEvents; + cl_api_clEnqueueBarrier clEnqueueBarrier; + cl_api_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; + cl_api_clCreateFromGLBuffer clCreateFromGLBuffer; + cl_api_clCreateFromGLTexture2D clCreateFromGLTexture2D; + cl_api_clCreateFromGLTexture3D clCreateFromGLTexture3D; + cl_api_clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer; + cl_api_clGetGLObjectInfo clGetGLObjectInfo; + cl_api_clGetGLTextureInfo clGetGLTextureInfo; + cl_api_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; + cl_api_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; + cl_api_clGetGLContextInfoKHR clGetGLContextInfoKHR; + + /* cl_khr_d3d10_sharing */ + cl_api_clGetDeviceIDsFromD3D10KHR clGetDeviceIDsFromD3D10KHR; + cl_api_clCreateFromD3D10BufferKHR clCreateFromD3D10BufferKHR; + cl_api_clCreateFromD3D10Texture2DKHR clCreateFromD3D10Texture2DKHR; + cl_api_clCreateFromD3D10Texture3DKHR clCreateFromD3D10Texture3DKHR; + cl_api_clEnqueueAcquireD3D10ObjectsKHR clEnqueueAcquireD3D10ObjectsKHR; + cl_api_clEnqueueReleaseD3D10ObjectsKHR clEnqueueReleaseD3D10ObjectsKHR; + + /* OpenCL 1.1 */ + cl_api_clSetEventCallback clSetEventCallback; + cl_api_clCreateSubBuffer clCreateSubBuffer; + cl_api_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; + cl_api_clCreateUserEvent clCreateUserEvent; + cl_api_clSetUserEventStatus clSetUserEventStatus; + cl_api_clEnqueueReadBufferRect clEnqueueReadBufferRect; + cl_api_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; + cl_api_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; + + /* cl_ext_device_fission */ + cl_api_clCreateSubDevicesEXT clCreateSubDevicesEXT; + cl_api_clRetainDeviceEXT clRetainDeviceEXT; + cl_api_clReleaseDeviceEXT clReleaseDeviceEXT; + + /* cl_khr_gl_event */ + cl_api_clCreateEventFromGLsyncKHR clCreateEventFromGLsyncKHR; + + /* OpenCL 1.2 */ + cl_api_clCreateSubDevices clCreateSubDevices; + cl_api_clRetainDevice clRetainDevice; + cl_api_clReleaseDevice clReleaseDevice; + cl_api_clCreateImage clCreateImage; + cl_api_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; + cl_api_clCompileProgram clCompileProgram; + cl_api_clLinkProgram clLinkProgram; + cl_api_clUnloadPlatformCompiler clUnloadPlatformCompiler; + cl_api_clGetKernelArgInfo clGetKernelArgInfo; + cl_api_clEnqueueFillBuffer clEnqueueFillBuffer; + cl_api_clEnqueueFillImage clEnqueueFillImage; + cl_api_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; + cl_api_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; + cl_api_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; + cl_api_clGetExtensionFunctionAddressForPlatform + clGetExtensionFunctionAddressForPlatform; + cl_api_clCreateFromGLTexture clCreateFromGLTexture; + + /* cl_khr_d3d11_sharing */ + cl_api_clGetDeviceIDsFromD3D11KHR clGetDeviceIDsFromD3D11KHR; + cl_api_clCreateFromD3D11BufferKHR clCreateFromD3D11BufferKHR; + cl_api_clCreateFromD3D11Texture2DKHR clCreateFromD3D11Texture2DKHR; + cl_api_clCreateFromD3D11Texture3DKHR clCreateFromD3D11Texture3DKHR; + cl_api_clCreateFromDX9MediaSurfaceKHR clCreateFromDX9MediaSurfaceKHR; + cl_api_clEnqueueAcquireD3D11ObjectsKHR clEnqueueAcquireD3D11ObjectsKHR; + cl_api_clEnqueueReleaseD3D11ObjectsKHR clEnqueueReleaseD3D11ObjectsKHR; + + /* cl_khr_dx9_media_sharing */ + cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR + clGetDeviceIDsFromDX9MediaAdapterKHR; + cl_api_clEnqueueAcquireDX9MediaSurfacesKHR + clEnqueueAcquireDX9MediaSurfacesKHR; + cl_api_clEnqueueReleaseDX9MediaSurfacesKHR + clEnqueueReleaseDX9MediaSurfacesKHR; + + /* cl_khr_egl_image */ + cl_api_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; + cl_api_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; + cl_api_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; + + /* cl_khr_egl_event */ + cl_api_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; + + /* OpenCL 2.0 */ + cl_api_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; + cl_api_clCreatePipe clCreatePipe; + cl_api_clGetPipeInfo clGetPipeInfo; + cl_api_clSVMAlloc clSVMAlloc; + cl_api_clSVMFree clSVMFree; + cl_api_clEnqueueSVMFree clEnqueueSVMFree; + cl_api_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; + cl_api_clEnqueueSVMMemFill clEnqueueSVMMemFill; + cl_api_clEnqueueSVMMap clEnqueueSVMMap; + cl_api_clEnqueueSVMUnmap clEnqueueSVMUnmap; + cl_api_clCreateSamplerWithProperties clCreateSamplerWithProperties; + cl_api_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; + cl_api_clSetKernelExecInfo clSetKernelExecInfo; + + /* cl_khr_sub_groups */ + cl_api_clGetKernelSubGroupInfoKHR clGetKernelSubGroupInfoKHR; + + /* OpenCL 2.1 */ + cl_api_clCloneKernel clCloneKernel; + cl_api_clCreateProgramWithIL clCreateProgramWithIL; + cl_api_clEnqueueSVMMigrateMem clEnqueueSVMMigrateMem; + cl_api_clGetDeviceAndHostTimer clGetDeviceAndHostTimer; + cl_api_clGetHostTimer clGetHostTimer; + cl_api_clGetKernelSubGroupInfo clGetKernelSubGroupInfo; + cl_api_clSetDefaultDeviceCommandQueue clSetDefaultDeviceCommandQueue; + + /* OpenCL 2.2 */ + cl_api_clSetProgramReleaseCallback clSetProgramReleaseCallback; + cl_api_clSetProgramSpecializationConstant clSetProgramSpecializationConstant; + + /* OpenCL 3.0 */ + cl_api_clCreateBufferWithProperties clCreateBufferWithProperties; + cl_api_clCreateImageWithProperties clCreateImageWithProperties; + cl_api_clSetContextDestructorCallback clSetContextDestructorCallback; + +} cl_icd_dispatch; + +#ifdef __cplusplus +} +#endif + +#endif /* #ifndef OPENCL_CL_ICD_H */ diff --git a/CL/cl_layer.h b/CL/cl_layer.h new file mode 100644 index 000000000..fe628bf8a --- /dev/null +++ b/CL/cl_layer.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * OpenCL is a trademark of Apple Inc. used under license by Khronos. + */ + +#ifndef OPENCL_CL_LAYER_H +#define OPENCL_CL_LAYER_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_layer_info; +typedef cl_uint cl_layer_api_version; +#define CL_LAYER_API_VERSION 0x4240 +#define CL_LAYER_NAME 0x4241 +#define CL_LAYER_API_VERSION_100 100 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetLayerInfo(cl_layer_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +typedef cl_int +(CL_API_CALL *pfn_clGetLayerInfo)(cl_layer_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL +clInitLayer(cl_uint num_entries, + const cl_icd_dispatch *target_dispatch, + cl_uint *num_entries_ret, + const cl_icd_dispatch **layer_dispatch_ret); + +typedef cl_int +(CL_API_CALL *pfn_clInitLayer)(cl_uint num_entries, + const cl_icd_dispatch *target_dispatch, + cl_uint *num_entries_ret, + const cl_icd_dispatch **layer_dispatch_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* OPENCL_CL_LAYER_H */ diff --git a/CL/cl_platform.h b/CL/cl_platform.h new file mode 100644 index 000000000..e7a0d6f47 --- /dev/null +++ b/CL/cl_platform.h @@ -0,0 +1,1412 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) + #if !defined(CL_API_ENTRY) + #define CL_API_ENTRY + #endif + #if !defined(CL_API_CALL) + #define CL_API_CALL __stdcall + #endif + #if !defined(CL_CALLBACK) + #define CL_CALLBACK __stdcall + #endif +#else + #if !defined(CL_API_ENTRY) + #define CL_API_ENTRY + #endif + #if !defined(CL_API_CALL) + #define CL_API_CALL + #endif + #if !defined(CL_CALLBACK) + #define CL_CALLBACK + #endif +#endif + +/* + * Deprecation flags refer to the last version of the header in which the + * feature was not deprecated. + * + * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without + * deprecation but is deprecated in versions later than 1.1. + */ + +#ifndef CL_API_SUFFIX_USER +#define CL_API_SUFFIX_USER +#endif + +#ifndef CL_API_PREFIX_USER +#define CL_API_PREFIX_USER +#endif + +#define CL_API_SUFFIX_COMMON CL_API_SUFFIX_USER +#define CL_API_PREFIX_COMMON CL_API_PREFIX_USER + +#define CL_API_SUFFIX__VERSION_1_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_1_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_1_2 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_2 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_3_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__EXPERIMENTAL CL_API_SUFFIX_COMMON + + +#ifdef __GNUC__ + #define CL_API_SUFFIX_DEPRECATED __attribute__((deprecated)) + #define CL_API_PREFIX_DEPRECATED +#elif defined(_WIN32) + #define CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX_DEPRECATED __declspec(deprecated) +#else + #define CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS + #define CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED + #endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS + #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS + #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_2_APIS + #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON +#else + #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED +#endif + +#if (defined (_WIN32) && defined(_MSC_VER)) + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlanguage-extension-token" +#endif + +/* intptr_t is used in cl.h and provided by stddef.h in Visual C++, but not in clang */ +/* stdint.h was missing before Visual Studio 2010, include it for later versions and for clang */ +#if defined(__clang__) || _MSC_VER >= 1600 + #include +#endif + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16 cl_half; +typedef float cl_float; +typedef double cl_double; + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 1.7976931348623158e+308 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f +#define CL_M_SQRT1_2_F 0.707106781f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +#include + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short; +typedef uint16_t cl_ushort; +typedef int32_t cl_int; +typedef uint32_t cl_uint; +typedef int64_t cl_long; +typedef uint64_t cl_ulong; + +typedef uint16_t cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f +#define CL_M_SQRT1_2_F 0.707106781f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF ((cl_float) 1e50) + #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include + +/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #if !defined(__clang__) + #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */ + #endif + typedef __vector unsigned char __cl_uchar16; + typedef __vector signed char __cl_char16; + typedef __vector unsigned short __cl_ushort8; + typedef __vector signed short __cl_short8; + typedef __vector unsigned int __cl_uint4; + typedef __vector signed int __cl_int4; + typedef __vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define __CL_DOUBLE2__ 1 +#endif + +#if defined( __MMX__ ) + #include + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define __CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif + +/* Define capabilities for anonymous struct members. */ +#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ +#elif defined(_WIN32) && defined(_MSC_VER) && !defined(__STDC__) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ +#elif defined(__GNUC__) && ! defined(__STRICT_ANSI__) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined(__clang__) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#else +#define __CL_HAS_ANON_STRUCT__ 0 +#define __CL_ANON_STRUCT__ +#endif + +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) +#endif + +/* Define alignment keys */ +#if defined( __GNUC__ ) || defined(__INTEGRITY) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if __CL_HAS_ANON_STRUCT__ + /* .xyzw and .s0123...{f|F} are supported */ + #define CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif +}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __cl_uchar2__) + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ +typedef cl_uchar4 cl_uchar3; + +typedef union +{ + cl_uchar CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[4]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[2]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8; +#endif +}cl_uchar8; + +typedef union +{ + cl_uchar CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[8]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[4]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8[2]; +#endif +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16; +#endif +}cl_uchar16; + + +/* ---- cl_shortn ---- */ +typedef union +{ + cl_short CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_short lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2; +#endif +}cl_short2; + +typedef union +{ + cl_short CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[2]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4; +#endif +}cl_short4; + +/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ +typedef cl_short4 cl_short3; + +typedef union +{ + cl_short CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[4]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[2]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8; +#endif +}cl_short8; + +typedef union +{ + cl_short CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[8]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[4]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8[2]; +#endif +#if defined( __CL_SHORT16__ ) + __cl_short16 v16; +#endif +}cl_short16; + + +/* ---- cl_ushortn ---- */ +typedef union +{ + cl_ushort CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2; +#endif +}cl_ushort2; + +typedef union +{ + cl_ushort CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[2]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4; +#endif +}cl_ushort4; + +/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ +typedef cl_ushort4 cl_ushort3; + +typedef union +{ + cl_ushort CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[4]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[2]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8; +#endif +}cl_ushort8; + +typedef union +{ + cl_ushort CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[8]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[4]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8[2]; +#endif +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16; +#endif +}cl_ushort16; + + +/* ---- cl_halfn ---- */ +typedef union +{ + cl_half CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_half lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2; +#endif +}cl_half2; + +typedef union +{ + cl_half CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[2]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4; +#endif +}cl_half4; + +/* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. */ +typedef cl_half4 cl_half3; + +typedef union +{ + cl_half CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[4]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[2]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8; +#endif +}cl_half8; + +typedef union +{ + cl_half CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[8]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[4]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8[2]; +#endif +#if defined( __CL_HALF16__ ) + __cl_half16 v16; +#endif +}cl_half16; + +/* ---- cl_intn ---- */ +typedef union +{ + cl_int CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_int lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2; +#endif +}cl_int2; + +typedef union +{ + cl_int CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[2]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4; +#endif +}cl_int4; + +/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ +typedef cl_int4 cl_int3; + +typedef union +{ + cl_int CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[4]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[2]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8; +#endif +}cl_int8; + +typedef union +{ + cl_int CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[8]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[4]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8[2]; +#endif +#if defined( __CL_INT16__ ) + __cl_int16 v16; +#endif +}cl_int16; + + +/* ---- cl_uintn ---- */ +typedef union +{ + cl_uint CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2; +#endif +}cl_uint2; + +typedef union +{ + cl_uint CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[2]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4; +#endif +}cl_uint4; + +/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ +typedef cl_uint4 cl_uint3; + +typedef union +{ + cl_uint CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[4]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[2]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8; +#endif +}cl_uint8; + +typedef union +{ + cl_uint CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[8]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[4]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8[2]; +#endif +#if defined( __CL_UINT16__ ) + __cl_uint16 v16; +#endif +}cl_uint16; + +/* ---- cl_longn ---- */ +typedef union +{ + cl_long CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_long lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2; +#endif +}cl_long2; + +typedef union +{ + cl_long CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[2]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4; +#endif +}cl_long4; + +/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ +typedef cl_long4 cl_long3; + +typedef union +{ + cl_long CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[4]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[2]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8; +#endif +}cl_long8; + +typedef union +{ + cl_long CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[8]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[4]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8[2]; +#endif +#if defined( __CL_LONG16__ ) + __cl_long16 v16; +#endif +}cl_long16; + + +/* ---- cl_ulongn ---- */ +typedef union +{ + cl_ulong CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2; +#endif +}cl_ulong2; + +typedef union +{ + cl_ulong CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[2]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4; +#endif +}cl_ulong4; + +/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */ +typedef cl_ulong4 cl_ulong3; + +typedef union +{ + cl_ulong CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[4]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[2]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8; +#endif +}cl_ulong8; + +typedef union +{ + cl_ulong CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[8]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[4]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8[2]; +#endif +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16; +#endif +}cl_ulong16; + + +/* --- cl_floatn ---- */ + +typedef union +{ + cl_float CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_float lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2; +#endif +}cl_float2; + +typedef union +{ + cl_float CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[2]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4; +#endif +}cl_float4; + +/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */ +typedef cl_float4 cl_float3; + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */ +typedef cl_double4 cl_double3; + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * *b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. + */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ + #pragma warning( pop ) +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/CL/cl_va_api_media_sharing_intel.h b/CL/cl_va_api_media_sharing_intel.h new file mode 100644 index 000000000..547e90e88 --- /dev/null +++ b/CL/cl_va_api_media_sharing_intel.h @@ -0,0 +1,163 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H +#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/*************************************************************** +* cl_intel_sharing_format_query_va_api +***************************************************************/ +#define cl_intel_sharing_format_query_va_api 1 + +/* when cl_intel_va_api_media_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedVA_APIMediaSurfaceFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + VAImageFormat* va_api_formats, + cl_uint* num_surface_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedVA_APIMediaSurfaceFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + VAImageFormat* va_api_formats, + cl_uint* num_surface_formats) ; + +/****************************************** +* cl_intel_va_api_media_sharing extension * +*******************************************/ + +#define cl_intel_va_api_media_sharing 1 + +/* error codes */ +#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098 +#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099 +#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100 +#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101 + +/* cl_va_api_device_source_intel */ +#define CL_VA_API_DISPLAY_INTEL 0x4094 + +/* cl_va_api_device_set_intel */ +#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095 +#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096 + +/* cl_context_info */ +#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097 + +/* cl_mem_info */ +#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098 + +/* cl_image_info */ +#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A +#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B + +typedef cl_uint cl_va_api_device_source_intel; +typedef cl_uint cl_va_api_device_set_intel; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDsFromVA_APIMediaAdapterINTEL( + cl_platform_id platform, + cl_va_api_device_source_intel media_adapter_type, + void* media_adapter, + cl_va_api_device_set_intel media_adapter_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)( + cl_platform_id platform, + cl_va_api_device_source_intel media_adapter_type, + void* media_adapter, + cl_va_api_device_set_intel media_adapter_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromVA_APIMediaSurfaceINTEL( + cl_context context, + cl_mem_flags flags, + VASurfaceID* surface, + cl_uint plane, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)( + cl_context context, + cl_mem_flags flags, + VASurfaceID* surface, + cl_uint plane, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireVA_APIMediaSurfacesINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseVA_APIMediaSurfacesINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */ + diff --git a/CL/cl_version.h b/CL/cl_version.h new file mode 100644 index 000000000..3844938d5 --- /dev/null +++ b/CL/cl_version.h @@ -0,0 +1,81 @@ +/******************************************************************************* + * Copyright (c) 2018-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __CL_VERSION_H +#define __CL_VERSION_H + +/* Detect which version to target */ +#if !defined(CL_TARGET_OPENCL_VERSION) +#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 300 (OpenCL 3.0)") +#define CL_TARGET_OPENCL_VERSION 300 +#endif +#if CL_TARGET_OPENCL_VERSION != 100 && \ + CL_TARGET_OPENCL_VERSION != 110 && \ + CL_TARGET_OPENCL_VERSION != 120 && \ + CL_TARGET_OPENCL_VERSION != 200 && \ + CL_TARGET_OPENCL_VERSION != 210 && \ + CL_TARGET_OPENCL_VERSION != 220 && \ + CL_TARGET_OPENCL_VERSION != 300 +#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 300 (OpenCL 3.0)") +#undef CL_TARGET_OPENCL_VERSION +#define CL_TARGET_OPENCL_VERSION 300 +#endif + + +/* OpenCL Version */ +#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0) +#define CL_VERSION_3_0 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2) +#define CL_VERSION_2_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1) +#define CL_VERSION_2_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0) +#define CL_VERSION_2_0 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2) +#define CL_VERSION_1_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1) +#define CL_VERSION_1_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0) +#define CL_VERSION_1_0 1 +#endif + +/* Allow deprecated APIs for older OpenCL versions. */ +#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_2_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif + +#endif /* __CL_VERSION_H */ diff --git a/CL/license.txt b/CL/license.txt new file mode 100644 index 000000000..f49a4e16e --- /dev/null +++ b/CL/license.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/CL/opencl.h b/CL/opencl.h new file mode 100644 index 000000000..ef8dd1e03 --- /dev/null +++ b/CL/opencl.h @@ -0,0 +1,32 @@ +/******************************************************************************* + * Copyright (c) 2008-2021 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_H +#define __OPENCL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_H */ diff --git a/CL/opencl.hpp b/CL/opencl.hpp new file mode 100644 index 000000000..1e61d7890 --- /dev/null +++ b/CL/opencl.hpp @@ -0,0 +1,10372 @@ +// +// Copyright (c) 2008-2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +/*! \file + * + * \brief C++ bindings for OpenCL 1.0, OpenCL 1.1, OpenCL 1.2, + * OpenCL 2.0, OpenCL 2.1, OpenCL 2.2, and OpenCL 3.0. + * \author Lee Howes and Bruce Merry + * + * Derived from the OpenCL 1.x C++ bindings written by + * Benedict R. Gaster, Laurent Morichetti and Lee Howes + * With additions and fixes from: + * Brian Cole, March 3rd 2010 and April 2012 + * Matt Gruenke, April 2012. + * Bruce Merry, February 2013. + * Tom Deakin and Simon McIntosh-Smith, July 2013 + * James Price, 2015- + * \version 2.2.0 + * \date 2019-09-18 + * + * Optional extension support + * + * cl_ext_device_fission + * #define CL_HPP_USE_CL_DEVICE_FISSION + * cl_khr_d3d10_sharing + * #define CL_HPP_USE_DX_INTEROP + * cl_khr_sub_groups + * #define CL_HPP_USE_CL_SUB_GROUPS_KHR + * cl_khr_image2d_from_buffer + * #define CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR + * + * Doxygen documentation for this header is available here: + * + * http://khronosgroup.github.io/OpenCL-CLHPP/ + * + * The latest version of this header can be found on the GitHub releases page: + * + * https://github.com/KhronosGroup/OpenCL-CLHPP/releases + * + * Bugs and patches can be submitted to the GitHub repository: + * + * https://github.com/KhronosGroup/OpenCL-CLHPP + */ + +/*! \mainpage + * \section intro Introduction + * For many large applications C++ is the language of choice and so it seems + * reasonable to define C++ bindings for OpenCL. + * + * The interface is contained with a single C++ header file \em opencl.hpp and all + * definitions are contained within the namespace \em cl. There is no additional + * requirement to include \em cl.h and to use either the C++ or original C + * bindings; it is enough to simply include \em opencl.hpp. + * + * The bindings themselves are lightweight and correspond closely to the + * underlying C API. Using the C++ bindings introduces no additional execution + * overhead. + * + * There are numerous compatibility, portability and memory management + * fixes in the new header as well as additional OpenCL 2.0 features. + * As a result the header is not directly backward compatible and for this + * reason we release it as opencl.hpp rather than a new version of cl.hpp. + * + * + * \section compatibility Compatibility + * Due to the evolution of the underlying OpenCL API the 2.0 C++ bindings + * include an updated approach to defining supported feature versions + * and the range of valid underlying OpenCL runtime versions supported. + * + * The combination of preprocessor macros CL_HPP_TARGET_OPENCL_VERSION and + * CL_HPP_MINIMUM_OPENCL_VERSION control this range. These are three digit + * decimal values representing OpenCL runime versions. The default for + * the target is 200, representing OpenCL 2.0 and the minimum is also + * defined as 200. These settings would use 2.0 API calls only. + * If backward compatibility with a 1.2 runtime is required, the minimum + * version may be set to 120. + * + * Note that this is a compile-time setting, and so affects linking against + * a particular SDK version rather than the versioning of the loaded runtime. + * + * The earlier versions of the header included basic vector and string + * classes based loosely on STL versions. These were difficult to + * maintain and very rarely used. For the 2.0 header we now assume + * the presence of the standard library unless requested otherwise. + * We use std::array, std::vector, std::shared_ptr and std::string + * throughout to safely manage memory and reduce the chance of a + * recurrance of earlier memory management bugs. + * + * These classes are used through typedefs in the cl namespace: + * cl::array, cl::vector, cl::pointer and cl::string. + * In addition cl::allocate_pointer forwards to std::allocate_shared + * by default. + * In all cases these standard library classes can be replaced with + * custom interface-compatible versions using the CL_HPP_NO_STD_ARRAY, + * CL_HPP_NO_STD_VECTOR, CL_HPP_NO_STD_UNIQUE_PTR and + * CL_HPP_NO_STD_STRING macros. + * + * The OpenCL 1.x versions of the C++ bindings included a size_t wrapper + * class to interface with kernel enqueue. This caused unpleasant interactions + * with the standard size_t declaration and led to namespacing bugs. + * In the 2.0 version we have replaced this with a std::array-based interface. + * However, the old behaviour can be regained for backward compatibility + * using the CL_HPP_ENABLE_SIZE_T_COMPATIBILITY macro. + * + * Finally, the program construction interface used a clumsy vector-of-pairs + * design in the earlier versions. We have replaced that with a cleaner + * vector-of-vectors and vector-of-strings design. However, for backward + * compatibility old behaviour can be regained with the + * CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY macro. + * + * In OpenCL 2.0 OpenCL C is not entirely backward compatibility with + * earlier versions. As a result a flag must be passed to the OpenCL C + * compiled to request OpenCL 2.0 compilation of kernels with 1.2 as + * the default in the absence of the flag. + * In some cases the C++ bindings automatically compile code for ease. + * For those cases the compilation defaults to OpenCL C 2.0. + * If this is not wanted, the CL_HPP_CL_1_2_DEFAULT_BUILD macro may + * be specified to assume 1.2 compilation. + * If more fine-grained decisions on a per-kernel bases are required + * then explicit build operations that take the flag should be used. + * + * + * \section parameterization Parameters + * This header may be parameterized by a set of preprocessor macros. + * + * - CL_HPP_TARGET_OPENCL_VERSION + * + * Defines the target OpenCL runtime version to build the header + * against. Defaults to 200, representing OpenCL 2.0. + * + * - CL_HPP_NO_STD_STRING + * + * Do not use the standard library string class. cl::string is not + * defined and may be defined by the user before opencl.hpp is + * included. + * + * - CL_HPP_NO_STD_VECTOR + * + * Do not use the standard library vector class. cl::vector is not + * defined and may be defined by the user before opencl.hpp is + * included. + * + * - CL_HPP_NO_STD_ARRAY + * + * Do not use the standard library array class. cl::array is not + * defined and may be defined by the user before opencl.hpp is + * included. + * + * - CL_HPP_NO_STD_UNIQUE_PTR + * + * Do not use the standard library unique_ptr class. cl::pointer and + * the cl::allocate_pointer functions are not defined and may be + * defined by the user before opencl.hpp is included. + * + * - CL_HPP_ENABLE_EXCEPTIONS + * + * Enable exceptions for use in the C++ bindings header. This is the + * preferred error handling mechanism but is not required. + * + * - CL_HPP_ENABLE_SIZE_T_COMPATIBILITY + * + * Backward compatibility option to support cl.hpp-style size_t + * class. Replaces the updated std::array derived version and + * removal of size_t from the namespace. Note that in this case the + * new size_t class is placed in the cl::compatibility namespace and + * thus requires an additional using declaration for direct backward + * compatibility. + * + * - CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY + * + * Enable older vector of pairs interface for construction of + * programs. + * + * - CL_HPP_CL_1_2_DEFAULT_BUILD + * + * Default to OpenCL C 1.2 compilation rather than OpenCL C 2.0 + * applies to use of cl::Program construction and other program + * build variants. + * + * - CL_HPP_USE_CL_DEVICE_FISSION + * + * Enable the cl_ext_device_fission extension. + * + * - CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR + * + * Enable the cl_khr_image2d_from_buffer extension. + * + * - CL_HPP_USE_CL_SUB_GROUPS_KHR + * + * Enable the cl_khr_subgroups extension. + * + * - CL_HPP_USE_DX_INTEROP + * + * Enable the cl_khr_d3d10_sharing extension. + * + * - CL_HPP_USE_IL_KHR + * + * Enable the cl_khr_il_program extension. + * + * + * \section example Example + * + * The following example shows a general use case for the C++ + * bindings, including support for the optional exception feature and + * also the supplied vector and string classes, see following sections for + * decriptions of these features. + * + * Note: the C++ bindings use std::call_once and therefore may need to be + * compiled using special command-line options (such as "-pthread") on some + * platforms! + * + * \code + #define CL_HPP_ENABLE_EXCEPTIONS + #define CL_HPP_TARGET_OPENCL_VERSION 200 + + #include + #include + #include + #include + #include + + const int numElements = 32; + + int main(void) + { + // Filter for a 2.0 or newer platform and set it as the default + std::vector platforms; + cl::Platform::get(&platforms); + cl::Platform plat; + for (auto &p : platforms) { + std::string platver = p.getInfo(); + if (platver.find("OpenCL 2.") != std::string::npos || + platver.find("OpenCL 3.") != std::string::npos) { + // Note: an OpenCL 3.x platform may not support all required features! + plat = p; + } + } + if (plat() == 0) { + std::cout << "No OpenCL 2.0 or newer platform found.\n"; + return -1; + } + + cl::Platform newP = cl::Platform::setDefault(plat); + if (newP != plat) { + std::cout << "Error setting default platform.\n"; + return -1; + } + + // C++11 raw string literal for the first kernel + std::string kernel1{R"CLC( + global int globalA; + kernel void updateGlobal() + { + globalA = 75; + } + )CLC"}; + + // Raw string literal for the second kernel + std::string kernel2{R"CLC( + typedef struct { global int *bar; } Foo; + kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, + global int *output, int val, write_only pipe int outPipe, queue_t childQueue) + { + output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar); + write_pipe(outPipe, &val); + queue_t default_queue = get_default_queue(); + ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); + + // Have a child kernel write into third quarter of output + enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, + ^{ + output[get_global_size(0)*2 + get_global_id(0)] = + inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA; + }); + + // Have a child kernel write into last quarter of output + enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, + ^{ + output[get_global_size(0)*3 + get_global_id(0)] = + inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2; + }); + } + )CLC"}; + + std::vector programStrings; + programStrings.push_back(kernel1); + programStrings.push_back(kernel2); + + cl::Program vectorAddProgram(programStrings); + try { + vectorAddProgram.build("-cl-std=CL2.0"); + } + catch (...) { + // Print build info for all devices + cl_int buildErr = CL_SUCCESS; + auto buildInfo = vectorAddProgram.getBuildInfo(&buildErr); + for (auto &pair : buildInfo) { + std::cerr << pair.second << std::endl << std::endl; + } + + return 1; + } + + typedef struct { int *bar; } Foo; + + // Get and run kernel that initializes the program-scope global + // A test for kernels that take no arguments + auto program2Kernel = + cl::KernelFunctor<>(vectorAddProgram, "updateGlobal"); + program2Kernel( + cl::EnqueueArgs( + cl::NDRange(1))); + + ////////////////// + // SVM allocations + + auto anSVMInt = cl::allocate_svm>(); + *anSVMInt = 5; + cl::SVMAllocator>> svmAllocReadOnly; + auto fooPointer = cl::allocate_pointer(svmAllocReadOnly); + fooPointer->bar = anSVMInt.get(); + cl::SVMAllocator> svmAlloc; + std::vector>> inputA(numElements, 1, svmAlloc); + cl::coarse_svm_vector inputB(numElements, 2, svmAlloc); + + ////////////// + // Traditional cl_mem allocations + + std::vector output(numElements, 0xdeadbeef); + cl::Buffer outputBuffer(begin(output), end(output), false); + cl::Pipe aPipe(sizeof(cl_int), numElements / 2); + + // Default command queue, also passed in as a parameter + cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault( + cl::Context::getDefault(), cl::Device::getDefault()); + + auto vectorAddKernel = + cl::KernelFunctor< + decltype(fooPointer)&, + int*, + cl::coarse_svm_vector&, + cl::Buffer, + int, + cl::Pipe&, + cl::DeviceCommandQueue + >(vectorAddProgram, "vectorAdd"); + + // Ensure that the additional SVM pointer is available to the kernel + // This one was not passed as a parameter + vectorAddKernel.setSVMPointers(anSVMInt); + + cl_int error; + vectorAddKernel( + cl::EnqueueArgs( + cl::NDRange(numElements/2), + cl::NDRange(numElements/2)), + fooPointer, + inputA.data(), + inputB, + outputBuffer, + 3, + aPipe, + defaultDeviceQueue, + error + ); + + cl::copy(outputBuffer, begin(output), end(output)); + + cl::Device d = cl::Device::getDefault(); + + std::cout << "Output:\n"; + for (int i = 1; i < numElements; ++i) { + std::cout << "\t" << output[i] << "\n"; + } + std::cout << "\n\n"; + + return 0; + } + * + * \endcode + * + */ +#ifndef CL_HPP_ +#define CL_HPP_ + +/* Handle deprecated preprocessor definitions. In each case, we only check for + * the old name if the new name is not defined, so that user code can define + * both and hence work with either version of the bindings. + */ +#if !defined(CL_HPP_USE_DX_INTEROP) && defined(USE_DX_INTEROP) +# pragma message("opencl.hpp: USE_DX_INTEROP is deprecated. Define CL_HPP_USE_DX_INTEROP instead") +# define CL_HPP_USE_DX_INTEROP +#endif +#if !defined(CL_HPP_USE_CL_DEVICE_FISSION) && defined(USE_CL_DEVICE_FISSION) +# pragma message("opencl.hpp: USE_CL_DEVICE_FISSION is deprecated. Define CL_HPP_USE_CL_DEVICE_FISSION instead") +# define CL_HPP_USE_CL_DEVICE_FISSION +#endif +#if !defined(CL_HPP_ENABLE_EXCEPTIONS) && defined(__CL_ENABLE_EXCEPTIONS) +# pragma message("opencl.hpp: __CL_ENABLE_EXCEPTIONS is deprecated. Define CL_HPP_ENABLE_EXCEPTIONS instead") +# define CL_HPP_ENABLE_EXCEPTIONS +#endif +#if !defined(CL_HPP_NO_STD_VECTOR) && defined(__NO_STD_VECTOR) +# pragma message("opencl.hpp: __NO_STD_VECTOR is deprecated. Define CL_HPP_NO_STD_VECTOR instead") +# define CL_HPP_NO_STD_VECTOR +#endif +#if !defined(CL_HPP_NO_STD_STRING) && defined(__NO_STD_STRING) +# pragma message("opencl.hpp: __NO_STD_STRING is deprecated. Define CL_HPP_NO_STD_STRING instead") +# define CL_HPP_NO_STD_STRING +#endif +#if defined(VECTOR_CLASS) +# pragma message("opencl.hpp: VECTOR_CLASS is deprecated. Alias cl::vector instead") +#endif +#if defined(STRING_CLASS) +# pragma message("opencl.hpp: STRING_CLASS is deprecated. Alias cl::string instead.") +#endif +#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) && defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +# pragma message("opencl.hpp: __CL_USER_OVERRIDE_ERROR_STRINGS is deprecated. Define CL_HPP_USER_OVERRIDE_ERROR_STRINGS instead") +# define CL_HPP_USER_OVERRIDE_ERROR_STRINGS +#endif + +/* Warn about features that are no longer supported + */ +#if defined(__USE_DEV_VECTOR) +# pragma message("opencl.hpp: __USE_DEV_VECTOR is no longer supported. Expect compilation errors") +#endif +#if defined(__USE_DEV_STRING) +# pragma message("opencl.hpp: __USE_DEV_STRING is no longer supported. Expect compilation errors") +#endif + +/* Detect which version to target */ +#if !defined(CL_HPP_TARGET_OPENCL_VERSION) +# pragma message("opencl.hpp: CL_HPP_TARGET_OPENCL_VERSION is not defined. It will default to 300 (OpenCL 3.0)") +# define CL_HPP_TARGET_OPENCL_VERSION 300 +#endif +#if CL_HPP_TARGET_OPENCL_VERSION != 100 && \ + CL_HPP_TARGET_OPENCL_VERSION != 110 && \ + CL_HPP_TARGET_OPENCL_VERSION != 120 && \ + CL_HPP_TARGET_OPENCL_VERSION != 200 && \ + CL_HPP_TARGET_OPENCL_VERSION != 210 && \ + CL_HPP_TARGET_OPENCL_VERSION != 220 && \ + CL_HPP_TARGET_OPENCL_VERSION != 300 +# pragma message("opencl.hpp: CL_HPP_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220 or 300). It will be set to 300 (OpenCL 3.0).") +# undef CL_HPP_TARGET_OPENCL_VERSION +# define CL_HPP_TARGET_OPENCL_VERSION 300 +#endif + +/* Forward target OpenCL version to C headers if necessary */ +#if defined(CL_TARGET_OPENCL_VERSION) +/* Warn if prior definition of CL_TARGET_OPENCL_VERSION is lower than + * requested C++ bindings version */ +#if CL_TARGET_OPENCL_VERSION < CL_HPP_TARGET_OPENCL_VERSION +# pragma message("CL_TARGET_OPENCL_VERSION is already defined as is lower than CL_HPP_TARGET_OPENCL_VERSION") +#endif +#else +# define CL_TARGET_OPENCL_VERSION CL_HPP_TARGET_OPENCL_VERSION +#endif + +#if !defined(CL_HPP_MINIMUM_OPENCL_VERSION) +# define CL_HPP_MINIMUM_OPENCL_VERSION 200 +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION != 100 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 110 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 120 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 200 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 210 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 220 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 300 +# pragma message("opencl.hpp: CL_HPP_MINIMUM_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220 or 300). It will be set to 100") +# undef CL_HPP_MINIMUM_OPENCL_VERSION +# define CL_HPP_MINIMUM_OPENCL_VERSION 100 +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION > CL_HPP_TARGET_OPENCL_VERSION +# error "CL_HPP_MINIMUM_OPENCL_VERSION must not be greater than CL_HPP_TARGET_OPENCL_VERSION" +#endif + +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) +# define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +# define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +# define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) +# define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) +# define CL_USE_DEPRECATED_OPENCL_2_1_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) +# define CL_USE_DEPRECATED_OPENCL_2_2_APIS +#endif + +#ifdef _WIN32 + +#include + +#if defined(CL_HPP_USE_DX_INTEROP) +#include +#include +#endif +#endif // _WIN32 + +#if defined(_MSC_VER) +#include +#endif // _MSC_VER + + // Check for a valid C++ version + +// Need to do both tests here because for some reason __cplusplus is not +// updated in visual studio +#if (!defined(_MSC_VER) && __cplusplus < 201103L) || (defined(_MSC_VER) && _MSC_VER < 1700) +#error Visual studio 2013 or another C++11-supporting compiler required +#endif + +// +#if defined(CL_HPP_USE_CL_DEVICE_FISSION) || defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) +#include +#endif + +#if defined(__APPLE__) || defined(__MACOSX) +#include +#else +#include +#endif // !__APPLE__ + +#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L ) +#define CL_HPP_NOEXCEPT_ noexcept +#else +#define CL_HPP_NOEXCEPT_ +#endif + +#if __cplusplus >= 201703L +# define CL_HPP_DEFINE_STATIC_MEMBER_ inline +#elif defined(_MSC_VER) +# define CL_HPP_DEFINE_STATIC_MEMBER_ __declspec(selectany) +#elif defined(__MINGW32__) +# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((selectany)) +#else +# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((weak)) +#endif // !_MSC_VER + +// Define deprecated prefixes and suffixes to ensure compilation +// in case they are not pre-defined +#if !defined(CL_API_PREFIX__VERSION_1_1_DEPRECATED) +#define CL_API_PREFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_API_PREFIX__VERSION_1_1_DEPRECATED) +#if !defined(CL_API_SUFFIX__VERSION_1_1_DEPRECATED) +#define CL_API_SUFFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_API_SUFFIX__VERSION_1_1_DEPRECATED) + +#if !defined(CL_API_PREFIX__VERSION_1_2_DEPRECATED) +#define CL_API_PREFIX__VERSION_1_2_DEPRECATED +#endif // #if !defined(CL_API_PREFIX__VERSION_1_2_DEPRECATED) +#if !defined(CL_API_SUFFIX__VERSION_1_2_DEPRECATED) +#define CL_API_SUFFIX__VERSION_1_2_DEPRECATED +#endif // #if !defined(CL_API_SUFFIX__VERSION_1_2_DEPRECATED) + +#if !defined(CL_API_PREFIX__VERSION_2_2_DEPRECATED) +#define CL_API_PREFIX__VERSION_2_2_DEPRECATED +#endif // #if !defined(CL_API_PREFIX__VERSION_2_2_DEPRECATED) +#if !defined(CL_API_SUFFIX__VERSION_2_2_DEPRECATED) +#define CL_API_SUFFIX__VERSION_2_2_DEPRECATED +#endif // #if !defined(CL_API_SUFFIX__VERSION_2_2_DEPRECATED) + +#if !defined(CL_CALLBACK) +#define CL_CALLBACK +#endif //CL_CALLBACK + +#include +#include +#include +#include +#include +#include + + +// Define a size_type to represent a correctly resolved size_t +#if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) +namespace cl { + using size_type = ::size_t; +} // namespace cl +#else // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) +namespace cl { + using size_type = size_t; +} // namespace cl +#endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) + + +#if defined(CL_HPP_ENABLE_EXCEPTIONS) +#include +#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) + +#if !defined(CL_HPP_NO_STD_VECTOR) +#include +namespace cl { + template < class T, class Alloc = std::allocator > + using vector = std::vector; +} // namespace cl +#endif // #if !defined(CL_HPP_NO_STD_VECTOR) + +#if !defined(CL_HPP_NO_STD_STRING) +#include +namespace cl { + using string = std::string; +} // namespace cl +#endif // #if !defined(CL_HPP_NO_STD_STRING) + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if !defined(CL_HPP_NO_STD_UNIQUE_PTR) +#include +namespace cl { + // Replace unique_ptr and allocate_pointer for internal use + // to allow user to replace them + template + using pointer = std::unique_ptr; +} // namespace cl +#endif +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if !defined(CL_HPP_NO_STD_ARRAY) +#include +namespace cl { + template < class T, size_type N > + using array = std::array; +} // namespace cl +#endif // #if !defined(CL_HPP_NO_STD_ARRAY) + +// Define size_type appropriately to allow backward-compatibility +// use of the old size_t interface class +#if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) +namespace cl { + namespace compatibility { + /*! \brief class used to interface between C++ and + * OpenCL C calls that require arrays of size_t values, whose + * size is known statically. + */ + template + class size_t + { + private: + size_type data_[N]; + + public: + //! \brief Initialize size_t to all 0s + size_t() + { + for (int i = 0; i < N; ++i) { + data_[i] = 0; + } + } + + size_t(const array &rhs) + { + for (int i = 0; i < N; ++i) { + data_[i] = rhs[i]; + } + } + + size_type& operator[](int index) + { + return data_[index]; + } + + const size_type& operator[](int index) const + { + return data_[index]; + } + + //! \brief Conversion operator to T*. + operator size_type* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const size_type* () const { return data_; } + + operator array() const + { + array ret; + + for (int i = 0; i < N; ++i) { + ret[i] = data_[i]; + } + return ret; + } + }; + } // namespace compatibility + + template + using size_t = compatibility::size_t; +} // namespace cl +#endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) + +// Helper alias to avoid confusing the macros +namespace cl { + namespace detail { + using size_t_array = array; + } // namespace detail +} // namespace cl + + +/*! \namespace cl + * + * \brief The OpenCL C++ bindings are defined within this namespace. + * + */ +namespace cl { + class Memory; + +#define CL_HPP_INIT_CL_EXT_FCN_PTR_(name) \ + if (!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddress(#name); \ + if (!pfn_##name) { \ + } \ + } + +#define CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, name) \ + if (!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddressForPlatform(platform, #name); \ + if (!pfn_##name) { \ + } \ + } + + class Program; + class Device; + class Context; + class CommandQueue; + class DeviceCommandQueue; + class Memory; + class Buffer; + class Pipe; + +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + /*! \brief Exception class + * + * This may be thrown by API functions when CL_HPP_ENABLE_EXCEPTIONS is defined. + */ + class Error : public std::exception + { + private: + cl_int err_; + const char * errStr_; + public: + /*! \brief Create a new CL error exception for a given error code + * and corresponding message. + * + * \param err error code value. + * + * \param errStr a descriptive string that must remain in scope until + * handling of the exception has concluded. If set, it + * will be returned by what(). + */ + Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) + {} + + ~Error() throw() {} + + /*! \brief Get error string associated with exception + * + * \return A memory pointer to the error message string. + */ + virtual const char * what() const throw () + { + if (errStr_ == NULL) { + return "empty"; + } + else { + return errStr_; + } + } + + /*! \brief Get error code associated with exception + * + * \return The error code. + */ + cl_int err(void) const { return err_; } + }; +#define CL_HPP_ERR_STR_(x) #x +#else +#define CL_HPP_ERR_STR_(x) NULL +#endif // CL_HPP_ENABLE_EXCEPTIONS + + +namespace detail +{ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) +static inline cl_int errHandler ( + cl_int err, + const char * errStr = NULL) +{ + if (err != CL_SUCCESS) { + throw Error(err, errStr); + } + return err; +} +#else +static inline cl_int errHandler (cl_int err, const char * errStr = NULL) +{ + (void) errStr; // suppress unused variable warning + return err; +} +#endif // CL_HPP_ENABLE_EXCEPTIONS +} + + + +//! \cond DOXYGEN_DETAIL +#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) +#define __GET_DEVICE_INFO_ERR CL_HPP_ERR_STR_(clGetDeviceInfo) +#define __GET_PLATFORM_INFO_ERR CL_HPP_ERR_STR_(clGetPlatformInfo) +#define __GET_DEVICE_IDS_ERR CL_HPP_ERR_STR_(clGetDeviceIDs) +#define __GET_PLATFORM_IDS_ERR CL_HPP_ERR_STR_(clGetPlatformIDs) +#define __GET_CONTEXT_INFO_ERR CL_HPP_ERR_STR_(clGetContextInfo) +#define __GET_EVENT_INFO_ERR CL_HPP_ERR_STR_(clGetEventInfo) +#define __GET_EVENT_PROFILE_INFO_ERR CL_HPP_ERR_STR_(clGetEventProfileInfo) +#define __GET_MEM_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetMemObjectInfo) +#define __GET_IMAGE_INFO_ERR CL_HPP_ERR_STR_(clGetImageInfo) +#define __GET_SAMPLER_INFO_ERR CL_HPP_ERR_STR_(clGetSamplerInfo) +#define __GET_KERNEL_INFO_ERR CL_HPP_ERR_STR_(clGetKernelInfo) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __GET_KERNEL_ARG_INFO_ERR CL_HPP_ERR_STR_(clGetKernelArgInfo) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +#define __GET_KERNEL_SUB_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelSubGroupInfo) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#define __GET_KERNEL_WORK_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelWorkGroupInfo) +#define __GET_PROGRAM_INFO_ERR CL_HPP_ERR_STR_(clGetProgramInfo) +#define __GET_PROGRAM_BUILD_INFO_ERR CL_HPP_ERR_STR_(clGetProgramBuildInfo) +#define __GET_COMMAND_QUEUE_INFO_ERR CL_HPP_ERR_STR_(clGetCommandQueueInfo) + +#define __CREATE_CONTEXT_ERR CL_HPP_ERR_STR_(clCreateContext) +#define __CREATE_CONTEXT_FROM_TYPE_ERR CL_HPP_ERR_STR_(clCreateContextFromType) +#define __GET_SUPPORTED_IMAGE_FORMATS_ERR CL_HPP_ERR_STR_(clGetSupportedImageFormats) + +#define __CREATE_BUFFER_ERR CL_HPP_ERR_STR_(clCreateBuffer) +#define __COPY_ERR CL_HPP_ERR_STR_(cl::copy) +#define __CREATE_SUBBUFFER_ERR CL_HPP_ERR_STR_(clCreateSubBuffer) +#define __CREATE_GL_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer) +#define __CREATE_GL_RENDER_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer) +#define __GET_GL_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetGLObjectInfo) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __CREATE_IMAGE_ERR CL_HPP_ERR_STR_(clCreateImage) +#define __CREATE_GL_TEXTURE_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture) +#define __IMAGE_DIMENSION_ERR CL_HPP_ERR_STR_(Incorrect image dimensions) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR CL_HPP_ERR_STR_(clSetMemObjectDestructorCallback) + +#define __CREATE_USER_EVENT_ERR CL_HPP_ERR_STR_(clCreateUserEvent) +#define __SET_USER_EVENT_STATUS_ERR CL_HPP_ERR_STR_(clSetUserEventStatus) +#define __SET_EVENT_CALLBACK_ERR CL_HPP_ERR_STR_(clSetEventCallback) +#define __WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clWaitForEvents) + +#define __CREATE_KERNEL_ERR CL_HPP_ERR_STR_(clCreateKernel) +#define __SET_KERNEL_ARGS_ERR CL_HPP_ERR_STR_(clSetKernelArg) +#define __CREATE_PROGRAM_WITH_SOURCE_ERR CL_HPP_ERR_STR_(clCreateProgramWithSource) +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +#define __CREATE_PROGRAM_WITH_IL_ERR CL_HPP_ERR_STR_(clCreateProgramWithIL) +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 +#define __CREATE_PROGRAM_WITH_BINARY_ERR CL_HPP_ERR_STR_(clCreateProgramWithBinary) +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 +#define __CREATE_PROGRAM_WITH_IL_ERR CL_HPP_ERR_STR_(clCreateProgramWithIL) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR CL_HPP_ERR_STR_(clCreateProgramWithBuiltInKernels) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __BUILD_PROGRAM_ERR CL_HPP_ERR_STR_(clBuildProgram) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __COMPILE_PROGRAM_ERR CL_HPP_ERR_STR_(clCompileProgram) +#define __LINK_PROGRAM_ERR CL_HPP_ERR_STR_(clLinkProgram) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __CREATE_KERNELS_IN_PROGRAM_ERR CL_HPP_ERR_STR_(clCreateKernelsInProgram) + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +#define __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateCommandQueueWithProperties) +#define __CREATE_SAMPLER_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateSamplerWithProperties) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#define __SET_COMMAND_QUEUE_PROPERTY_ERR CL_HPP_ERR_STR_(clSetCommandQueueProperty) +#define __ENQUEUE_READ_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueReadBuffer) +#define __ENQUEUE_READ_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueReadBufferRect) +#define __ENQUEUE_WRITE_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueWriteBuffer) +#define __ENQUEUE_WRITE_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueWriteBufferRect) +#define __ENQEUE_COPY_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyBuffer) +#define __ENQEUE_COPY_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferRect) +#define __ENQUEUE_FILL_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueFillBuffer) +#define __ENQUEUE_READ_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueReadImage) +#define __ENQUEUE_WRITE_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueWriteImage) +#define __ENQUEUE_COPY_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyImage) +#define __ENQUEUE_FILL_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueFillImage) +#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyImageToBuffer) +#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferToImage) +#define __ENQUEUE_MAP_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueMapBuffer) +#define __ENQUEUE_MAP_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueMapImage) +#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR CL_HPP_ERR_STR_(clEnqueueUnMapMemObject) +#define __ENQUEUE_NDRANGE_KERNEL_ERR CL_HPP_ERR_STR_(clEnqueueNDRangeKernel) +#define __ENQUEUE_NATIVE_KERNEL CL_HPP_ERR_STR_(clEnqueueNativeKernel) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR CL_HPP_ERR_STR_(clEnqueueMigrateMemObjects) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 +#define __ENQUEUE_MIGRATE_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMigrateMem) +#define __SET_DEFAULT_DEVICE_COMMAND_QUEUE_ERR CL_HPP_ERR_STR_(clSetDefaultDeviceCommandQueue) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 + + +#define __ENQUEUE_ACQUIRE_GL_ERR CL_HPP_ERR_STR_(clEnqueueAcquireGLObjects) +#define __ENQUEUE_RELEASE_GL_ERR CL_HPP_ERR_STR_(clEnqueueReleaseGLObjects) + +#define __CREATE_PIPE_ERR CL_HPP_ERR_STR_(clCreatePipe) +#define __GET_PIPE_INFO_ERR CL_HPP_ERR_STR_(clGetPipeInfo) + + +#define __RETAIN_ERR CL_HPP_ERR_STR_(Retain Object) +#define __RELEASE_ERR CL_HPP_ERR_STR_(Release Object) +#define __FLUSH_ERR CL_HPP_ERR_STR_(clFlush) +#define __FINISH_ERR CL_HPP_ERR_STR_(clFinish) +#define __VECTOR_CAPACITY_ERR CL_HPP_ERR_STR_(Vector capacity error) + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 +#define __GET_HOST_TIMER_ERR CL_HPP_ERR_STR_(clGetHostTimer) +#define __GET_DEVICE_AND_HOST_TIMER_ERR CL_HPP_ERR_STR_(clGetDeviceAndHostTimer) +#endif +#if CL_HPP_TARGET_OPENCL_VERSION >= 220 +#define __SET_PROGRAM_RELEASE_CALLBACK_ERR CL_HPP_ERR_STR_(clSetProgramReleaseCallback) +#define __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR CL_HPP_ERR_STR_(clSetProgramSpecializationConstant) +#endif + + +/** + * CL 1.2 version that uses device fission. + */ +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevices) +#else +#define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevicesEXT) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __ENQUEUE_MARKER_ERR CL_HPP_ERR_STR_(clEnqueueMarker) +#define __ENQUEUE_WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clEnqueueWaitForEvents) +#define __ENQUEUE_BARRIER_ERR CL_HPP_ERR_STR_(clEnqueueBarrier) +#define __UNLOAD_COMPILER_ERR CL_HPP_ERR_STR_(clUnloadCompiler) +#define __CREATE_GL_TEXTURE_2D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture2D) +#define __CREATE_GL_TEXTURE_3D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture3D) +#define __CREATE_IMAGE2D_ERR CL_HPP_ERR_STR_(clCreateImage2D) +#define __CREATE_IMAGE3D_ERR CL_HPP_ERR_STR_(clCreateImage3D) +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +/** + * Deprecated APIs for 2.0 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +#define __CREATE_COMMAND_QUEUE_ERR CL_HPP_ERR_STR_(clCreateCommandQueue) +#define __ENQUEUE_TASK_ERR CL_HPP_ERR_STR_(clEnqueueTask) +#define __CREATE_SAMPLER_ERR CL_HPP_ERR_STR_(clCreateSampler) +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +/** + * CL 1.2 marker and barrier commands + */ +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __ENQUEUE_MARKER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueMarkerWithWaitList) +#define __ENQUEUE_BARRIER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueBarrierWithWaitList) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 +#define __CLONE_KERNEL_ERR CL_HPP_ERR_STR_(clCloneKernel) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 + +#endif // CL_HPP_USER_OVERRIDE_ERROR_STRINGS +//! \endcond + + +namespace detail { + +// Generic getInfoHelper. The final parameter is used to guide overload +// resolution: the actual parameter passed is an int, which makes this +// a worse conversion sequence than a specialization that declares the +// parameter as an int. +template +inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) +{ + return f(name, sizeof(T), param, NULL); +} + +// Specialized for getInfo +// Assumes that the output vector was correctly resized on the way in +template +inline cl_int getInfoHelper(Func f, cl_uint name, vector>* param, int) +{ + if (name != CL_PROGRAM_BINARIES) { + return CL_INVALID_VALUE; + } + if (param) { + // Create array of pointers, calculate total size and pass pointer array in + size_type numBinaries = param->size(); + vector binariesPointers(numBinaries); + + for (size_type i = 0; i < numBinaries; ++i) + { + binariesPointers[i] = (*param)[i].data(); + } + + cl_int err = f(name, numBinaries * sizeof(unsigned char*), binariesPointers.data(), NULL); + + if (err != CL_SUCCESS) { + return err; + } + } + + + return CL_SUCCESS; +} + +// Specialized getInfoHelper for vector params +template +inline cl_int getInfoHelper(Func f, cl_uint name, vector* param, long) +{ + size_type required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + const size_type elements = required / sizeof(T); + + // Temporary to avoid changing param on an error + vector localData(elements); + err = f(name, required, localData.data(), NULL); + if (err != CL_SUCCESS) { + return err; + } + if (param) { + *param = std::move(localData); + } + + return CL_SUCCESS; +} + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper( + Func f, cl_uint name, vector* param, int, typename T::cl_type = 0) +{ + size_type required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + const size_type elements = required / sizeof(typename T::cl_type); + + vector value(elements); + err = f(name, required, value.data(), NULL); + if (err != CL_SUCCESS) { + return err; + } + + if (param) { + // Assign to convert CL type to T for each element + param->resize(elements); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < elements; i++) { + (*param)[i] = T(value[i], true); + } + } + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for string params +template +inline cl_int getInfoHelper(Func f, cl_uint name, string* param, long) +{ + size_type required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + // std::string has a constant data member + // a char vector does not + if (required > 0) { + vector value(required); + err = f(name, required, value.data(), NULL); + if (err != CL_SUCCESS) { + return err; + } + if (param) { + param->assign(begin(value), prev(end(value))); + } + } + else if (param) { + param->assign(""); + } + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for clsize_t params +template +inline cl_int getInfoHelper(Func f, cl_uint name, array* param, long) +{ + size_type required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + size_type elements = required / sizeof(size_type); + vector value(elements, 0); + + err = f(name, required, value.data(), NULL); + if (err != CL_SUCCESS) { + return err; + } + + // Bound the copy with N to prevent overruns + // if passed N > than the amount copied + if (elements > N) { + elements = N; + } + for (size_type i = 0; i < elements; ++i) { + (*param)[i] = value[i]; + } + + return CL_SUCCESS; +} + +template struct ReferenceHandler; + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) +{ + typename T::cl_type value; + cl_int err = f(name, sizeof(value), &value, NULL); + if (err != CL_SUCCESS) { + return err; + } + *param = value; + if (value != NULL) + { + err = param->retain(); + if (err != CL_SUCCESS) { + return err; + } + } + return CL_SUCCESS; +} + +#define CL_HPP_PARAM_NAME_INFO_1_0_(F) \ + F(cl_platform_info, CL_PLATFORM_PROFILE, string) \ + F(cl_platform_info, CL_PLATFORM_VERSION, string) \ + F(cl_platform_info, CL_PLATFORM_NAME, string) \ + F(cl_platform_info, CL_PLATFORM_VENDOR, string) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS, string) \ + \ + F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ + F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, cl::vector) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ + F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ + F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_type) \ + F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ + F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ + F(cl_device_info, CL_DEVICE_NAME, string) \ + F(cl_device_info, CL_DEVICE_VENDOR, string) \ + F(cl_device_info, CL_DRIVER_VERSION, string) \ + F(cl_device_info, CL_DEVICE_PROFILE, string) \ + F(cl_device_info, CL_DEVICE_VERSION, string) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS, string) \ + \ + F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ + F(cl_context_info, CL_CONTEXT_DEVICES, cl::vector) \ + F(cl_context_info, CL_CONTEXT_PROPERTIES, cl::vector) \ + \ + F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ + F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ + F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ + F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_int) \ + \ + F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ + \ + F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ + F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ + F(cl_mem_info, CL_MEM_SIZE, size_type) \ + F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ + F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ + \ + F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ + F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, size_type) \ + F(cl_image_info, CL_IMAGE_ROW_PITCH, size_type) \ + F(cl_image_info, CL_IMAGE_SLICE_PITCH, size_type) \ + F(cl_image_info, CL_IMAGE_WIDTH, size_type) \ + F(cl_image_info, CL_IMAGE_HEIGHT, size_type) \ + F(cl_image_info, CL_IMAGE_DEPTH, size_type) \ + \ + F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ + F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ + \ + F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ + F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ + F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ + F(cl_program_info, CL_PROGRAM_DEVICES, cl::vector) \ + F(cl_program_info, CL_PROGRAM_SOURCE, string) \ + F(cl_program_info, CL_PROGRAM_BINARY_SIZES, cl::vector) \ + F(cl_program_info, CL_PROGRAM_BINARIES, cl::vector>) \ + \ + F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, string) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, string) \ + \ + F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, string) \ + F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ + F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, size_type) \ + F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::detail::size_t_array) \ + F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ + \ + F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ + F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) + + +#define CL_HPP_PARAM_NAME_INFO_1_1_(F) \ + F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, string) \ + \ + F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ + F(cl_mem_info, CL_MEM_OFFSET, size_type) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_type) \ + F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ + \ + F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) + +#define CL_HPP_PARAM_NAME_INFO_1_2_(F) \ + F(cl_program_info, CL_PROGRAM_NUM_KERNELS, size_type) \ + F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, string) \ + \ + F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ + \ + F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, string) \ + \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, string) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, string) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_GLOBAL_WORK_SIZE, cl::detail::size_t_array) \ + \ + F(cl_device_info, CL_DEVICE_LINKER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl::Device) \ + F(cl_device_info, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, cl::vector) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPE, cl::vector) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, cl_bool) \ + F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, string) \ + F(cl_device_info, CL_DEVICE_PRINTF_BUFFER_SIZE, size_type) \ + \ + F(cl_image_info, CL_IMAGE_ARRAY_SIZE, size_type) \ + F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ + F(cl_image_info, CL_IMAGE_NUM_SAMPLES, cl_uint) + +#define CL_HPP_PARAM_NAME_INFO_2_0_(F) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_QUEUES, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_EVENTS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_PIPE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_PIPE_MAX_PACKET_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SVM_CAPABILITIES, cl_device_svm_capabilities) \ + F(cl_device_info, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, cl_uint ) \ + F(cl_device_info, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, size_type ) \ + F(cl_device_info, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, size_type ) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_COMPLETE, cl_ulong) \ + F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, cl_bool) \ + F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_PTRS, void**) \ + F(cl_command_queue_info, CL_QUEUE_SIZE, cl_uint) \ + F(cl_mem_info, CL_MEM_USES_SVM_POINTER, cl_bool) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, size_type) \ + F(cl_pipe_info, CL_PIPE_PACKET_SIZE, cl_uint) \ + F(cl_pipe_info, CL_PIPE_MAX_PACKETS, cl_uint) + +#define CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(F) \ + F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, size_type) + +#define CL_HPP_PARAM_NAME_INFO_IL_KHR_(F) \ + F(cl_device_info, CL_DEVICE_IL_VERSION_KHR, string) \ + F(cl_program_info, CL_PROGRAM_IL_KHR, cl::vector) + +#define CL_HPP_PARAM_NAME_INFO_2_1_(F) \ + F(cl_platform_info, CL_PLATFORM_HOST_TIMER_RESOLUTION, cl_ulong) \ + F(cl_program_info, CL_PROGRAM_IL, cl::vector) \ + F(cl_device_info, CL_DEVICE_MAX_NUM_SUB_GROUPS, cl_uint) \ + F(cl_device_info, CL_DEVICE_IL_VERSION, string) \ + F(cl_device_info, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, cl_bool) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE_DEFAULT, cl::DeviceCommandQueue) \ + F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, cl::detail::size_t_array) \ + F(cl_kernel_sub_group_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) + +#define CL_HPP_PARAM_NAME_INFO_2_2_(F) \ + F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, cl_bool) \ + F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT, cl_bool) + +#define CL_HPP_PARAM_NAME_DEVICE_FISSION_(F) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, cl::vector) \ + F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, cl::vector) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, cl::vector) + +#define CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_CL3_SHARED_(F) \ + F(cl_platform_info, CL_PLATFORM_NUMERIC_VERSION_KHR, cl_version_khr) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR, cl::vector) \ + \ + F(cl_device_info, CL_DEVICE_NUMERIC_VERSION_KHR, cl_version_khr) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR, cl::vector) \ + F(cl_device_info, CL_DEVICE_ILS_WITH_VERSION_KHR, cl::vector) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR, cl::vector) + +#define CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_KHRONLY_(F) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR, cl_version_khr) + +#define CL_HPP_PARAM_NAME_INFO_3_0_(F) \ + F(cl_platform_info, CL_PLATFORM_NUMERIC_VERSION, cl_version) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS_WITH_VERSION, cl::vector) \ + \ + F(cl_device_info, CL_DEVICE_NUMERIC_VERSION, cl_version) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS_WITH_VERSION, cl::vector) \ + F(cl_device_info, CL_DEVICE_ILS_WITH_VERSION, cl::vector) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION, cl::vector) \ + F(cl_device_info, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, cl_device_atomic_capabilities) \ + F(cl_device_info, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, cl_device_atomic_capabilities) \ + F(cl_device_info, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_ALL_VERSIONS, cl::vector) \ + F(cl_device_info, CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_type) \ + F(cl_device_info, CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_FEATURES, cl::vector) \ + F(cl_device_info, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, cl_device_device_enqueue_capabilities) \ + F(cl_device_info, CL_DEVICE_PIPE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED, string) \ + \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES_ARRAY, cl::vector) \ + F(cl_mem_info, CL_MEM_PROPERTIES, cl::vector) \ + F(cl_pipe_info, CL_PIPE_PROPERTIES, cl::vector) \ + F(cl_sampler_info, CL_SAMPLER_PROPERTIES, cl::vector) + +template +struct param_traits {}; + +#define CL_HPP_DECLARE_PARAM_TRAITS_(token, param_name, T) \ +struct token; \ +template<> \ +struct param_traits \ +{ \ + enum { value = param_name }; \ + typedef T param_type; \ +}; + +CL_HPP_PARAM_NAME_INFO_1_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 +CL_HPP_PARAM_NAME_INFO_1_1_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +CL_HPP_PARAM_NAME_INFO_1_2_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +CL_HPP_PARAM_NAME_INFO_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 +CL_HPP_PARAM_NAME_INFO_2_1_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 +#if CL_HPP_TARGET_OPENCL_VERSION >= 220 +CL_HPP_PARAM_NAME_INFO_2_2_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 +#if CL_HPP_TARGET_OPENCL_VERSION >= 300 +CL_HPP_PARAM_NAME_INFO_3_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 300 + +#if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) && CL_HPP_TARGET_OPENCL_VERSION < 210 +CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // #if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) && CL_HPP_TARGET_OPENCL_VERSION < 210 + +#if defined(CL_HPP_USE_IL_KHR) && CL_HPP_TARGET_OPENCL_VERSION < 210 +CL_HPP_PARAM_NAME_INFO_IL_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // #if defined(CL_HPP_USE_IL_KHR) + + +// Flags deprecated in OpenCL 2.0 +#define CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(F) \ + F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) + +#define CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(F) \ + F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) + +#define CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(F) \ + F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) + +// Include deprecated query flags based on versions +// Only include deprecated 1.0 flags if 2.0 not active as there is an enum clash +#if CL_HPP_TARGET_OPENCL_VERSION > 100 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 && CL_HPP_TARGET_OPENCL_VERSION < 200 +CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 110 +#if CL_HPP_TARGET_OPENCL_VERSION > 110 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 +CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 +#if CL_HPP_TARGET_OPENCL_VERSION > 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 +CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + +#if defined(CL_HPP_USE_CL_DEVICE_FISSION) +CL_HPP_PARAM_NAME_DEVICE_FISSION_(CL_HPP_DECLARE_PARAM_TRAITS_); +#endif // CL_HPP_USE_CL_DEVICE_FISSION + +#if defined(cl_khr_extended_versioning) +#if CL_HPP_TARGET_OPENCL_VERSION < 300 +CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_CL3_SHARED_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION < 300 +CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_KHRONLY_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // cl_khr_extended_versioning + +#if defined(cl_khr_device_uuid) +using uuid_array = array; +using luid_array = array; +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_UUID_KHR, uuid_array) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DRIVER_UUID_KHR, uuid_array) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LUID_VALID_KHR, cl_bool) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LUID_KHR, luid_array) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NODE_MASK_KHR, cl_uint) +#endif + +#if defined(cl_khr_pci_bus_info) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PCI_BUS_INFO_KHR, cl_device_pci_bus_info_khr) +#endif + +#if defined(cl_khr_integer_dot_product) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, cl_device_integer_dot_product_capabilities_khr) +#if defined(CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, cl_device_integer_dot_product_acceleration_properties_khr) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, cl_device_integer_dot_product_acceleration_properties_khr) +#endif // defined(CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR) +#endif // defined(cl_khr_integer_dot_product) + +#ifdef CL_PLATFORM_ICD_SUFFIX_KHR +CL_HPP_DECLARE_PARAM_TRAITS_(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, string) +#endif + +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) +#endif +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, vector) +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_WIDTH_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_BOARD_NAME_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_BOARD_NAME_AMD, string) +#endif + +#ifdef CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, cl_ulong) +#endif +#ifdef CL_DEVICE_JOB_SLOTS_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_JOB_SLOTS_ARM, cl_uint) +#endif +#ifdef CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, cl_bitfield) +#endif +#ifdef CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM, vector) +#endif +#ifdef CL_DEVICE_MAX_WARP_COUNT_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_MAX_WARP_COUNT_ARM, cl_uint) +#endif +#ifdef CL_KERNEL_MAX_WARP_COUNT_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_info, CL_KERNEL_MAX_WARP_COUNT_ARM, cl_uint) +#endif +#ifdef CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM, cl_uint) +#endif +#ifdef CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM, cl_int) +#endif +#ifdef CL_KERNEL_EXEC_INFO_WARP_COUNT_LIMIT_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WARP_COUNT_LIMIT_ARM, cl_uint) +#endif +#ifdef CL_KERNEL_EXEC_INFO_COMPUTE_UNIT_MAX_QUEUED_BATCHES_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_COMPUTE_UNIT_MAX_QUEUED_BATCHES_ARM, cl_uint) +#endif + +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) +#endif +#ifdef CL_DEVICE_WARP_SIZE_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) +#endif +#ifdef CL_DEVICE_GPU_OVERLAP_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) +#endif +#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) +#endif +#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) +#endif + +// Convenience functions + +template +inline cl_int +getInfo(Func f, cl_uint name, T* param) +{ + return getInfoHelper(f, name, param, 0); +} + +template +struct GetInfoFunctor0 +{ + Func f_; const Arg0& arg0_; + cl_int operator ()( + cl_uint param, size_type size, void* value, size_type* size_ret) + { return f_(arg0_, param, size, value, size_ret); } +}; + +template +struct GetInfoFunctor1 +{ + Func f_; const Arg0& arg0_; const Arg1& arg1_; + cl_int operator ()( + cl_uint param, size_type size, void* value, size_type* size_ret) + { return f_(arg0_, arg1_, param, size, value, size_ret); } +}; + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) +{ + GetInfoFunctor0 f0 = { f, arg0 }; + return getInfoHelper(f0, name, param, 0); +} + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) +{ + GetInfoFunctor1 f0 = { f, arg0, arg1 }; + return getInfoHelper(f0, name, param, 0); +} + + +template +struct ReferenceHandler +{ }; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +/** + * OpenCL 1.2 devices do have retain/release. + */ +template <> +struct ReferenceHandler +{ + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int retain(cl_device_id device) + { return ::clRetainDevice(device); } + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int release(cl_device_id device) + { return ::clReleaseDevice(device); } +}; +#else // CL_HPP_TARGET_OPENCL_VERSION >= 120 +/** + * OpenCL 1.1 devices do not have retain/release. + */ +template <> +struct ReferenceHandler +{ + // cl_device_id does not have retain(). + static cl_int retain(cl_device_id) + { return CL_SUCCESS; } + // cl_device_id does not have release(). + static cl_int release(cl_device_id) + { return CL_SUCCESS; } +}; +#endif // ! (CL_HPP_TARGET_OPENCL_VERSION >= 120) + +template <> +struct ReferenceHandler +{ + // cl_platform_id does not have retain(). + static cl_int retain(cl_platform_id) + { return CL_SUCCESS; } + // cl_platform_id does not have release(). + static cl_int release(cl_platform_id) + { return CL_SUCCESS; } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_context context) + { return ::clRetainContext(context); } + static cl_int release(cl_context context) + { return ::clReleaseContext(context); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_command_queue queue) + { return ::clRetainCommandQueue(queue); } + static cl_int release(cl_command_queue queue) + { return ::clReleaseCommandQueue(queue); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_mem memory) + { return ::clRetainMemObject(memory); } + static cl_int release(cl_mem memory) + { return ::clReleaseMemObject(memory); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_sampler sampler) + { return ::clRetainSampler(sampler); } + static cl_int release(cl_sampler sampler) + { return ::clReleaseSampler(sampler); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_program program) + { return ::clRetainProgram(program); } + static cl_int release(cl_program program) + { return ::clReleaseProgram(program); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_kernel kernel) + { return ::clRetainKernel(kernel); } + static cl_int release(cl_kernel kernel) + { return ::clReleaseKernel(kernel); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_event event) + { return ::clRetainEvent(event); } + static cl_int release(cl_event event) + { return ::clReleaseEvent(event); } +}; + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 +// Extracts version number with major in the upper 16 bits, minor in the lower 16 +static cl_uint getVersion(const vector &versionInfo) +{ + int highVersion = 0; + int lowVersion = 0; + int index = 7; + while(versionInfo[index] != '.' ) { + highVersion *= 10; + highVersion += versionInfo[index]-'0'; + ++index; + } + ++index; + while(versionInfo[index] != ' ' && versionInfo[index] != '\0') { + lowVersion *= 10; + lowVersion += versionInfo[index]-'0'; + ++index; + } + return (highVersion << 16) | lowVersion; +} + +static cl_uint getPlatformVersion(cl_platform_id platform) +{ + size_type size = 0; + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); + + vector versionInfo(size); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, versionInfo.data(), &size); + return getVersion(versionInfo); +} + +static cl_uint getDevicePlatformVersion(cl_device_id device) +{ + cl_platform_id platform; + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + return getPlatformVersion(platform); +} + +static cl_uint getContextPlatformVersion(cl_context context) +{ + // The platform cannot be queried directly, so we first have to grab a + // device and obtain its context + size_type size = 0; + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + if (size == 0) + return 0; + vector devices(size/sizeof(cl_device_id)); + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices.data(), NULL); + return getDevicePlatformVersion(devices[0]); +} +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 + +template +class Wrapper +{ +public: + typedef T cl_type; + +protected: + cl_type object_; + +public: + Wrapper() : object_(NULL) { } + + Wrapper(const cl_type &obj, bool retainObject) : object_(obj) + { + if (retainObject) { + detail::errHandler(retain(), __RETAIN_ERR); + } + } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + detail::errHandler(retain(), __RETAIN_ERR); + } + + Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT_ + { + object_ = rhs.object_; + rhs.object_ = NULL; + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + detail::errHandler(retain(), __RETAIN_ERR); + } + return *this; + } + + Wrapper& operator = (Wrapper&& rhs) + { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + rhs.object_ = NULL; + } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs; + return *this; + } + + const cl_type& operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + + cl_type get() const { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + cl_int retain() const + { + if (object_ != nullptr) { + return ReferenceHandler::retain(object_); + } + else { + return CL_SUCCESS; + } + } + + cl_int release() const + { + if (object_ != nullptr) { + return ReferenceHandler::release(object_); + } + else { + return CL_SUCCESS; + } + } +}; + +template <> +class Wrapper +{ +public: + typedef cl_device_id cl_type; + +protected: + cl_type object_; + bool referenceCountable_; + + static bool isReferenceCountable(cl_device_id device) + { + bool retVal = false; +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + if (device != NULL) { + int version = getDevicePlatformVersion(device); + if(version > ((1 << 16) + 1)) { + retVal = true; + } + } +#else // CL_HPP_MINIMUM_OPENCL_VERSION < 120 + retVal = true; +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + (void)device; + return retVal; + } + +public: + Wrapper() : object_(NULL), referenceCountable_(false) + { + } + + Wrapper(const cl_type &obj, bool retainObject) : + object_(obj), + referenceCountable_(false) + { + referenceCountable_ = isReferenceCountable(obj); + + if (retainObject) { + detail::errHandler(retain(), __RETAIN_ERR); + } + } + + ~Wrapper() + { + release(); + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + referenceCountable_ = isReferenceCountable(object_); + detail::errHandler(retain(), __RETAIN_ERR); + } + + Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT_ + { + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + rhs.object_ = NULL; + rhs.referenceCountable_ = false; + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + detail::errHandler(retain(), __RETAIN_ERR); + } + return *this; + } + + Wrapper& operator = (Wrapper&& rhs) + { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + rhs.object_ = NULL; + rhs.referenceCountable_ = false; + } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs; + referenceCountable_ = isReferenceCountable(object_); + return *this; + } + + const cl_type& operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + + cl_type get() const { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + template + friend inline cl_int getInfoHelper(Func, cl_uint, vector*, int, typename U::cl_type); + + cl_int retain() const + { + if( object_ != nullptr && referenceCountable_ ) { + return ReferenceHandler::retain(object_); + } + else { + return CL_SUCCESS; + } + } + + cl_int release() const + { + if (object_ != nullptr && referenceCountable_) { + return ReferenceHandler::release(object_); + } + else { + return CL_SUCCESS; + } + } +}; + +template +inline bool operator==(const Wrapper &lhs, const Wrapper &rhs) +{ + return lhs() == rhs(); +} + +template +inline bool operator!=(const Wrapper &lhs, const Wrapper &rhs) +{ + return !operator==(lhs, rhs); +} + +} // namespace detail +//! \endcond + + + + + +/*! \stuct ImageFormat + * \brief Adds constructors and member functions for cl_image_format. + * + * \see cl_image_format + */ +struct ImageFormat : public cl_image_format +{ + //! \brief Default constructor - performs no initialization. + ImageFormat(){} + + //! \brief Initializing constructor. + ImageFormat(cl_channel_order order, cl_channel_type type) + { + image_channel_order = order; + image_channel_data_type = type; + } + + //! \brief Copy constructor. + ImageFormat(const ImageFormat &other) { *this = other; } + + //! \brief Assignment operator. + ImageFormat& operator = (const ImageFormat& rhs) + { + if (this != &rhs) { + this->image_channel_data_type = rhs.image_channel_data_type; + this->image_channel_order = rhs.image_channel_order; + } + return *this; + } +}; + +/*! \brief Class interface for cl_device_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_device_id + */ +class Device : public detail::Wrapper +{ +private: + static std::once_flag default_initialized_; + static Device default_; + static cl_int default_error_; + + /*! \brief Create the default context. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault(); + + /*! \brief Create the default platform from a provided platform. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Device &p) { + default_ = p; + } + +public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = Device(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + + //! \brief Default constructor - initializes to NULL. + Device() : detail::Wrapper() { } + + /*! \brief Constructor from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + explicit Device(const cl_device_id &device, bool retainObject = false) : + detail::Wrapper(device, retainObject) { } + + /*! \brief Returns the first device on the default context. + * + * \see Context::getDefault() + */ + static Device getDefault( + cl_int *errResult = NULL) + { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (errResult != NULL) { + *errResult = default_error_; + } + return default_; + } + + /** + * Modify the default device to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default device. + * Should be compared to the passed value to ensure that it was updated. + */ + static Device setDefault(const Device &default_device) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_device)); + detail::errHandler(default_error_); + return default_; + } + + /*! \brief Assignment operator from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device& operator = (const cl_device_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Device(const Device& dev) : detail::Wrapper(dev) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Device& operator = (const Device &dev) + { + detail::Wrapper::operator=(dev); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Device(Device&& dev) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(dev)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Device& operator = (Device &&dev) + { + detail::Wrapper::operator=(std::move(dev)); + return *this; + } + + //! \brief Wrapper for clGetDeviceInfo(). + template + cl_int getInfo(cl_device_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetDeviceInfo, object_, name, param), + __GET_DEVICE_INFO_ERR); + } + + //! \brief Wrapper for clGetDeviceInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_device_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + /** + * Return the current value of the host clock as seen by the device. + * The resolution of the device timer may be queried with the + * CL_DEVICE_PROFILING_TIMER_RESOLUTION query. + * @return The host timer value. + */ + cl_ulong getHostTimer(cl_int *error = nullptr) + { + cl_ulong retVal = 0; + cl_int err = + clGetHostTimer(this->get(), &retVal); + detail::errHandler( + err, + __GET_HOST_TIMER_ERR); + if (error) { + *error = err; + } + return retVal; + } + + /** + * Return a synchronized pair of host and device timestamps as seen by device. + * Use to correlate the clocks and get the host timer only using getHostTimer + * as a lower cost mechanism in between calls. + * The resolution of the host timer may be queried with the + * CL_PLATFORM_HOST_TIMER_RESOLUTION query. + * The resolution of the device timer may be queried with the + * CL_DEVICE_PROFILING_TIMER_RESOLUTION query. + * @return A pair of (device timer, host timer) timer values. + */ + std::pair getDeviceAndHostTimer(cl_int *error = nullptr) + { + std::pair retVal; + cl_int err = + clGetDeviceAndHostTimer(this->get(), &(retVal.first), &(retVal.second)); + detail::errHandler( + err, + __GET_DEVICE_AND_HOST_TIMER_ERR); + if (error) { + *error = err; + } + return retVal; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + /** + * CL 1.2 version + */ +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + //! \brief Wrapper for clCreateSubDevices(). + cl_int createSubDevices( + const cl_device_partition_property * properties, + vector* devices) + { + cl_uint n = 0; + cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); + } + + vector ids(n); + err = clCreateSubDevices(object_, properties, n, ids.data(), NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); + } + + // Cannot trivially assign because we need to capture intermediates + // with safe construction + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + // We do not need to retain because this device is being created + // by the runtime + (*devices)[i] = Device(ids[i], false); + } + } + + return CL_SUCCESS; + } +#elif defined(CL_HPP_USE_CL_DEVICE_FISSION) + +/** + * CL 1.1 version that uses device fission extension. + */ + cl_int createSubDevices( + const cl_device_partition_property_ext * properties, + vector* devices) + { + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_API_SUFFIX__VERSION_1_1; + + static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateSubDevicesEXT); + + cl_uint n = 0; + cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); + } + + vector ids(n); + err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids.data(), NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); + } + // Cannot trivially assign because we need to capture intermediates + // with safe construction + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + // We do not need to retain because this device is being created + // by the runtime + (*devices)[i] = Device(ids[i], false); + } + } + return CL_SUCCESS; + } +#endif // defined(CL_HPP_USE_CL_DEVICE_FISSION) +}; + +using BuildLogType = vector::param_type>>; +#if defined(CL_HPP_ENABLE_EXCEPTIONS) +/** +* Exception class for build errors to carry build info +*/ +class BuildError : public Error +{ +private: + BuildLogType buildLogs; +public: + BuildError(cl_int err, const char * errStr, const BuildLogType &vec) : Error(err, errStr), buildLogs(vec) + { + } + + BuildLogType getBuildLog() const + { + return buildLogs; + } +}; +namespace detail { + static inline cl_int buildErrHandler( + cl_int err, + const char * errStr, + const BuildLogType &buildLogs) + { + if (err != CL_SUCCESS) { + throw BuildError(err, errStr, buildLogs); + } + return err; + } +} // namespace detail + +#else +namespace detail { + static inline cl_int buildErrHandler( + cl_int err, + const char * errStr, + const BuildLogType &buildLogs) + { + (void)buildLogs; // suppress unused variable warning + (void)errStr; + return err; + } +} // namespace detail +#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) + +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Device::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ Device Device::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Device::default_error_ = CL_SUCCESS; + +/*! \brief Class interface for cl_platform_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_platform_id + */ +class Platform : public detail::Wrapper +{ +private: + static std::once_flag default_initialized_; + static Platform default_; + static cl_int default_error_; + + /*! \brief Create the default context. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault() { + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { + // If default wasn't passed ,generate one + // Otherwise set it + cl_uint n = 0; + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + default_error_ = err; + return; + } + if (n == 0) { + default_error_ = CL_INVALID_PLATFORM; + return; + } + + vector ids(n); + err = ::clGetPlatformIDs(n, ids.data(), NULL); + if (err != CL_SUCCESS) { + default_error_ = err; + return; + } + + default_ = Platform(ids[0]); + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif + } + + /*! \brief Create the default platform from a provided platform. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Platform &p) { + default_ = p; + } + +public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = Platform(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + + //! \brief Default constructor - initializes to NULL. + Platform() : detail::Wrapper() { } + + /*! \brief Constructor from cl_platform_id. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This simply copies the platform ID value, which is an inexpensive operation. + */ + explicit Platform(const cl_platform_id &platform, bool retainObject = false) : + detail::Wrapper(platform, retainObject) { } + + /*! \brief Assignment operator from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform& operator = (const cl_platform_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + static Platform getDefault( + cl_int *errResult = NULL) + { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (errResult != NULL) { + *errResult = default_error_; + } + return default_; + } + + /** + * Modify the default platform to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default platform. + * Should be compared to the passed value to ensure that it was updated. + */ + static Platform setDefault(const Platform &default_platform) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_platform)); + detail::errHandler(default_error_); + return default_; + } + + //! \brief Wrapper for clGetPlatformInfo(). + template + cl_int getInfo(cl_platform_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetPlatformInfo, object_, name, param), + __GET_PLATFORM_INFO_ERR); + } + + //! \brief Wrapper for clGetPlatformInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_platform_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of devices for this platform. + * + * Wraps clGetDeviceIDs(). + */ + cl_int getDevices( + cl_device_type type, + vector* devices) const + { + cl_uint n = 0; + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); + if (err != CL_SUCCESS && err != CL_DEVICE_NOT_FOUND) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + vector ids(n); + if (n>0) { + err = ::clGetDeviceIDs(object_, type, n, ids.data(), NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + } + + // Cannot trivially assign because we need to capture intermediates + // with safe construction + // We must retain things we obtain from the API to avoid releasing + // API-owned objects. + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + (*devices)[i] = Device(ids[i], true); + } + } + return CL_SUCCESS; + } + +#if defined(CL_HPP_USE_DX_INTEROP) + /*! \brief Get the list of available D3D10 devices. + * + * \param d3d_device_source. + * + * \param d3d_object. + * + * \param d3d_device_set. + * + * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device + * values returned in devices can be used to identify a specific OpenCL + * device. If \a devices argument is NULL, this argument is ignored. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully. + * + * The application can query specific capabilities of the OpenCL device(s) + * returned by cl::getDevices. This can be used by the application to + * determine which device(s) to use. + * + * \note In the case that exceptions are enabled and a return value + * other than CL_SUCCESS is generated, then cl::Error exception is + * generated. + */ + cl_int getDevices( + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + vector* devices) const + { + typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint* num_devices); + + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + + static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(object_, clGetDeviceIDsFromD3D10KHR); + + cl_uint n = 0; + cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + 0, + NULL, + &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + vector ids(n); + err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + n, + ids.data(), + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + // Cannot trivially assign because we need to capture intermediates + // with safe construction + // We must retain things we obtain from the API to avoid releasing + // API-owned objects. + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + (*devices)[i] = Device(ids[i], true); + } + } + return CL_SUCCESS; + } +#endif + + /*! \brief Gets a list of available platforms. + * + * Wraps clGetPlatformIDs(). + */ + static cl_int get( + vector* platforms) + { + cl_uint n = 0; + + if( platforms == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + vector ids(n); + err = ::clGetPlatformIDs(n, ids.data(), NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + if (platforms) { + platforms->resize(ids.size()); + + // Platforms don't reference count + for (size_type i = 0; i < ids.size(); i++) { + (*platforms)[i] = Platform(ids[i]); + } + } + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static cl_int get( + Platform * platform) + { + cl_int err; + Platform default_platform = Platform::getDefault(&err); + if (platform) { + *platform = default_platform; + } + return err; + } + + /*! \brief Gets the first available platform, returning it by value. + * + * \return Returns a valid platform if one is available. + * If no platform is available will return a null platform. + * Throws an exception if no platforms are available + * or an error condition occurs. + * Wraps clGetPlatformIDs(), returning the first result. + */ + static Platform get( + cl_int * errResult = NULL) + { + cl_int err; + Platform default_platform = Platform::getDefault(&err); + if (errResult) { + *errResult = err; + } + return default_platform; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + //! \brief Wrapper for clUnloadCompiler(). + cl_int + unloadCompiler() + { + return ::clUnloadPlatformCompiler(object_); + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +}; // class Platform + +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Platform::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ Platform Platform::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Platform::default_error_ = CL_SUCCESS; + + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +/** + * Unload the OpenCL compiler. + * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. + */ +inline CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int +UnloadCompiler() CL_API_SUFFIX__VERSION_1_1_DEPRECATED; +inline cl_int +UnloadCompiler() +{ + return ::clUnloadCompiler(); +} +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +/*! \brief Class interface for cl_context. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_context as the original. For details, see + * clRetainContext() and clReleaseContext(). + * + * \see cl_context + */ +class Context + : public detail::Wrapper +{ +private: + static std::once_flag default_initialized_; + static Context default_; + static cl_int default_error_; + + /*! \brief Create the default context from the default device type in the default platform. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault() { + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { +#if !defined(__APPLE__) && !defined(__MACOS) + const Platform &p = Platform::getDefault(); + cl_platform_id defaultPlatform = p(); + cl_context_properties properties[3] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)defaultPlatform, 0 + }; +#else // #if !defined(__APPLE__) && !defined(__MACOS) + cl_context_properties *properties = nullptr; +#endif // #if !defined(__APPLE__) && !defined(__MACOS) + + default_ = Context( + CL_DEVICE_TYPE_DEFAULT, + properties, + NULL, + NULL, + &default_error_); + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif + } + + + /*! \brief Create the default context from a provided Context. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Context &c) { + default_ = c; + } + +public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = Context(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + + /*! \brief Constructs a context including a list of specified devices. + * + * Wraps clCreateContext(). + */ + Context( + const vector& devices, + const cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + size_type, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateContext( + properties, (cl_uint) numDevices, + deviceIDs.data(), + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a context including a specific device. + * + * Wraps clCreateContext(). + */ + Context( + const Device& device, + const cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + size_type, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + cl_device_id deviceID = device(); + + object_ = ::clCreateContext( + properties, 1, + &deviceID, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a context including all or a subset of devices of a specified type. + * + * Wraps clCreateContextFromType(). + */ + Context( + cl_device_type type, + const cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + size_type, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + +#if !defined(__APPLE__) && !defined(__MACOS) + cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; + + if (properties == NULL) { + // Get a valid platform ID as we cannot send in a blank one + vector platforms; + error = Platform::get(&platforms); + if (error != CL_SUCCESS) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + // Check the platforms we found for a device of our specified type + cl_context_properties platform_id = 0; + for (unsigned int i = 0; i < platforms.size(); i++) { + + vector devices; + +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try { +#endif + + error = platforms[i].getDevices(type, &devices); + +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + } catch (cl::Error& e) { + error = e.err(); + } + // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type + // We do error checking next anyway, and can throw there if needed +#endif + + // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + if (devices.size() > 0) { + platform_id = (cl_context_properties)platforms[i](); + break; + } + } + + if (platform_id == 0) { + detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = CL_DEVICE_NOT_FOUND; + } + return; + } + + prop[1] = platform_id; + properties = &prop[0]; + } +#endif + object_ = ::clCreateContextFromType( + properties, type, notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Context(const Context& ctx) : detail::Wrapper(ctx) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Context& operator = (const Context &ctx) + { + detail::Wrapper::operator=(ctx); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Context(Context&& ctx) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(ctx)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Context& operator = (Context &&ctx) + { + detail::Wrapper::operator=(std::move(ctx)); + return *this; + } + + + /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. + * + * \note All calls to this function return the same cl_context as the first. + */ + static Context getDefault(cl_int * err = NULL) + { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + /** + * Modify the default context to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default context. + * Should be compared to the passed value to ensure that it was updated. + */ + static Context setDefault(const Context &default_context) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_context)); + detail::errHandler(default_error_); + return default_; + } + + //! \brief Default constructor - initializes to NULL. + Context() : detail::Wrapper() { } + + /*! \brief Constructor from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_context + * into the new Context object. + */ + explicit Context(const cl_context& context, bool retainObject = false) : + detail::Wrapper(context, retainObject) { } + + /*! \brief Assignment operator from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseContext() on the value previously held by this instance. + */ + Context& operator = (const cl_context& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetContextInfo(). + template + cl_int getInfo(cl_context_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetContextInfo, object_, name, param), + __GET_CONTEXT_INFO_ERR); + } + + //! \brief Wrapper for clGetContextInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_context_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of supported image formats. + * + * Wraps clGetSupportedImageFormats(). + */ + cl_int getSupportedImageFormats( + cl_mem_flags flags, + cl_mem_object_type type, + vector* formats) const + { + cl_uint numEntries; + + if (!formats) { + return CL_SUCCESS; + } + + cl_int err = ::clGetSupportedImageFormats( + object_, + flags, + type, + 0, + NULL, + &numEntries); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + if (numEntries > 0) { + vector value(numEntries); + err = ::clGetSupportedImageFormats( + object_, + flags, + type, + numEntries, + (cl_image_format*)value.data(), + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + formats->assign(begin(value), end(value)); + } + else { + // If no values are being returned, ensure an empty vector comes back + formats->clear(); + } + + return CL_SUCCESS; + } +}; + +inline void Device::makeDefault() +{ + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { + cl_int error = 0; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + default_error_ = error; + } + else { + default_ = context.getInfo()[0]; + default_error_ = CL_SUCCESS; + } + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif +} + +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Context::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ Context Context::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Context::default_error_ = CL_SUCCESS; + +/*! \brief Class interface for cl_event. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_event as the original. For details, see + * clRetainEvent() and clReleaseEvent(). + * + * \see cl_event + */ +class Event : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Event() : detail::Wrapper() { } + + /*! \brief Constructor from cl_event - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This effectively transfers ownership of a refcount on the cl_event + * into the new Event object. + */ + explicit Event(const cl_event& event, bool retainObject = false) : + detail::Wrapper(event, retainObject) { } + + /*! \brief Assignment operator from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseEvent() on the value previously held by this instance. + */ + Event& operator = (const cl_event& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetEventInfo(). + template + cl_int getInfo(cl_event_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetEventInfo, object_, name, param), + __GET_EVENT_INFO_ERR); + } + + //! \brief Wrapper for clGetEventInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_event_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + //! \brief Wrapper for clGetEventProfilingInfo(). + template + cl_int getProfilingInfo(cl_profiling_info name, T* param) const + { + return detail::errHandler(detail::getInfo( + &::clGetEventProfilingInfo, object_, name, param), + __GET_EVENT_PROFILE_INFO_ERR); + } + + //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. + template typename + detail::param_traits::param_type + getProfilingInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_profiling_info, name>::param_type param; + cl_int result = getProfilingInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Blocks the calling thread until this event completes. + * + * Wraps clWaitForEvents(). + */ + cl_int wait() const + { + return detail::errHandler( + ::clWaitForEvents(1, &object_), + __WAIT_FOR_EVENTS_ERR); + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + /*! \brief Registers a user callback function for a specific command execution status. + * + * Wraps clSetEventCallback(). + */ + cl_int setCallback( + cl_int type, + void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetEventCallback( + object_, + type, + pfn_notify, + user_data), + __SET_EVENT_CALLBACK_ERR); + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + + /*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ + static cl_int + waitForEvents(const vector& events) + { + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), + __WAIT_FOR_EVENTS_ERR); + } +}; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 +/*! \brief Class interface for user events (a subset of cl_event's). + * + * See Event for details about copy semantics, etc. + */ +class UserEvent : public Event +{ +public: + /*! \brief Constructs a user event on a given context. + * + * Wraps clCreateUserEvent(). + */ + UserEvent( + const Context& context, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateUserEvent( + context(), + &error); + + detail::errHandler(error, __CREATE_USER_EVENT_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + UserEvent() : Event() { } + + /*! \brief Sets the execution status of a user event object. + * + * Wraps clSetUserEventStatus(). + */ + cl_int setStatus(cl_int status) + { + return detail::errHandler( + ::clSetUserEventStatus(object_,status), + __SET_USER_EVENT_STATUS_ERR); + } +}; +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + +/*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ +inline static cl_int +WaitForEvents(const vector& events) +{ + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), + __WAIT_FOR_EVENTS_ERR); +} + +/*! \brief Class interface for cl_mem. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_mem as the original. For details, see + * clRetainMemObject() and clReleaseMemObject(). + * + * \see cl_mem + */ +class Memory : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Memory() : detail::Wrapper() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * Optionally transfer ownership of a refcount on the cl_mem + * into the new Memory object. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * + * See Memory for further details. + */ + explicit Memory(const cl_mem& memory, bool retainObject) : + detail::Wrapper(memory, retainObject) { } + + /*! \brief Assignment operator from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseMemObject() on the value previously held by this instance. + */ + Memory& operator = (const cl_mem& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Memory(const Memory& mem) : detail::Wrapper(mem) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Memory& operator = (const Memory &mem) + { + detail::Wrapper::operator=(mem); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Memory(Memory&& mem) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(mem)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Memory& operator = (Memory &&mem) + { + detail::Wrapper::operator=(std::move(mem)); + return *this; + } + + + //! \brief Wrapper for clGetMemObjectInfo(). + template + cl_int getInfo(cl_mem_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetMemObjectInfo, object_, name, param), + __GET_MEM_OBJECT_INFO_ERR); + } + + //! \brief Wrapper for clGetMemObjectInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_mem_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + /*! \brief Registers a callback function to be called when the memory object + * is no longer needed. + * + * Wraps clSetMemObjectDestructorCallback(). + * + * Repeated calls to this function, for a given cl_mem value, will append + * to the list of functions called (in reverse order) when memory object's + * resources are freed and the memory object is deleted. + * + * \note + * The registered callbacks are associated with the underlying cl_mem + * value - not the Memory class instance. + */ + cl_int setDestructorCallback( + void (CL_CALLBACK * pfn_notify)(cl_mem, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetMemObjectDestructorCallback( + object_, + pfn_notify, + user_data), + __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + +}; + +// Pre-declare copy functions +class Buffer; +template< typename IteratorType > +cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +namespace detail +{ + class SVMTraitNull + { + public: + static cl_svm_mem_flags getSVMMemFlags() + { + return 0; + } + }; +} // namespace detail + +template +class SVMTraitReadWrite +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_READ_WRITE | + Trait::getSVMMemFlags(); + } +}; + +template +class SVMTraitReadOnly +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_READ_ONLY | + Trait::getSVMMemFlags(); + } +}; + +template +class SVMTraitWriteOnly +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_WRITE_ONLY | + Trait::getSVMMemFlags(); + } +}; + +template> +class SVMTraitCoarse +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return Trait::getSVMMemFlags(); + } +}; + +template> +class SVMTraitFine +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_SVM_FINE_GRAIN_BUFFER | + Trait::getSVMMemFlags(); + } +}; + +template> +class SVMTraitAtomic +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return + CL_MEM_SVM_FINE_GRAIN_BUFFER | + CL_MEM_SVM_ATOMICS | + Trait::getSVMMemFlags(); + } +}; + +// Pre-declare SVM map function +template +inline cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL); + +/** + * STL-like allocator class for managing SVM objects provided for convenience. + * + * Note that while this behaves like an allocator for the purposes of constructing vectors and similar objects, + * care must be taken when using with smart pointers. + * The allocator should not be used to construct a unique_ptr if we are using coarse-grained SVM mode because + * the coarse-grained management behaviour would behave incorrectly with respect to reference counting. + * + * Instead the allocator embeds a Deleter which may be used with unique_ptr and is used + * with the allocate_shared and allocate_ptr supplied operations. + */ +template +class SVMAllocator { +private: + Context context_; + +public: + typedef T value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + template + struct rebind + { + typedef SVMAllocator other; + }; + + template + friend class SVMAllocator; + + SVMAllocator() : + context_(Context::getDefault()) + { + } + + explicit SVMAllocator(cl::Context context) : + context_(context) + { + } + + + SVMAllocator(const SVMAllocator &other) : + context_(other.context_) + { + } + + template + SVMAllocator(const SVMAllocator &other) : + context_(other.context_) + { + } + + ~SVMAllocator() + { + } + + pointer address(reference r) CL_HPP_NOEXCEPT_ + { + return std::addressof(r); + } + + const_pointer address(const_reference r) CL_HPP_NOEXCEPT_ + { + return std::addressof(r); + } + + /** + * Allocate an SVM pointer. + * + * If the allocator is coarse-grained, this will take ownership to allow + * containers to correctly construct data in place. + */ + pointer allocate( + size_type size, + typename cl::SVMAllocator::const_pointer = 0) + { + // Allocate memory with default alignment matching the size of the type + void* voidPointer = + clSVMAlloc( + context_(), + SVMTrait::getSVMMemFlags(), + size*sizeof(T), + 0); + pointer retValue = reinterpret_cast( + voidPointer); +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + if (!retValue) { + std::bad_alloc excep; + throw excep; + } +#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) + + // If allocation was coarse-grained then map it + if (!(SVMTrait::getSVMMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { + cl_int err = enqueueMapSVM(retValue, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, size*sizeof(T)); + if (err != CL_SUCCESS) { + std::bad_alloc excep; + throw excep; + } + } + + // If exceptions disabled, return null pointer from allocator + return retValue; + } + + void deallocate(pointer p, size_type) + { + clSVMFree(context_(), p); + } + + /** + * Return the maximum possible allocation size. + * This is the minimum of the maximum sizes of all devices in the context. + */ + size_type max_size() const CL_HPP_NOEXCEPT_ + { + size_type maxSize = std::numeric_limits::max() / sizeof(T); + + for (const Device &d : context_.getInfo()) { + maxSize = std::min( + maxSize, + static_cast(d.getInfo())); + } + + return maxSize; + } + + template< class U, class... Args > + void construct(U* p, Args&&... args) + { + new(p)T(args...); + } + + template< class U > + void destroy(U* p) + { + p->~U(); + } + + /** + * Returns true if the contexts match. + */ + inline bool operator==(SVMAllocator const& rhs) + { + return (context_==rhs.context_); + } + + inline bool operator!=(SVMAllocator const& a) + { + return !operator==(a); + } +}; // class SVMAllocator return cl::pointer(tmp, detail::Deleter{alloc, copies}); + + +template +class SVMAllocator { +public: + typedef void value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + template + struct rebind + { + typedef SVMAllocator other; + }; + + template + friend class SVMAllocator; +}; + +#if !defined(CL_HPP_NO_STD_UNIQUE_PTR) +namespace detail +{ + template + class Deleter { + private: + Alloc alloc_; + size_type copies_; + + public: + typedef typename std::allocator_traits::pointer pointer; + + Deleter(const Alloc &alloc, size_type copies) : alloc_{ alloc }, copies_{ copies } + { + } + + void operator()(pointer ptr) const { + Alloc tmpAlloc{ alloc_ }; + std::allocator_traits::destroy(tmpAlloc, std::addressof(*ptr)); + std::allocator_traits::deallocate(tmpAlloc, ptr, copies_); + } + }; +} // namespace detail + +/** + * Allocation operation compatible with std::allocate_ptr. + * Creates a unique_ptr by default. + * This requirement is to ensure that the control block is not + * allocated in memory inaccessible to the host. + */ +template +cl::pointer> allocate_pointer(const Alloc &alloc_, Args&&... args) +{ + Alloc alloc(alloc_); + static const size_type copies = 1; + + // Ensure that creation of the management block and the + // object are dealt with separately such that we only provide a deleter + + T* tmp = std::allocator_traits::allocate(alloc, copies); + if (!tmp) { + std::bad_alloc excep; + throw excep; + } + try { + std::allocator_traits::construct( + alloc, + std::addressof(*tmp), + std::forward(args)...); + + return cl::pointer>(tmp, detail::Deleter{alloc, copies}); + } + catch (std::bad_alloc&) + { + std::allocator_traits::deallocate(alloc, tmp, copies); + throw; + } +} + +template< class T, class SVMTrait, class... Args > +cl::pointer>> allocate_svm(Args... args) +{ + SVMAllocator alloc; + return cl::allocate_pointer(alloc, args...); +} + +template< class T, class SVMTrait, class... Args > +cl::pointer>> allocate_svm(const cl::Context &c, Args... args) +{ + SVMAllocator alloc(c); + return cl::allocate_pointer(alloc, args...); +} +#endif // #if !defined(CL_HPP_NO_STD_UNIQUE_PTR) + +/*! \brief Vector alias to simplify contruction of coarse-grained SVM containers. + * + */ +template < class T > +using coarse_svm_vector = vector>>; + +/*! \brief Vector alias to simplify contruction of fine-grained SVM containers. +* +*/ +template < class T > +using fine_svm_vector = vector>>; + +/*! \brief Vector alias to simplify contruction of fine-grained SVM containers that support platform atomics. +* +*/ +template < class T > +using atomic_svm_vector = vector>>; + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + +/*! \brief Class interface for Buffer Memory Objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Buffer : public Memory +{ +public: + + /*! \brief Constructs a Buffer in a specified context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + */ + Buffer( + const Context& context, + cl_mem_flags flags, + size_type size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a Buffer in the default context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + * + * \see Context::getDefault() + */ + Buffer( + cl_mem_flags flags, + size_type size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer( + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr = false, + cl_int* err = NULL) + { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + size_type size = sizeof(DataType)*(endIterator - startIterator); + + Context context = Context::getDefault(err); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + error = cl::copy(startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified context. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int* err = NULL); + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified queue. + * If useHostPtr is specified iterators must be random access. + */ + template< typename IteratorType > + Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int* err = NULL); + + //! \brief Default constructor - initializes to NULL. + Buffer() : Memory() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with earlier versions. + * + * See Memory for further details. + */ + explicit Buffer(const cl_mem& buffer, bool retainObject = false) : + Memory(buffer, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Buffer& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Buffer(const Buffer& buf) : Memory(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Buffer& operator = (const Buffer &buf) + { + Memory::operator=(buf); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Buffer(Buffer&& buf) CL_HPP_NOEXCEPT_ : Memory(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Buffer& operator = (Buffer &&buf) + { + Memory::operator=(std::move(buf)); + return *this; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + /*! \brief Creates a new buffer object from this. + * + * Wraps clCreateSubBuffer(). + */ + Buffer createSubBuffer( + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * err = NULL) + { + Buffer result; + cl_int error; + result.object_ = ::clCreateSubBuffer( + object_, + flags, + buffer_create_type, + buffer_create_info, + &error); + + detail::errHandler(error, __CREATE_SUBBUFFER_ERR); + if (err != NULL) { + *err = error; + } + + return result; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 +}; + +#if defined (CL_HPP_USE_DX_INTEROP) +/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. + * + * This is provided to facilitate interoperability with Direct3D. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferD3D10 : public Buffer +{ +public: + + + /*! \brief Constructs a BufferD3D10, in a specified context, from a + * given ID3D10Buffer. + * + * Wraps clCreateFromD3D10BufferKHR(). + */ + BufferD3D10( + const Context& context, + cl_mem_flags flags, + ID3D10Buffer* bufobj, + cl_int * err = NULL) : pfn_clCreateFromD3D10BufferKHR(nullptr) + { + typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, + cl_int* errcode_ret); + PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR; +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + vector props = context.getInfo(); + cl_platform platform = -1; + for( int i = 0; i < props.size(); ++i ) { + if( props[i] == CL_CONTEXT_PLATFORM ) { + platform = props[i+1]; + } + } + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateFromD3D10BufferKHR); +#elif CL_HPP_TARGET_OPENCL_VERSION >= 110 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateFromD3D10BufferKHR); +#endif + + cl_int error; + object_ = pfn_clCreateFromD3D10BufferKHR( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferD3D10() : Buffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit BufferD3D10(const cl_mem& buffer, bool retainObject = false) : + Buffer(buffer, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10(const BufferD3D10& buf) : + Buffer(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10& operator = (const BufferD3D10 &buf) + { + Buffer::operator=(buf); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10(BufferD3D10&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10& operator = (BufferD3D10 &&buf) + { + Buffer::operator=(std::move(buf)); + return *this; + } +}; +#endif + +/*! \brief Class interface for GL Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferGL : public Buffer +{ +public: + /*! \brief Constructs a BufferGL in a specified context, from a given + * GL buffer. + * + * Wraps clCreateFromGLBuffer(). + */ + BufferGL( + const Context& context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLBuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferGL() : Buffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit BufferGL(const cl_mem& buffer, bool retainObject = false) : + Buffer(buffer, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferGL(const BufferGL& buf) : Buffer(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferGL& operator = (const BufferGL &buf) + { + Buffer::operator=(buf); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferGL(BufferGL&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferGL& operator = (BufferGL &&buf) + { + Buffer::operator=(std::move(buf)); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + cl_GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief Class interface for GL Render Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferRenderGL : public Buffer +{ +public: + /*! \brief Constructs a BufferRenderGL in a specified context, from a given + * GL Renderbuffer. + * + * Wraps clCreateFromGLRenderbuffer(). + */ + BufferRenderGL( + const Context& context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLRenderbuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferRenderGL() : Buffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit BufferRenderGL(const cl_mem& buffer, bool retainObject = false) : + Buffer(buffer, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL(const BufferRenderGL& buf) : Buffer(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL& operator = (const BufferRenderGL &buf) + { + Buffer::operator=(buf); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL& operator = (BufferRenderGL &&buf) + { + Buffer::operator=(std::move(buf)); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + cl_GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief C++ base class for Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image : public Memory +{ +protected: + //! \brief Default constructor - initializes to NULL. + Image() : Memory() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image(const cl_mem& image, bool retainObject = false) : + Memory(image, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image(const Image& img) : Memory(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image& operator = (const Image &img) + { + Memory::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image(Image&& img) CL_HPP_NOEXCEPT_ : Memory(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image& operator = (Image &&img) + { + Memory::operator=(std::move(img)); + return *this; + } + + +public: + //! \brief Wrapper for clGetImageInfo(). + template + cl_int getImageInfo(cl_image_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetImageInfo, object_, name, param), + __GET_IMAGE_INFO_ERR); + } + + //! \brief Wrapper for clGetImageInfo() that returns by value. + template typename + detail::param_traits::param_type + getImageInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_image_info, name>::param_type param; + cl_int result = getImageInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +/*! \brief Class interface for 1D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image1D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image1D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type width, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + cl_image_desc desc = {0}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D; + desc.image_width = width; + + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image1D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image1D(const cl_mem& image1D, bool retainObject = false) : + Image(image1D, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image1D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1D(const Image1D& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1D& operator = (const Image1D &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1D(Image1D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1D& operator = (Image1D &&img) + { + Image::operator=(std::move(img)); + return *this; + } + +}; + +/*! \class Image1DBuffer + * \brief Image interface for 1D buffer images. + */ +class Image1DBuffer : public Image +{ +public: + Image1DBuffer( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type width, + const Buffer &buffer, + cl_int* err = NULL) + { + cl_int error; + + cl_image_desc desc = {0}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + desc.image_width = width; + desc.buffer = buffer(); + + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + NULL, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DBuffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image1DBuffer(const cl_mem& image1D, bool retainObject = false) : + Image(image1D, retainObject) { } + + Image1DBuffer& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer(const Image1DBuffer& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer& operator = (const Image1DBuffer &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer(Image1DBuffer&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer& operator = (Image1DBuffer &&img) + { + Image::operator=(std::move(img)); + return *this; + } + +}; + +/*! \class Image1DArray + * \brief Image interface for arrays of 1D images. + */ +class Image1DArray : public Image +{ +public: + Image1DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type arraySize, + size_type width, + size_type rowPitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + cl_image_desc desc = {0}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + desc.image_width = width; + desc.image_array_size = arraySize; + desc.image_row_pitch = rowPitch; + + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DArray() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image1DArray(const cl_mem& imageArray, bool retainObject = false) : + Image(imageArray, retainObject) { } + + + Image1DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DArray(const Image1DArray& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DArray& operator = (const Image1DArray &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DArray(Image1DArray&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DArray& operator = (Image1DArray &&img) + { + Image::operator=(std::move(img)); + return *this; + } + +}; +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 120 + + +/*! \brief Class interface for 2D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image2D : public Image +{ +public: + /*! \brief Constructs a 2D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image2D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type width, + size_type height, + size_type row_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 120 + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + if (useCreateImage) + { + cl_image_desc desc = {0}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_row_pitch = row_pitch; + + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + if (!useCreateImage) + { + object_ = ::clCreateImage2D( + context(), flags,&format, width, height, row_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE2D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 || defined(CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR) + /*! \brief Constructs a 2D Image from a buffer. + * \note This will share storage with the underlying buffer. + * + * Wraps clCreateImage(). + */ + Image2D( + const Context& context, + ImageFormat format, + const Buffer &sourceBuffer, + size_type width, + size_type height, + size_type row_pitch = 0, + cl_int* err = nullptr) + { + cl_int error; + + cl_image_desc desc = {0}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_row_pitch = row_pitch; + desc.buffer = sourceBuffer(); + + object_ = ::clCreateImage( + context(), + 0, // flags inherited from buffer + &format, + &desc, + nullptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { + *err = error; + } + } +#endif //#if CL_HPP_TARGET_OPENCL_VERSION >= 200 || defined(CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR) + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! \brief Constructs a 2D Image from an image. + * \note This will share storage with the underlying image but may + * reinterpret the channel order and type. + * + * The image will be created matching with a descriptor matching the source. + * + * \param order is the channel order to reinterpret the image data as. + * The channel order may differ as described in the OpenCL + * 2.0 API specification. + * + * Wraps clCreateImage(). + */ + Image2D( + const Context& context, + cl_channel_order order, + const Image &sourceImage, + cl_int* err = nullptr) + { + cl_int error; + + // Descriptor fields have to match source image + size_type sourceWidth = + sourceImage.getImageInfo(); + size_type sourceHeight = + sourceImage.getImageInfo(); + size_type sourceRowPitch = + sourceImage.getImageInfo(); + cl_uint sourceNumMIPLevels = + sourceImage.getImageInfo(); + cl_uint sourceNumSamples = + sourceImage.getImageInfo(); + cl_image_format sourceFormat = + sourceImage.getImageInfo(); + + // Update only the channel order. + // Channel format inherited from source. + sourceFormat.image_channel_order = order; + + cl_image_desc desc = {0}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = sourceWidth; + desc.image_height = sourceHeight; + desc.image_row_pitch = sourceRowPitch; + desc.num_mip_levels = sourceNumMIPLevels; + desc.num_samples = sourceNumSamples; + desc.buffer = sourceImage(); + + object_ = ::clCreateImage( + context(), + 0, // flags should be inherited from mem_object + &sourceFormat, + &desc, + nullptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { + *err = error; + } + } +#endif //#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + //! \brief Default constructor - initializes to NULL. + Image2D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image2D(const cl_mem& image2D, bool retainObject = false) : + Image(image2D, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2D(const Image2D& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2D& operator = (const Image2D &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2D(Image2D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2D& operator = (Image2D &&img) + { + Image::operator=(std::move(img)); + return *this; + } + +}; + + +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +/*! \brief Class interface for GL 2D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. + */ +class CL_API_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D +{ +public: + /*! \brief Constructs an Image2DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture2D(). + */ + Image2DGL( + const Context& context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture2D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); + if (err != NULL) { + *err = error; + } + + } + + //! \brief Default constructor - initializes to NULL. + Image2DGL() : Image2D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image2DGL(const cl_mem& image, bool retainObject = false) : + Image2D(image, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + *c + * See Memory for further details. + */ + Image2DGL& operator = (const cl_mem& rhs) + { + Image2D::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2DGL(const Image2DGL& img) : Image2D(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2DGL& operator = (const Image2DGL &img) + { + Image2D::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DGL(Image2DGL&& img) CL_HPP_NOEXCEPT_ : Image2D(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DGL& operator = (Image2DGL &&img) + { + Image2D::operator=(std::move(img)); + return *this; + } + +} CL_API_SUFFIX__VERSION_1_1_DEPRECATED; +#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +/*! \class Image2DArray + * \brief Image interface for arrays of 2D images. + */ +class Image2DArray : public Image +{ +public: + Image2DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type arraySize, + size_type width, + size_type height, + size_type rowPitch, + size_type slicePitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + cl_image_desc desc = {0}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + desc.image_width = width; + desc.image_height = height; + desc.image_array_size = arraySize; + desc.image_row_pitch = rowPitch; + desc.image_slice_pitch = slicePitch; + + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2DArray() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image2DArray(const cl_mem& imageArray, bool retainObject = false) : Image(imageArray, retainObject) { } + + Image2DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2DArray(const Image2DArray& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2DArray& operator = (const Image2DArray &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DArray(Image2DArray&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DArray& operator = (Image2DArray &&img) + { + Image::operator=(std::move(img)); + return *this; + } +}; +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 120 + +/*! \brief Class interface for 3D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3D : public Image +{ +public: + /*! \brief Constructs a 3D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image3D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type width, + size_type height, + size_type depth, + size_type row_pitch = 0, + size_type slice_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 120 + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + if (useCreateImage) + { + cl_image_desc desc = {0}; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = width; + desc.image_height = height; + desc.image_depth = depth; + desc.image_row_pitch = row_pitch; + desc.image_slice_pitch = slice_pitch; + + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + if (!useCreateImage) + { + object_ = ::clCreateImage3D( + context(), flags, &format, width, height, depth, row_pitch, + slice_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE3D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 + } + + //! \brief Default constructor - initializes to NULL. + Image3D() : Image() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image3D(const cl_mem& image3D, bool retainObject = false) : + Image(image3D, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3D(const Image3D& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3D& operator = (const Image3D &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3D(Image3D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3D& operator = (Image3D &&img) + { + Image::operator=(std::move(img)); + return *this; + } +}; + +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +/*! \brief Class interface for GL 3D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3DGL : public Image3D +{ +public: + /*! \brief Constructs an Image3DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture3D(). + */ + Image3DGL( + const Context& context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture3D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image3DGL() : Image3D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image3DGL(const cl_mem& image, bool retainObject = false) : + Image3D(image, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL& operator = (const cl_mem& rhs) + { + Image3D::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3DGL(const Image3DGL& img) : Image3D(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3DGL& operator = (const Image3DGL &img) + { + Image3D::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3DGL(Image3DGL&& img) CL_HPP_NOEXCEPT_ : Image3D(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3DGL& operator = (Image3DGL &&img) + { + Image3D::operator=(std::move(img)); + return *this; + } +}; +#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +/*! \class ImageGL + * \brief general image interface for GL interop. + * We abstract the 2D and 3D GL images into a single instance here + * that wraps all GL sourced images on the grounds that setup information + * was performed by OpenCL anyway. + */ +class ImageGL : public Image +{ +public: + ImageGL( + const Context& context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); + if (err != NULL) { + *err = error; + } + } + + ImageGL() : Image() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit ImageGL(const cl_mem& image, bool retainObject = false) : + Image(image, retainObject) { } + + ImageGL& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + ImageGL(const ImageGL& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + ImageGL& operator = (const ImageGL &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + ImageGL(ImageGL&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + ImageGL& operator = (ImageGL &&img) + { + Image::operator=(std::move(img)); + return *this; + } +}; +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/*! \brief Class interface for Pipe Memory Objects. +* +* See Memory for details about copy semantics, etc. +* +* \see Memory +*/ +class Pipe : public Memory +{ +public: + + /*! \brief Constructs a Pipe in a specified context. + * + * Wraps clCreatePipe(). + * @param context Context in which to create the pipe. + * @param flags Bitfield. Only CL_MEM_READ_WRITE and CL_MEM_HOST_NO_ACCESS are valid. + * @param packet_size Size in bytes of a single packet of the pipe. + * @param max_packets Number of packets that may be stored in the pipe. + * + */ + Pipe( + const Context& context, + cl_uint packet_size, + cl_uint max_packets, + cl_int* err = NULL) + { + cl_int error; + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; + object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); + + detail::errHandler(error, __CREATE_PIPE_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a Pipe in a the default context. + * + * Wraps clCreatePipe(). + * @param flags Bitfield. Only CL_MEM_READ_WRITE and CL_MEM_HOST_NO_ACCESS are valid. + * @param packet_size Size in bytes of a single packet of the pipe. + * @param max_packets Number of packets that may be stored in the pipe. + * + */ + Pipe( + cl_uint packet_size, + cl_uint max_packets, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; + object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); + + detail::errHandler(error, __CREATE_PIPE_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Pipe() : Memory() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with earlier versions. + * + * See Memory for further details. + */ + explicit Pipe(const cl_mem& pipe, bool retainObject = false) : + Memory(pipe, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Pipe& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Pipe(const Pipe& pipe) : Memory(pipe) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Pipe& operator = (const Pipe &pipe) + { + Memory::operator=(pipe); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Pipe(Pipe&& pipe) CL_HPP_NOEXCEPT_ : Memory(std::move(pipe)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Pipe& operator = (Pipe &&pipe) + { + Memory::operator=(std::move(pipe)); + return *this; + } + + //! \brief Wrapper for clGetMemObjectInfo(). + template + cl_int getInfo(cl_pipe_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetPipeInfo, object_, name, param), + __GET_PIPE_INFO_ERR); + } + + //! \brief Wrapper for clGetMemObjectInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_pipe_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; // class Pipe +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 + + +/*! \brief Class interface for cl_sampler. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_sampler as the original. For details, see + * clRetainSampler() and clReleaseSampler(). + * + * \see cl_sampler + */ +class Sampler : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Sampler() { } + + /*! \brief Constructs a Sampler in a specified context. + * + * Wraps clCreateSampler(). + */ + Sampler( + const Context& context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int* err = NULL) + { + cl_int error; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_sampler_properties sampler_properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, normalized_coords, + CL_SAMPLER_ADDRESSING_MODE, addressing_mode, + CL_SAMPLER_FILTER_MODE, filter_mode, + 0 }; + object_ = ::clCreateSamplerWithProperties( + context(), + sampler_properties, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } +#else + object_ = ::clCreateSampler( + context(), + normalized_coords, + addressing_mode, + filter_mode, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_ERR); + if (err != NULL) { + *err = error; + } +#endif + } + + /*! \brief Constructor from cl_sampler - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This effectively transfers ownership of a refcount on the cl_sampler + * into the new Sampler object. + */ + explicit Sampler(const cl_sampler& sampler, bool retainObject = false) : + detail::Wrapper(sampler, retainObject) { } + + /*! \brief Assignment operator from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseSampler() on the value previously held by this instance. + */ + Sampler& operator = (const cl_sampler& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Sampler(const Sampler& sam) : detail::Wrapper(sam) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Sampler& operator = (const Sampler &sam) + { + detail::Wrapper::operator=(sam); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Sampler(Sampler&& sam) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(sam)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Sampler& operator = (Sampler &&sam) + { + detail::Wrapper::operator=(std::move(sam)); + return *this; + } + + //! \brief Wrapper for clGetSamplerInfo(). + template + cl_int getInfo(cl_sampler_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetSamplerInfo, object_, name, param), + __GET_SAMPLER_INFO_ERR); + } + + //! \brief Wrapper for clGetSamplerInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_sampler_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +class Program; +class CommandQueue; +class DeviceCommandQueue; +class Kernel; + +//! \brief Class interface for specifying NDRange values. +class NDRange +{ +private: + size_type sizes_[3]; + cl_uint dimensions_; + +public: + //! \brief Default constructor - resulting range has zero dimensions. + NDRange() + : dimensions_(0) + { + sizes_[0] = 0; + sizes_[1] = 0; + sizes_[2] = 0; + } + + //! \brief Constructs one-dimensional range. + NDRange(size_type size0) + : dimensions_(1) + { + sizes_[0] = size0; + sizes_[1] = 1; + sizes_[2] = 1; + } + + //! \brief Constructs two-dimensional range. + NDRange(size_type size0, size_type size1) + : dimensions_(2) + { + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = 1; + } + + //! \brief Constructs three-dimensional range. + NDRange(size_type size0, size_type size1, size_type size2) + : dimensions_(3) + { + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = size2; + } + + /*! \brief Conversion operator to const size_type *. + * + * \returns a pointer to the size of the first dimension. + */ + operator const size_type*() const { + return sizes_; + } + + //! \brief Queries the number of dimensions in the range. + size_type dimensions() const + { + return dimensions_; + } + + //! \brief Returns the size of the object in bytes based on the + // runtime number of dimensions + size_type size() const + { + return dimensions_*sizeof(size_type); + } + + size_type* get() + { + return sizes_; + } + + const size_type* get() const + { + return sizes_; + } +}; + +//! \brief A zero-dimensional range. +static const NDRange NullRange; + +//! \brief Local address wrapper for use with Kernel::setArg +struct LocalSpaceArg +{ + size_type size_; +}; + +namespace detail { + +template +struct KernelArgumentHandler; + +// Enable for objects that are not subclasses of memory +// Pointers, constants etc +template +struct KernelArgumentHandler::value>::type> +{ + static size_type size(const T&) { return sizeof(T); } + static const T* ptr(const T& value) { return &value; } +}; + +// Enable for subclasses of memory where we want to get a reference to the cl_mem out +// and pass that in for safety +template +struct KernelArgumentHandler::value>::type> +{ + static size_type size(const T&) { return sizeof(cl_mem); } + static const cl_mem* ptr(const T& value) { return &(value()); } +}; + +// Specialization for DeviceCommandQueue defined later + +template <> +struct KernelArgumentHandler +{ + static size_type size(const LocalSpaceArg& value) { return value.size_; } + static const void* ptr(const LocalSpaceArg&) { return NULL; } +}; + +} +//! \endcond + +/*! Local + * \brief Helper function for generating LocalSpaceArg objects. + */ +inline LocalSpaceArg +Local(size_type size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +/*! \brief Class interface for cl_kernel. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_kernel as the original. For details, see + * clRetainKernel() and clReleaseKernel(). + * + * \see cl_kernel + */ +class Kernel : public detail::Wrapper +{ +public: + inline Kernel(const Program& program, const char* name, cl_int* err = NULL); + + //! \brief Default constructor - initializes to NULL. + Kernel() { } + + /*! \brief Constructor from cl_kernel - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This effectively transfers ownership of a refcount on the cl_kernel + * into the new Kernel object. + */ + explicit Kernel(const cl_kernel& kernel, bool retainObject = false) : + detail::Wrapper(kernel, retainObject) { } + + /*! \brief Assignment operator from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseKernel() on the value previously held by this instance. + */ + Kernel& operator = (const cl_kernel& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Kernel(const Kernel& kernel) : detail::Wrapper(kernel) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Kernel& operator = (const Kernel &kernel) + { + detail::Wrapper::operator=(kernel); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Kernel(Kernel&& kernel) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(kernel)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Kernel& operator = (Kernel &&kernel) + { + detail::Wrapper::operator=(std::move(kernel)); + return *this; + } + + template + cl_int getInfo(cl_kernel_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelInfo, object_, name, param), + __GET_KERNEL_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + template + cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), + __GET_KERNEL_ARG_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getArgInfo(cl_uint argIndex, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_arg_info, name>::param_type param; + cl_int result = getArgInfo(argIndex, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + template + cl_int getWorkGroupInfo( + const Device& device, cl_kernel_work_group_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetKernelWorkGroupInfo, object_, device(), name, param), + __GET_KERNEL_WORK_GROUP_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getWorkGroupInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_work_group_info, name>::param_type param; + cl_int result = getWorkGroupInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if (CL_HPP_TARGET_OPENCL_VERSION >= 200 && defined(CL_HPP_USE_CL_SUB_GROUPS_KHR)) || CL_HPP_TARGET_OPENCL_VERSION >= 210 + cl_int getSubGroupInfo(const cl::Device &dev, cl_kernel_sub_group_info name, const cl::NDRange &range, size_type* param) const + { +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + return detail::errHandler( + clGetKernelSubGroupInfo(object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), + __GET_KERNEL_SUB_GROUP_INFO_ERR); + +#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + typedef clGetKernelSubGroupInfoKHR_fn PFN_clGetKernelSubGroupInfoKHR; + static PFN_clGetKernelSubGroupInfoKHR pfn_clGetKernelSubGroupInfoKHR = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetKernelSubGroupInfoKHR); + + return detail::errHandler( + pfn_clGetKernelSubGroupInfoKHR(object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), + __GET_KERNEL_SUB_GROUP_INFO_ERR); + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + } + + template + size_type getSubGroupInfo(const cl::Device &dev, const cl::NDRange &range, cl_int* err = NULL) const + { + size_type param; + cl_int result = getSubGroupInfo(dev, name, range, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! \brief setArg overload taking a shared_ptr type + */ + template + cl_int setArg(cl_uint index, const cl::pointer &argPtr) + { + return detail::errHandler( + ::clSetKernelArgSVMPointer(object_, index, argPtr.get()), + __SET_KERNEL_ARGS_ERR); + } + + /*! \brief setArg overload taking a vector type. + */ + template + cl_int setArg(cl_uint index, const cl::vector &argPtr) + { + return detail::errHandler( + ::clSetKernelArgSVMPointer(object_, index, argPtr.data()), + __SET_KERNEL_ARGS_ERR); + } + + /*! \brief setArg overload taking a pointer type + */ + template + typename std::enable_if::value, cl_int>::type + setArg(cl_uint index, const T argPtr) + { + return detail::errHandler( + ::clSetKernelArgSVMPointer(object_, index, argPtr), + __SET_KERNEL_ARGS_ERR); + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + /*! \brief setArg overload taking a POD type + */ + template + typename std::enable_if::value, cl_int>::type + setArg(cl_uint index, const T &value) + { + return detail::errHandler( + ::clSetKernelArg( + object_, + index, + detail::KernelArgumentHandler::size(value), + detail::KernelArgumentHandler::ptr(value)), + __SET_KERNEL_ARGS_ERR); + } + + cl_int setArg(cl_uint index, size_type size, const void* argPtr) + { + return detail::errHandler( + ::clSetKernelArg(object_, index, size, argPtr), + __SET_KERNEL_ARGS_ERR); + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! + * Specify a vector of SVM pointers that the kernel may access in + * addition to its arguments. + */ + cl_int setSVMPointers(const vector &pointerList) + { + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void*)*pointerList.size(), + pointerList.data())); + } + + /*! + * Specify a std::array of SVM pointers that the kernel may access in + * addition to its arguments. + */ + template + cl_int setSVMPointers(const std::array &pointerList) + { + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void*)*pointerList.size(), + pointerList.data())); + } + + /*! \brief Enable fine-grained system SVM. + * + * \note It is only possible to enable fine-grained system SVM if all devices + * in the context associated with kernel support it. + * + * \param svmEnabled True if fine-grained system SVM is requested. False otherwise. + * \return CL_SUCCESS if the function was executed succesfully. CL_INVALID_OPERATION + * if no devices in the context support fine-grained system SVM. + * + * \see clSetKernelExecInfo + */ + cl_int enableFineGrainedSystemSVM(bool svmEnabled) + { + cl_bool svmEnabled_ = svmEnabled ? CL_TRUE : CL_FALSE; + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, + sizeof(cl_bool), + &svmEnabled_ + ) + ); + } + + template + void setSVMPointersHelper(std::array &pointerList, const pointer &t0, const pointer &t1, Ts & ... ts) + { + pointerList[index] = static_cast(t0.get()); + setSVMPointersHelper(pointerList, t1, ts...); + } + + template + typename std::enable_if::value, void>::type + setSVMPointersHelper(std::array &pointerList, T0 t0, T1 t1, Ts... ts) + { + pointerList[index] = static_cast(t0); + setSVMPointersHelper(pointerList, t1, ts...); + } + + template + void setSVMPointersHelper(std::array &pointerList, const pointer &t0) + { + pointerList[index] = static_cast(t0.get()); + } + + + template + typename std::enable_if::value, void>::type + setSVMPointersHelper(std::array &pointerList, T0 t0) + { + pointerList[index] = static_cast(t0); + } + + template + cl_int setSVMPointers(const T0 &t0, Ts & ... ts) + { + std::array pointerList; + + setSVMPointersHelper<0, 1 + sizeof...(Ts)>(pointerList, t0, ts...); + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void*)*(1 + sizeof...(Ts)), + pointerList.data())); + } + + template + cl_int setExecInfo(cl_kernel_exec_info param_name, const T& val) + { + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + param_name, + sizeof(T), + &val)); + } + + template + cl_int setExecInfo(typename detail::param_traits::param_type& val) + { + return setExecInfo(name, val); + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + /** + * Make a deep copy of the kernel object including its arguments. + * @return A new kernel object with internal state entirely separate from that + * of the original but with any arguments set on the original intact. + */ + Kernel clone() + { + cl_int error; + Kernel retValue(clCloneKernel(this->get(), &error)); + + detail::errHandler(error, __CLONE_KERNEL_ERR); + return retValue; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 +}; + +/*! \class Program + * \brief Program interface that implements cl_program. + */ +class Program : public detail::Wrapper +{ +public: +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + typedef vector> Binaries; + typedef vector Sources; +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + typedef vector > Binaries; + typedef vector > Sources; +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + + Program( + const string& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const size_type length = source.size(); + + Context context = Context::getDefault(err); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, +#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +#else + "", +#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + NULL, + NULL); + + detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const string& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const size_type length = source.size(); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + error = ::clBuildProgram( + object_, + 0, + NULL, +#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +#else + "", +#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + NULL, + NULL); + + detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != NULL) { + *err = error; + } + } + + /** + * Create a program from a vector of source strings and the default context. + * Does not compile or link the program. + */ + Program( + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + Context context = Context::getDefault(err); + + const size_type n = (size_type)sources.size(); + + vector lengths(n); + vector strings(n); + + for (size_type i = 0; i < n; ++i) { +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].data(); + lengths[i] = sources[(int)i].length(); +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + } + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings.data(), lengths.data(), &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != NULL) { + *err = error; + } + } + + /** + * Create a program from a vector of source strings and a provided context. + * Does not compile or link the program. + */ + Program( + const Context& context, + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + + const size_type n = (size_type)sources.size(); + + vector lengths(n); + vector strings(n); + + for (size_type i = 0; i < n; ++i) { +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].data(); + lengths[i] = sources[(int)i].length(); +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + } + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings.data(), lengths.data(), &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != NULL) { + *err = error; + } + } + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 || (CL_HPP_TARGET_OPENCL_VERSION==200 && defined(CL_HPP_USE_IL_KHR)) + /** + * Program constructor to allow construction of program from SPIR-V or another IL. + * Valid for either OpenCL >= 2.1 or when CL_HPP_USE_IL_KHR is defined. + */ + Program( + const vector& IL, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + object_ = ::clCreateProgramWithIL( + context(), static_cast(IL.data()), IL.size(), &error); + +#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + typedef clCreateProgramWithILKHR_fn PFN_clCreateProgramWithILKHR; + static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); + + object_ = pfn_clCreateProgramWithILKHR( + context(), static_cast(IL.data()), IL.size(), &error); + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + detail::errHandler(error, __CREATE_PROGRAM_WITH_IL_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, +#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +#else + "", +#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + NULL, + NULL); + + detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != NULL) { + *err = error; + } + } + + /** + * Program constructor to allow construction of program from SPIR-V or another IL + * for a specific context. + * Valid for either OpenCL >= 2.1 or when CL_HPP_USE_IL_KHR is defined. + */ + Program( + const Context& context, + const vector& IL, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + object_ = ::clCreateProgramWithIL( + context(), static_cast(IL.data()), IL.size(), &error); + +#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + typedef clCreateProgramWithILKHR_fn PFN_clCreateProgramWithILKHR; + static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); + + object_ = pfn_clCreateProgramWithILKHR( + context(), static_cast(IL.data()), IL.size(), &error); + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + detail::errHandler(error, __CREATE_PROGRAM_WITH_IL_ERR); + + if (error == CL_SUCCESS && build) { + error = ::clBuildProgram( + object_, + 0, + NULL, +#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +#else + "", +#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + NULL, + NULL); + + detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != NULL) { + *err = error; + } + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + /** + * Construct a program object from a list of devices and a per-device list of binaries. + * \param context A valid OpenCL context in which to construct the program. + * \param devices A vector of OpenCL device objects for which the program will be created. + * \param binaries A vector of pairs of a pointer to a binary object and its length. + * \param binaryStatus An optional vector that on completion will be resized to + * match the size of binaries and filled with values to specify if each binary + * was successfully loaded. + * Set to CL_SUCCESS if the binary was successfully loaded. + * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. + * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. + * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: + * CL_INVALID_CONTEXT if context is not a valid context. + * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; + * or if any entry in binaries is NULL or has length 0. + * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. + * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. + * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. + */ + Program( + const Context& context, + const vector& devices, + const Binaries& binaries, + vector* binaryStatus = NULL, + cl_int* err = NULL) + { + cl_int error; + + const size_type numDevices = devices.size(); + + // Catch size mismatch early and return + if(binaries.size() != numDevices) { + error = CL_INVALID_VALUE; + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + + vector lengths(numDevices); + vector images(numDevices); +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + for (size_type i = 0; i < numDevices; ++i) { + images[i] = binaries[i].data(); + lengths[i] = binaries[(int)i].size(); + } +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + for (size_type i = 0; i < numDevices; ++i) { + images[i] = (const unsigned char*)binaries[i].first; + lengths[i] = binaries[(int)i].second; + } +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + + vector deviceIDs(numDevices); + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + if(binaryStatus) { + binaryStatus->resize(numDevices); + } + + object_ = ::clCreateProgramWithBinary( + context(), (cl_uint) devices.size(), + deviceIDs.data(), + lengths.data(), images.data(), (binaryStatus != NULL && numDevices > 0) + ? &binaryStatus->front() + : NULL, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + } + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * Create program using builtin kernels. + * \param kernelNames Semi-colon separated list of builtin kernel names + */ + Program( + const Context& context, + const vector& devices, + const string& kernelNames, + cl_int* err = NULL) + { + cl_int error; + + + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateProgramWithBuiltInKernels( + context(), + (cl_uint) devices.size(), + deviceIDs.data(), + kernelNames.c_str(), + &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + Program() { } + + + /*! \brief Constructor from cl_program - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + */ + explicit Program(const cl_program& program, bool retainObject = false) : + detail::Wrapper(program, retainObject) { } + + Program& operator = (const cl_program& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Program(const Program& program) : detail::Wrapper(program) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Program& operator = (const Program &program) + { + detail::Wrapper::operator=(program); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Program(Program&& program) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(program)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Program& operator = (Program &&program) + { + detail::Wrapper::operator=(std::move(program)); + return *this; + } + + cl_int build( + const vector& devices, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + cl_int buildError = ::clBuildProgram( + object_, + (cl_uint) + devices.size(), + deviceIDs.data(), + options, + notifyFptr, + data); + + return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + cl_int build( + const Device& device, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + cl_device_id deviceID = device(); + + cl_int buildError = ::clBuildProgram( + object_, + 1, + &deviceID, + options, + notifyFptr, + data); + + BuildLogType buildLog(0); + buildLog.push_back(std::make_pair(device, getBuildInfo(device))); + return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, buildLog); + } + + cl_int build( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + cl_int buildError = ::clBuildProgram( + object_, + 0, + NULL, + options, + notifyFptr, + data); + + return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl_int compile( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + cl_int error = ::clCompileProgram( + object_, + 0, + NULL, + options, + 0, + NULL, + NULL, + notifyFptr, + data); + return detail::buildErrHandler(error, __COMPILE_PROGRAM_ERR, getBuildInfo()); + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + template + cl_int getInfo(cl_program_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int getBuildInfo( + const Device& device, cl_program_build_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetProgramBuildInfo, object_, device(), name, param), + __GET_PROGRAM_BUILD_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getBuildInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + cl_int result = getBuildInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /** + * Build info function that returns a vector of device/info pairs for the specified + * info type and for all devices in the program. + * On an error reading the info for any device, an empty vector of info will be returned. + */ + template + vector::param_type>> + getBuildInfo(cl_int *err = NULL) const + { + cl_int result = CL_SUCCESS; + + auto devs = getInfo(&result); + vector::param_type>> + devInfo; + + // If there was an initial error from getInfo return the error + if (result != CL_SUCCESS) { + if (err != NULL) { + *err = result; + } + return devInfo; + } + + for (const cl::Device &d : devs) { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + result = getBuildInfo(d, name, ¶m); + devInfo.push_back( + std::pair::param_type> + (d, param)); + if (result != CL_SUCCESS) { + // On error, leave the loop and return the error code + break; + } + } + if (err != NULL) { + *err = result; + } + if (result != CL_SUCCESS) { + devInfo.clear(); + } + return devInfo; + } + + cl_int createKernels(vector* kernels) + { + cl_uint numKernels; + cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + vector value(numKernels); + + err = ::clCreateKernelsInProgram( + object_, numKernels, value.data(), NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + if (kernels) { + kernels->resize(value.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < value.size(); i++) { + // We do not need to retain because this kernel is being created + // by the runtime + (*kernels)[i] = Kernel(value[i], false); + } + } + return CL_SUCCESS; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 220 +#if defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) + /*! \brief Registers a callback function to be called when destructors for + * program scope global variables are complete and before the + * program is released. + * + * Wraps clSetProgramReleaseCallback(). + * + * Each call to this function registers the specified user callback function + * on a callback stack associated with program. The registered user callback + * functions are called in the reverse order in which they were registered. + */ + CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int setReleaseCallback( + void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), + void * user_data = NULL) CL_API_SUFFIX__VERSION_2_2_DEPRECATED + { + return detail::errHandler( + ::clSetProgramReleaseCallback( + object_, + pfn_notify, + user_data), + __SET_PROGRAM_RELEASE_CALLBACK_ERR); + } +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) + + /*! \brief Sets a SPIR-V specialization constant. + * + * Wraps clSetProgramSpecializationConstant(). + */ + template + typename std::enable_if::value, cl_int>::type + setSpecializationConstant(cl_uint index, const T &value) + { + return detail::errHandler( + ::clSetProgramSpecializationConstant( + object_, + index, + sizeof(value), + &value), + __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); + } + + /*! \brief Sets a SPIR-V specialization constant. + * + * Wraps clSetProgramSpecializationConstant(). + */ + cl_int setSpecializationConstant(cl_uint index, size_type size, const void* value) + { + return detail::errHandler( + ::clSetProgramSpecializationConstant( + object_, + index, + size, + value), + __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 +}; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +inline Program linkProgram( + Program input1, + Program input2, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int error_local = CL_SUCCESS; + + cl_program programs[2] = { input1(), input2() }; + + Context ctx = input1.getInfo(&error_local); + if(error_local!=CL_SUCCESS) { + detail::errHandler(error_local, __LINK_PROGRAM_ERR); + } + + cl_program prog = ::clLinkProgram( + ctx(), + 0, + NULL, + options, + 2, + programs, + notifyFptr, + data, + &error_local); + + detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = error_local; + } + + return Program(prog); +} + +inline Program linkProgram( + vector inputPrograms, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int error_local = CL_SUCCESS; + + vector programs(inputPrograms.size()); + + for (unsigned int i = 0; i < inputPrograms.size(); i++) { + programs[i] = inputPrograms[i](); + } + + Context ctx; + if(inputPrograms.size() > 0) { + ctx = inputPrograms[0].getInfo(&error_local); + if(error_local!=CL_SUCCESS) { + detail::errHandler(error_local, __LINK_PROGRAM_ERR); + } + } + cl_program prog = ::clLinkProgram( + ctx(), + 0, + NULL, + options, + (cl_uint)inputPrograms.size(), + programs.data(), + notifyFptr, + data, + &error_local); + + detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = error_local; + } + + return Program(prog, false); +} +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + +// Template specialization for CL_PROGRAM_BINARIES +template <> +inline cl_int cl::Program::getInfo(cl_program_info name, vector>* param) const +{ + if (name != CL_PROGRAM_BINARIES) { + return CL_INVALID_VALUE; + } + if (param) { + // Resize the parameter array appropriately for each allocation + // and pass down to the helper + + vector sizes = getInfo(); + size_type numBinaries = sizes.size(); + + // Resize the parameter array and constituent arrays + param->resize(numBinaries); + for (size_type i = 0; i < numBinaries; ++i) { + (*param)[i].resize(sizes[i]); + } + + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + return CL_SUCCESS; +} + +template<> +inline vector> cl::Program::getInfo(cl_int* err) const +{ + vector> binariesVectors; + + cl_int result = getInfo(CL_PROGRAM_BINARIES, &binariesVectors); + if (err != NULL) { + *err = result; + } + return binariesVectors; +} + +#if CL_HPP_TARGET_OPENCL_VERSION >= 220 +// Template specialization for clSetProgramSpecializationConstant +template <> +inline cl_int cl::Program::setSpecializationConstant(cl_uint index, const bool &value) +{ + cl_uchar ucValue = value ? CL_UCHAR_MAX : 0; + return detail::errHandler( + ::clSetProgramSpecializationConstant( + object_, + index, + sizeof(ucValue), + &ucValue), + __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); +} +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 + +inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) +{ + cl_int error; + + object_ = ::clCreateKernel(program(), name, &error); + detail::errHandler(error, __CREATE_KERNEL_ERR); + + if (err != NULL) { + *err = error; + } + +} + +enum class QueueProperties : cl_command_queue_properties +{ + None = 0, + Profiling = CL_QUEUE_PROFILING_ENABLE, + OutOfOrder = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, +}; + +inline QueueProperties operator|(QueueProperties lhs, QueueProperties rhs) +{ + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +inline QueueProperties operator&(QueueProperties lhs, QueueProperties rhs) +{ + return static_cast(static_cast(lhs) & static_cast(rhs)); +} + +/*! \class CommandQueue + * \brief CommandQueue interface for cl_command_queue. + */ +class CommandQueue : public detail::Wrapper +{ +private: + static std::once_flag default_initialized_; + static CommandQueue default_; + static cl_int default_error_; + + /*! \brief Create the default command queue returned by @ref getDefault. + * + * It sets default_error_ to indicate success or failure. It does not throw + * @c cl::Error. + */ + static void makeDefault() + { + /* We don't want to throw an error from this function, so we have to + * catch and set the error flag. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { + int error; + Context context = Context::getDefault(&error); + + if (error != CL_SUCCESS) { + default_error_ = error; + } + else { + Device device = Device::getDefault(); + default_ = CommandQueue(context, device, 0, &default_error_); + } + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif + } + + /*! \brief Create the default command queue. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const CommandQueue &c) { + default_ = c; + } + +public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = CommandQueue(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + + + /*! + * \brief Constructs a CommandQueue based on passed properties. + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + CommandQueue( + cl_command_queue_properties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + bool useWithProperties; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, 0 }; + if ((properties & CL_QUEUE_ON_DEVICE) == 0) { + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + } + else { + error = CL_INVALID_QUEUE_PROPERTIES; + } + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + } + } + + /*! + * \brief Constructs a CommandQueue based on passed properties. + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + CommandQueue( + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + bool useWithProperties; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), device(), static_cast(properties), &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + + } + } + + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + explicit CommandQueue( + const Context& context, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + bool useWithProperties; + vector devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, 0 }; + if ((properties & CL_QUEUE_ON_DEVICE) == 0) { + object_ = ::clCreateCommandQueueWithProperties( + context(), devices[0](), queue_properties, &error); + } + else { + error = CL_INVALID_QUEUE_PROPERTIES; + } + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), devices[0](), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + } + + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + explicit CommandQueue( + const Context& context, + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; + bool useWithProperties; + vector devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), devices[0](), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), devices[0](), static_cast(properties), &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + } + + /*! + * \brief Constructs a CommandQueue for a passed device and context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + CommandQueue( + const Context& context, + const Device& device, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + bool useWithProperties; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + } + + /*! + * \brief Constructs a CommandQueue for a passed device and context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + CommandQueue( + const Context& context, + const Device& device, + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; + bool useWithProperties; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), device(), static_cast(properties), &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + } + + static CommandQueue getDefault(cl_int * err = NULL) + { + std::call_once(default_initialized_, makeDefault); +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); +#else // CL_HPP_TARGET_OPENCL_VERSION >= 200 + detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_ERR); +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + /** + * Modify the default command queue to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default command queue. + * Should be compared to the passed value to ensure that it was updated. + */ + static CommandQueue setDefault(const CommandQueue &default_queue) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_queue)); + detail::errHandler(default_error_); + return default_; + } + + CommandQueue() { } + + + /*! \brief Constructor from cl_command_queue - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + */ + explicit CommandQueue(const cl_command_queue& commandQueue, bool retainObject = false) : + detail::Wrapper(commandQueue, retainObject) { } + + CommandQueue& operator = (const cl_command_queue& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + CommandQueue(const CommandQueue& queue) : detail::Wrapper(queue) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + CommandQueue& operator = (const CommandQueue &queue) + { + detail::Wrapper::operator=(queue); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + CommandQueue(CommandQueue&& queue) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(queue)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + CommandQueue& operator = (CommandQueue &&queue) + { + detail::Wrapper::operator=(std::move(queue)); + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + size_type offset, + size_type size, + void* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + size_type offset, + size_type size, + const void* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + size_type src_offset, + size_type dst_offset, + size_type size, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBuffer( + object_, src(), dst(), src_offset, dst_offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + void *ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBufferRect( + object_, + buffer(), + blocking, + buffer_offset.data(), + host_offset.data(), + region.data(), + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + const void *ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBufferRect( + object_, + buffer(), + blocking, + buffer_offset.data(), + host_offset.data(), + region.data(), + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const array& src_origin, + const array& dst_origin, + const array& region, + size_type src_row_pitch, + size_type src_slice_pitch, + size_type dst_row_pitch, + size_type dst_slice_pitch, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferRect( + object_, + src(), + dst(), + src_origin.data(), + dst_origin.data(), + region.data(), + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * Enqueue a command to fill a buffer object with a pattern + * of a given size. The pattern is specified as a vector type. + * \tparam PatternType The datatype of the pattern field. + * The pattern type must be an accepted OpenCL data type. + * \tparam offset Is the offset in bytes into the buffer at + * which to start filling. This must be a multiple of + * the pattern size. + * \tparam size Is the size in bytes of the region to fill. + * This must be a multiple of the pattern size. + */ + template + cl_int enqueueFillBuffer( + const Buffer& buffer, + PatternType pattern, + size_type offset, + size_type size, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillBuffer( + object_, + buffer(), + static_cast(&pattern), + sizeof(PatternType), + offset, + size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + void* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadImage( + object_, + image(), + blocking, + origin.data(), + region.data(), + row_pitch, + slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + const void* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteImage( + object_, + image(), + blocking, + origin.data(), + region.data(), + row_pitch, + slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const array& src_origin, + const array& dst_origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImage( + object_, + src(), + dst(), + src_origin.data(), + dst_origin.data(), + region.data(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA floating-point color value if + * the image channel data type is not an unnormalized signed or + * unsigned data type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_float4 fillColor, + const array& origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + origin.data(), + region.data(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA signed integer color value if + * the image channel data type is an unnormalized signed integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_int4 fillColor, + const array& origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + origin.data(), + region.data(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA unsigned integer color value if + * the image channel data type is an unnormalized unsigned integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_uint4 fillColor, + const array& origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + origin.data(), + region.data(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const array& src_origin, + const array& region, + size_type dst_offset, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImageToBuffer( + object_, + src(), + dst(), + src_origin.data(), + region.data(), + dst_offset, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + size_type src_offset, + const array& dst_origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferToImage( + object_, + src(), + dst(), + src_offset, + dst_origin.data(), + region.data(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + size_type offset, + size_type size, + const vector* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_event tmp; + cl_int error; + void * result = ::clEnqueueMapBuffer( + object_, buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + if (event != NULL && error == CL_SUCCESS) + *event = tmp; + + return result; + } + + void* enqueueMapImage( + const Image& buffer, + cl_bool blocking, + cl_map_flags flags, + const array& origin, + const array& region, + size_type * row_pitch, + size_type * slice_pitch, + const vector* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_event tmp; + cl_int error; + void * result = ::clEnqueueMapImage( + object_, buffer(), blocking, flags, + origin.data(), + region.data(), + row_pitch, slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + if (event != NULL && error == CL_SUCCESS) + *event = tmp; + return result; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a raw SVM pointer. + */ + template + cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(ptr), size, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a cl::pointer instance. + */ + template + cl_int enqueueMapSVM( + cl::pointer &ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(ptr.get()), size, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a cl::vector instance. + */ + template + cl_int enqueueMapSVM( + cl::vector &container, + cl_bool blocking, + cl_map_flags flags, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(container.data()), container.size()*sizeof(T), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + object_, memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a raw SVM pointer. + */ + template + cl_int enqueueUnmapSVM( + T* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(ptr), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a cl::pointer instance. + */ + template + cl_int enqueueUnmapSVM( + cl::pointer &ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(ptr.get()), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a cl::vector instance. + */ + template + cl_int enqueueUnmapSVM( + cl::vector &container, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(container.data()), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * Enqueues a marker command which waits for either a list of events to complete, + * or all previously enqueued commands to complete. + * + * Enqueues a marker command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command returns an event which can be waited on, + * i.e. this event can be waited on to insure that all events either in the event_wait_list + * or all previously enqueued commands, queued before this command to command_queue, + * have completed. + */ + cl_int enqueueMarkerWithWaitList( + const vector *events = 0, + Event *event = 0) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarkerWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MARKER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * A synchronization point that enqueues a barrier operation. + * + * Enqueues a barrier command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command blocks command execution, that is, any + * following commands enqueued after it do not execute until it completes. This command + * returns an event which can be waited on, i.e. this event can be waited on to insure that + * all events either in the event_wait_list or all previously enqueued commands, queued + * before this command to command_queue, have completed. + */ + cl_int enqueueBarrierWithWaitList( + const vector *events = 0, + Event *event = 0) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueBarrierWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_BARRIER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command to indicate with which device a set of memory objects + * should be associated. + */ + cl_int enqueueMigrateMemObjects( + const vector &memObjects, + cl_mem_migration_flags flags, + const vector* events = NULL, + Event* event = NULL + ) const + { + cl_event tmp; + + vector localMemObjects(memObjects.size()); + + for( int i = 0; i < (int)memObjects.size(); ++i ) { + localMemObjects[i] = memObjects[i](); + } + + cl_int err = detail::errHandler( + ::clEnqueueMigrateMemObjects( + object_, + (cl_uint)memObjects.size(), + localMemObjects.data(), + flags, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + /** + * Enqueues a command that will allow the host associate ranges within a set of + * SVM allocations with a device. + * @param sizes - The length from each pointer to migrate. + */ + template + cl_int enqueueMigrateSVM( + const cl::vector &svmRawPointers, + const cl::vector &sizes, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMigrateMem( + object_, + svmRawPointers.size(), static_cast(svmRawPointers.data()), + sizes.data(), // array of sizes not passed + flags, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MIGRATE_SVM_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host associate a set of SVM allocations with + * a device. + */ + template + cl_int enqueueMigrateSVM( + const cl::vector &svmRawPointers, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + return enqueueMigrateSVM(svmRawPointers, cl::vector(svmRawPointers.size()), flags, events, event); + } + + + /** + * Enqueues a command that will allow the host associate ranges within a set of + * SVM allocations with a device. + * @param sizes - The length from each pointer to migrate. + */ + template + cl_int enqueueMigrateSVM( + const cl::vector> &svmPointers, + const cl::vector &sizes, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + cl::vector svmRawPointers; + svmRawPointers.reserve(svmPointers.size()); + for (auto p : svmPointers) { + svmRawPointers.push_back(static_cast(p.get())); + } + + return enqueueMigrateSVM(svmRawPointers, sizes, flags, events, event); + } + + + /** + * Enqueues a command that will allow the host associate a set of SVM allocations with + * a device. + */ + template + cl_int enqueueMigrateSVM( + const cl::vector> &svmPointers, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + return enqueueMigrateSVM(svmPointers, cl::vector(svmPointers.size()), flags, events, event); + } + + /** + * Enqueues a command that will allow the host associate ranges within a set of + * SVM allocations with a device. + * @param sizes - The length from the beginning of each container to migrate. + */ + template + cl_int enqueueMigrateSVM( + const cl::vector> &svmContainers, + const cl::vector &sizes, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + cl::vector svmRawPointers; + svmRawPointers.reserve(svmContainers.size()); + for (auto p : svmContainers) { + svmRawPointers.push_back(static_cast(p.data())); + } + + return enqueueMigrateSVM(svmRawPointers, sizes, flags, events, event); + } + + /** + * Enqueues a command that will allow the host associate a set of SVM allocations with + * a device. + */ + template + cl_int enqueueMigrateSVM( + const cl::vector> &svmContainers, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + return enqueueMigrateSVM(svmContainers, cl::vector(svmContainers.size()), flags, events, event); + } + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + cl_int enqueueNDRangeKernel( + const Kernel& kernel, + const NDRange& offset, + const NDRange& global, + const NDRange& local = NullRange, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNDRangeKernel( + object_, kernel(), (cl_uint) global.dimensions(), + offset.dimensions() != 0 ? (const size_type*) offset : NULL, + (const size_type*) global, + local.dimensions() != 0 ? (const size_type*) local : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NDRANGE_KERNEL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) + CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask( + const Kernel& kernel, + const vector* events = NULL, + Event* event = NULL) const CL_API_SUFFIX__VERSION_1_2_DEPRECATED + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueTask( + object_, kernel(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_TASK_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) + + cl_int enqueueNativeKernel( + void (CL_CALLBACK *userFptr)(void *), + std::pair args, + const vector* mem_objects = NULL, + const vector* mem_locs = NULL, + const vector* events = NULL, + Event* event = NULL) const + { + size_type elements = 0; + if (mem_objects != NULL) { + elements = mem_objects->size(); + } + vector mems(elements); + for (unsigned int i = 0; i < elements; i++) { + mems[i] = ((*mem_objects)[i])(); + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNativeKernel( + object_, userFptr, args.first, args.second, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + mems.data(), + (mem_locs != NULL && mem_locs->size() > 0) ? (const void **) &mem_locs->front() : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NATIVE_KERNEL); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueMarker(Event* event = NULL) const CL_API_SUFFIX__VERSION_1_1_DEPRECATED + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarker( + object_, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MARKER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const vector& events) const CL_API_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueWaitForEvents( + object_, + (cl_uint) events.size(), + events.size() > 0 ? (const cl_event*) &events.front() : NULL), + __ENQUEUE_WAIT_FOR_EVENTS_ERR); + } +#endif // defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + + cl_int enqueueAcquireGLObjects( + const vector* mem_objects = NULL, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueAcquireGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseGLObjects( + const vector* mem_objects = NULL, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReleaseGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined (CL_HPP_USE_DX_INTEROP) +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); + + cl_int enqueueAcquireD3D10Objects( + const vector* mem_objects = NULL, + const vector* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueAcquireD3D10ObjectsKHR); +#endif +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueAcquireD3D10ObjectsKHR); +#endif + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueAcquireD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseD3D10Objects( + const vector* mem_objects = NULL, + const vector* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueReleaseD3D10ObjectsKHR); +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueReleaseD3D10ObjectsKHR); +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueReleaseD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueBarrier() const CL_API_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueBarrier(object_), + __ENQUEUE_BARRIER_ERR); + } +#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS + + cl_int flush() const + { + return detail::errHandler(::clFlush(object_), __FLUSH_ERR); + } + + cl_int finish() const + { + return detail::errHandler(::clFinish(object_), __FINISH_ERR); + } +}; // CommandQueue + +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag CommandQueue::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ CommandQueue CommandQueue::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int CommandQueue::default_error_ = CL_SUCCESS; + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +enum class DeviceQueueProperties : cl_command_queue_properties +{ + None = 0, + Profiling = CL_QUEUE_PROFILING_ENABLE, +}; + +inline DeviceQueueProperties operator|(DeviceQueueProperties lhs, DeviceQueueProperties rhs) +{ + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +/*! \class DeviceCommandQueue + * \brief DeviceCommandQueue interface for device cl_command_queues. + */ +class DeviceCommandQueue : public detail::Wrapper +{ +public: + + /*! + * Trivial empty constructor to create a null queue. + */ + DeviceCommandQueue() { } + + /*! + * Default construct device command queue on default context and device + */ + DeviceCommandQueue(DeviceQueueProperties properties, cl_int* err = NULL) + { + cl_int error; + cl::Context context = cl::Context::getDefault(); + cl::Device device = cl::Device::getDefault(); + + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); + + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, mergedProperties, 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * Create a device command queue for a specified device in the passed context. + */ + DeviceCommandQueue( + const Context& context, + const Device& device, + DeviceQueueProperties properties = DeviceQueueProperties::None, + cl_int* err = NULL) + { + cl_int error; + + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, mergedProperties, 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * Create a device command queue for a specified device in the passed context. + */ + DeviceCommandQueue( + const Context& context, + const Device& device, + cl_uint queueSize, + DeviceQueueProperties properties = DeviceQueueProperties::None, + cl_int* err = NULL) + { + cl_int error; + + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, mergedProperties, + CL_QUEUE_SIZE, queueSize, + 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructor from cl_command_queue - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + */ + explicit DeviceCommandQueue(const cl_command_queue& commandQueue, bool retainObject = false) : + detail::Wrapper(commandQueue, retainObject) { } + + DeviceCommandQueue& operator = (const cl_command_queue& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue(const DeviceCommandQueue& queue) : detail::Wrapper(queue) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue& operator = (const DeviceCommandQueue &queue) + { + detail::Wrapper::operator=(queue); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue(DeviceCommandQueue&& queue) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(queue)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue& operator = (DeviceCommandQueue &&queue) + { + detail::Wrapper::operator=(std::move(queue)); + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! + * Create a new default device command queue for the default device, + * in the default context and of the default size. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. + */ + static DeviceCommandQueue makeDefault( + cl_int *err = nullptr) + { + cl_int error; + cl::Context context = cl::Context::getDefault(); + cl::Device device = cl::Device::getDefault(); + + cl_command_queue_properties properties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, + 0 }; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + + return deviceQueue; + } + + /*! + * Create a new default device command queue for the specified device + * and of the default size. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. + */ + static DeviceCommandQueue makeDefault( + const Context &context, const Device &device, cl_int *err = nullptr) + { + cl_int error; + + cl_command_queue_properties properties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, + 0 }; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + + return deviceQueue; + } + + /*! + * Create a new default device command queue for the specified device + * and of the requested size in bytes. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. + */ + static DeviceCommandQueue makeDefault( + const Context &context, const Device &device, cl_uint queueSize, cl_int *err = nullptr) + { + cl_int error; + + cl_command_queue_properties properties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, + CL_QUEUE_SIZE, queueSize, + 0 }; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + + return deviceQueue; + } + + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + /*! + * Modify the default device command queue to be used for subsequent kernels. + * This can update the default command queue for a device repeatedly to account + * for kernels that rely on the default. + * @return updated default device command queue. + */ + static DeviceCommandQueue updateDefault(const Context &context, const Device &device, const DeviceCommandQueue &default_queue, cl_int *err = nullptr) + { + cl_int error; + error = clSetDefaultDeviceCommandQueue(context.get(), device.get(), default_queue.get()); + + detail::errHandler(error, __SET_DEFAULT_DEVICE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + return default_queue; + } + + /*! + * Return the current default command queue for the specified command queue + */ + static DeviceCommandQueue getDefault(const CommandQueue &queue, cl_int * err = NULL) + { + return queue.getInfo(err); + } + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 +}; // DeviceCommandQueue + +namespace detail +{ + // Specialization for device command queue + template <> + struct KernelArgumentHandler + { + static size_type size(const cl::DeviceCommandQueue&) { return sizeof(cl_command_queue); } + static const cl_command_queue* ptr(const cl::DeviceCommandQueue& value) { return &(value()); } + }; +} // namespace detail + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + +template< typename IteratorType > +Buffer::Buffer( + const Context &context, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + size_type size = sizeof(DataType)*(endIterator - startIterator); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + CommandQueue queue(context, 0, &error); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } +} + +template< typename IteratorType > +Buffer::Buffer( + const CommandQueue &queue, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if (readOnly) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if (useHostPtr) { + flags |= CL_MEM_USE_HOST_PTR; + } + + size_type size = sizeof(DataType)*(endIterator - startIterator); + + Context context = queue.getInfo(); + + if (useHostPtr) { + object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); + } + else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if (!useHostPtr) { + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } +} + +inline cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + size_type offset, + size_type size, + void* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + size_type offset, + size_type size, + const void* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + size_type offset, + size_type size, + const vector* events = NULL, + Event* event = NULL, + cl_int* err = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + void * result = ::clEnqueueMapBuffer( + queue(), buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; +} + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a raw SVM pointer. + */ +template +inline cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events, + Event* event) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + } + + return queue.enqueueMapSVM( + ptr, blocking, flags, size, events, event); +} + +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a cl::pointer instance. + */ +template +inline cl_int enqueueMapSVM( + cl::pointer &ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + } + + return queue.enqueueMapSVM( + ptr, blocking, flags, size, events, event); +} + +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a cl::vector instance. + */ +template +inline cl_int enqueueMapSVM( + cl::vector &container, + cl_bool blocking, + cl_map_flags flags, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + } + + return queue.enqueueMapSVM( + container, blocking, flags, events, event); +} + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +inline cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (error != CL_SUCCESS) { + return error; + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + queue(), memory(), mapped_ptr, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; +} + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a raw SVM pointer. + */ +template +inline cl_int enqueueUnmapSVM( + T* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } + + return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + +} + +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a cl::pointer instance. + */ +template +inline cl_int enqueueUnmapSVM( + cl::pointer &ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } + + return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); +} + +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a cl::vector instance. + */ +template +inline cl_int enqueueUnmapSVM( + cl::vector &container, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } + + return detail::errHandler(queue.enqueueUnmapSVM(container, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); +} + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +inline cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + size_type src_offset, + size_type dst_offset, + size_type size, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, startIterator, endIterator, buffer); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, buffer, startIterator, endIterator); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + size_type length = endIterator-startIterator; + size_type byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } +#if defined(_MSC_VER) + std::copy( + startIterator, + endIterator, + stdext::checked_array_iterator( + pointer, length)); +#else + std::copy(startIterator, endIterator, pointer); +#endif + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + size_type length = endIterator-startIterator; + size_type byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } + std::copy(pointer, pointer + length, startIterator); + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Blocking SVM map operation - performs a blocking map underneath. + */ +template +inline cl_int mapSVM(cl::vector &container) +{ + return enqueueMapSVM(container, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE); +} + +/** +* Blocking SVM map operation - performs a blocking map underneath. +*/ +template +inline cl_int unmapSVM(cl::vector &container) +{ + return enqueueUnmapSVM(container); +} + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 +inline cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + void *ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + const void *ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const array& src_origin, + const array& dst_origin, + const array& region, + size_type src_row_pitch, + size_type src_slice_pitch, + size_type dst_row_pitch, + size_type dst_slice_pitch, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferRect( + src, + dst, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); +} +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + +inline cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + void* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + const void* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const array& src_origin, + const array& dst_origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImage( + src, + dst, + src_origin, + dst_origin, + region, + events, + event); +} + +inline cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const array& src_origin, + const array& region, + size_type dst_offset, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImageToBuffer( + src, + dst, + src_origin, + region, + dst_offset, + events, + event); +} + +inline cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + size_type src_offset, + const array& dst_origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferToImage( + src, + dst, + src_offset, + dst_origin, + region, + events, + event); +} + + +inline cl_int flush(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.flush(); +} + +inline cl_int finish(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + + return queue.finish(); +} + +class EnqueueArgs +{ +private: + CommandQueue queue_; + const NDRange offset_; + const NDRange global_; + const NDRange local_; + vector events_; + + template + friend class KernelFunctor; + +public: + EnqueueArgs(NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(Event e, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(const vector &events, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(const vector &events, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(const vector &events, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } +}; + + +//---------------------------------------------------------------------------------------------- + + +/** + * Type safe kernel functor. + * + */ +template +class KernelFunctor +{ +private: + Kernel kernel_; + + template + void setArgs(T0&& t0, T1s&&... t1s) + { + kernel_.setArg(index, t0); + setArgs(std::forward(t1s)...); + } + + template + void setArgs(T0&& t0) + { + kernel_.setArg(index, t0); + } + + template + void setArgs() + { + } + + +public: + KernelFunctor(Kernel kernel) : kernel_(kernel) + {} + + KernelFunctor( + const Program& program, + const string name, + cl_int * err = NULL) : + kernel_(program, name.c_str(), err) + {} + + //! \brief Return type of the functor + typedef Event result_type; + + /** + * Enqueue kernel. + * @param args Launch parameters of the kernel. + * @param t0... List of kernel arguments based on the template type of the functor. + */ + Event operator() ( + const EnqueueArgs& args, + Ts... ts) + { + Event event; + setArgs<0>(std::forward(ts)...); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; + } + + /** + * Enqueue kernel with support for error code. + * @param args Launch parameters of the kernel. + * @param t0... List of kernel arguments based on the template type of the functor. + * @param error Out parameter returning the error code from the execution. + */ + Event operator() ( + const EnqueueArgs& args, + Ts... ts, + cl_int &error) + { + Event event; + setArgs<0>(std::forward(ts)...); + + error = args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_int setSVMPointers(const vector &pointerList) + { + return kernel_.setSVMPointers(pointerList); + } + + template + cl_int setSVMPointers(const T0 &t0, T1s &... ts) + { + return kernel_.setSVMPointers(t0, ts...); + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + Kernel getKernel() + { + return kernel_; + } +}; + +namespace compatibility { + /** + * Backward compatibility class to ensure that cl.hpp code works with opencl.hpp. + * Please use KernelFunctor directly. + */ + template + struct make_kernel + { + typedef KernelFunctor FunctorType; + + FunctorType functor_; + + make_kernel( + const Program& program, + const string name, + cl_int * err = NULL) : + functor_(FunctorType(program, name, err)) + {} + + make_kernel( + const Kernel kernel) : + functor_(FunctorType(kernel)) + {} + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + Ts...); + + Event operator()( + const EnqueueArgs& enqueueArgs, + Ts... args) + { + return functor_( + enqueueArgs, args...); + } + }; +} // namespace compatibility + + +//---------------------------------------------------------------------------------------------------------------------- + +#undef CL_HPP_ERR_STR_ +#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) +#undef __GET_DEVICE_INFO_ERR +#undef __GET_PLATFORM_INFO_ERR +#undef __GET_DEVICE_IDS_ERR +#undef __GET_PLATFORM_IDS_ERR +#undef __GET_CONTEXT_INFO_ERR +#undef __GET_EVENT_INFO_ERR +#undef __GET_EVENT_PROFILE_INFO_ERR +#undef __GET_MEM_OBJECT_INFO_ERR +#undef __GET_IMAGE_INFO_ERR +#undef __GET_SAMPLER_INFO_ERR +#undef __GET_KERNEL_INFO_ERR +#undef __GET_KERNEL_ARG_INFO_ERR +#undef __GET_KERNEL_SUB_GROUP_INFO_ERR +#undef __GET_KERNEL_WORK_GROUP_INFO_ERR +#undef __GET_PROGRAM_INFO_ERR +#undef __GET_PROGRAM_BUILD_INFO_ERR +#undef __GET_COMMAND_QUEUE_INFO_ERR +#undef __CREATE_CONTEXT_ERR +#undef __CREATE_CONTEXT_FROM_TYPE_ERR +#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR +#undef __CREATE_BUFFER_ERR +#undef __COPY_ERR +#undef __CREATE_SUBBUFFER_ERR +#undef __CREATE_GL_BUFFER_ERR +#undef __CREATE_GL_RENDER_BUFFER_ERR +#undef __GET_GL_OBJECT_INFO_ERR +#undef __CREATE_IMAGE_ERR +#undef __CREATE_GL_TEXTURE_ERR +#undef __IMAGE_DIMENSION_ERR +#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR +#undef __CREATE_USER_EVENT_ERR +#undef __SET_USER_EVENT_STATUS_ERR +#undef __SET_EVENT_CALLBACK_ERR +#undef __WAIT_FOR_EVENTS_ERR +#undef __CREATE_KERNEL_ERR +#undef __SET_KERNEL_ARGS_ERR +#undef __CREATE_PROGRAM_WITH_SOURCE_ERR +#undef __CREATE_PROGRAM_WITH_IL_ERR +#undef __CREATE_PROGRAM_WITH_BINARY_ERR +#undef __CREATE_PROGRAM_WITH_IL_ERR +#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR +#undef __BUILD_PROGRAM_ERR +#undef __COMPILE_PROGRAM_ERR +#undef __LINK_PROGRAM_ERR +#undef __CREATE_KERNELS_IN_PROGRAM_ERR +#undef __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR +#undef __CREATE_SAMPLER_WITH_PROPERTIES_ERR +#undef __SET_COMMAND_QUEUE_PROPERTY_ERR +#undef __ENQUEUE_READ_BUFFER_ERR +#undef __ENQUEUE_READ_BUFFER_RECT_ERR +#undef __ENQUEUE_WRITE_BUFFER_ERR +#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR +#undef __ENQEUE_COPY_BUFFER_ERR +#undef __ENQEUE_COPY_BUFFER_RECT_ERR +#undef __ENQUEUE_FILL_BUFFER_ERR +#undef __ENQUEUE_READ_IMAGE_ERR +#undef __ENQUEUE_WRITE_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_ERR +#undef __ENQUEUE_FILL_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR +#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR +#undef __ENQUEUE_MAP_BUFFER_ERR +#undef __ENQUEUE_MAP_IMAGE_ERR +#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR +#undef __ENQUEUE_NDRANGE_KERNEL_ERR +#undef __ENQUEUE_NATIVE_KERNEL +#undef __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR +#undef __ENQUEUE_MIGRATE_SVM_ERR +#undef __ENQUEUE_ACQUIRE_GL_ERR +#undef __ENQUEUE_RELEASE_GL_ERR +#undef __CREATE_PIPE_ERR +#undef __GET_PIPE_INFO_ERR +#undef __RETAIN_ERR +#undef __RELEASE_ERR +#undef __FLUSH_ERR +#undef __FINISH_ERR +#undef __VECTOR_CAPACITY_ERR +#undef __CREATE_SUB_DEVICES_ERR +#undef __CREATE_SUB_DEVICES_ERR +#undef __ENQUEUE_MARKER_ERR +#undef __ENQUEUE_WAIT_FOR_EVENTS_ERR +#undef __ENQUEUE_BARRIER_ERR +#undef __UNLOAD_COMPILER_ERR +#undef __CREATE_GL_TEXTURE_2D_ERR +#undef __CREATE_GL_TEXTURE_3D_ERR +#undef __CREATE_IMAGE2D_ERR +#undef __CREATE_IMAGE3D_ERR +#undef __CREATE_COMMAND_QUEUE_ERR +#undef __ENQUEUE_TASK_ERR +#undef __CREATE_SAMPLER_ERR +#undef __ENQUEUE_MARKER_WAIT_LIST_ERR +#undef __ENQUEUE_BARRIER_WAIT_LIST_ERR +#undef __CLONE_KERNEL_ERR +#undef __GET_HOST_TIMER_ERR +#undef __GET_DEVICE_AND_HOST_TIMER_ERR + +#endif //CL_HPP_USER_OVERRIDE_ERROR_STRINGS + +// Extensions +#undef CL_HPP_INIT_CL_EXT_FCN_PTR_ +#undef CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_ + +#if defined(CL_HPP_USE_CL_DEVICE_FISSION) +#undef CL_HPP_PARAM_NAME_DEVICE_FISSION_ +#endif // CL_HPP_USE_CL_DEVICE_FISSION + +#undef CL_HPP_NOEXCEPT_ +#undef CL_HPP_DEFINE_STATIC_MEMBER_ + +} // namespace cl + +#endif // CL_HPP_ diff --git a/Makefile b/Makefile index 5661d8bb2..f5fa3b870 100644 --- a/Makefile +++ b/Makefile @@ -97,7 +97,7 @@ ifdef LLAMA_OPENBLAS LDFLAGS += -lopenblas endif ifdef LLAMA_CLBLAST - CFLAGS += -DGGML_USE_CLBLAST + CFLAGS += -DGGML_USE_CLBLAST -DGGML_USE_OPENBLAS LDFLAGS += -lclblast -lOpenCL endif ifdef LLAMA_GPROF @@ -121,11 +121,18 @@ ifneq ($(filter armv8%,$(UNAME_M)),) CFLAGS += -mfp16-format=ieee -mno-unaligned-access endif -BLAS_BUILD = +OPENBLAS_BUILD = ifeq ($(OS),Windows_NT) - BLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o libopenblas.lib -shared -o koboldcpp_blas.dll $(LDFLAGS) + OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas.dll $(LDFLAGS) else - BLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use openblas, please install it seperately, then link it manually with LLAMA_OPENBLAS=1. This is just a reminder, not an error.' + OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use openblas, please install it seperately, then link it manually with LLAMA_OPENBLAS=1. This is just a reminder, not an error.' +endif + +CLBLAST_BUILD = +ifeq ($(OS),Windows_NT) + CLBLAST_BUILD = $(CXX) $(CXXFLAGS) ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/OpenCL.lib lib/clblast.lib -shared -o koboldcpp_clblast.dll $(LDFLAGS) +else + CLBLAST_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use CLBlast, please install it seperately, then link it manually with LLAMA_CLBLAST=1. This is just a reminder, not an error.' endif # @@ -143,7 +150,7 @@ $(info I CC: $(CCV)) $(info I CXX: $(CXXV)) $(info ) -default: llamalib llamalib_blas +default: llamalib llamalib_openblas llamalib_clblast # # Build library @@ -152,8 +159,11 @@ default: llamalib llamalib_blas ggml.o: ggml.c ggml.h $(CC) $(CFLAGS) -c ggml.c -o ggml.o -ggml_blas.o: ggml.c ggml.h - $(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_blas.o +ggml_openblas.o: ggml.c ggml.h + $(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas.o + +ggml_clblast.o: ggml.c ggml.h + $(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -DGGML_USE_CLBLAST -c ggml.c -o ggml_clblast.o ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h $(CC) $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o @@ -174,7 +184,7 @@ gpttype_adapter.o: $(CXX) $(CXXFLAGS) -c gpttype_adapter.cpp -o gpttype_adapter.o clean: - rm -vf *.o main quantize quantize-stats perplexity embedding main.exe quantize.exe koboldcpp.dll koboldcpp_blas.dll gptj.exe gpt2.exe + rm -vf *.o main quantize quantize-stats perplexity embedding main.exe quantize.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_clblast.dll gptj.exe gpt2.exe main: examples/main/main.cpp ggml.o llama.o common.o $(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS) @@ -185,8 +195,11 @@ main: examples/main/main.cpp ggml.o llama.o common.o llamalib: ggml.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o $(CXX) $(CXXFLAGS) ggml.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o -shared -o koboldcpp.dll $(LDFLAGS) -llamalib_blas: ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o - $(BLAS_BUILD) +llamalib_openblas: ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o + $(OPENBLAS_BUILD) + +llamalib_clblast: ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o + $(CLBLAST_BUILD) quantize: examples/quantize/quantize.cpp ggml.o llama.o $(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS) diff --git a/cblas.h b/cblas.h index 64ed8b327..fb8102294 100644 --- a/cblas.h +++ b/cblas.h @@ -1,3 +1,5 @@ +#pragma once + #ifndef CBLAS_H #define CBLAS_H diff --git a/clblast.dll b/clblast.dll new file mode 100644 index 000000000..f6d96c89a Binary files /dev/null and b/clblast.dll differ diff --git a/clblast_c.h b/clblast_c.h new file mode 100644 index 000000000..07f1fd0c2 --- /dev/null +++ b/clblast_c.h @@ -0,0 +1,1707 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file contains the plain C interface to the CLBlast BLAS routines, the counter-part of the +// normal 'clblast.h' C++ header. +// +// ================================================================================================= + +#ifndef CLBLAST_CLBLAST_C_H_ +#define CLBLAST_CLBLAST_C_H_ + +// Includes the normal OpenCL C header +#if defined(__APPLE__) || defined(__MACOSX) + #include +#else + #include +#endif + +// Exports library functions under Windows when building a DLL. See also: +// https://msdn.microsoft.com/en-us/library/a90k134d.aspx +#if defined(_WIN32) && defined(CLBLAST_DLL) + #if defined(COMPILING_DLL) + #define PUBLIC_API __declspec(dllexport) + #else + #define PUBLIC_API __declspec(dllimport) + #endif +#else + #define PUBLIC_API +#endif + +// Version numbering (v1.5.3) +#define CLBLAST_VERSION_MAJOR 1 +#define CLBLAST_VERSION_MINOR 5 +#define CLBLAST_VERSION_PATCH 3 + +// The C interface +#ifdef __cplusplus +extern "C" { +#endif + +// ================================================================================================= + +// Status codes. These codes can be returned by functions declared in this header file. The error +// codes match either the standard OpenCL error codes or the clBLAS error codes. +typedef enum CLBlastStatusCode_ { + + // Status codes in common with the OpenCL standard + CLBlastSuccess = 0, // CL_SUCCESS + CLBlastOpenCLCompilerNotAvailable= -3, // CL_COMPILER_NOT_AVAILABLE + CLBlastTempBufferAllocFailure = -4, // CL_MEM_OBJECT_ALLOCATION_FAILURE + CLBlastOpenCLOutOfResources = -5, // CL_OUT_OF_RESOURCES + CLBlastOpenCLOutOfHostMemory = -6, // CL_OUT_OF_HOST_MEMORY + CLBlastOpenCLBuildProgramFailure = -11, // CL_BUILD_PROGRAM_FAILURE: OpenCL compilation error + CLBlastInvalidValue = -30, // CL_INVALID_VALUE + CLBlastInvalidCommandQueue = -36, // CL_INVALID_COMMAND_QUEUE + CLBlastInvalidMemObject = -38, // CL_INVALID_MEM_OBJECT + CLBlastInvalidBinary = -42, // CL_INVALID_BINARY + CLBlastInvalidBuildOptions = -43, // CL_INVALID_BUILD_OPTIONS + CLBlastInvalidProgram = -44, // CL_INVALID_PROGRAM + CLBlastInvalidProgramExecutable = -45, // CL_INVALID_PROGRAM_EXECUTABLE + CLBlastInvalidKernelName = -46, // CL_INVALID_KERNEL_NAME + CLBlastInvalidKernelDefinition = -47, // CL_INVALID_KERNEL_DEFINITION + CLBlastInvalidKernel = -48, // CL_INVALID_KERNEL + CLBlastInvalidArgIndex = -49, // CL_INVALID_ARG_INDEX + CLBlastInvalidArgValue = -50, // CL_INVALID_ARG_VALUE + CLBlastInvalidArgSize = -51, // CL_INVALID_ARG_SIZE + CLBlastInvalidKernelArgs = -52, // CL_INVALID_KERNEL_ARGS + CLBlastInvalidLocalNumDimensions = -53, // CL_INVALID_WORK_DIMENSION: Too many thread dimensions + CLBlastInvalidLocalThreadsTotal = -54, // CL_INVALID_WORK_GROUP_SIZE: Too many threads in total + CLBlastInvalidLocalThreadsDim = -55, // CL_INVALID_WORK_ITEM_SIZE: ... or for a specific dimension + CLBlastInvalidGlobalOffset = -56, // CL_INVALID_GLOBAL_OFFSET + CLBlastInvalidEventWaitList = -57, // CL_INVALID_EVENT_WAIT_LIST + CLBlastInvalidEvent = -58, // CL_INVALID_EVENT + CLBlastInvalidOperation = -59, // CL_INVALID_OPERATION + CLBlastInvalidBufferSize = -61, // CL_INVALID_BUFFER_SIZE + CLBlastInvalidGlobalWorkSize = -63, // CL_INVALID_GLOBAL_WORK_SIZE + + // Status codes in common with the clBLAS library + CLBlastNotImplemented = -1024, // Routine or functionality not implemented yet + CLBlastInvalidMatrixA = -1022, // Matrix A is not a valid OpenCL buffer + CLBlastInvalidMatrixB = -1021, // Matrix B is not a valid OpenCL buffer + CLBlastInvalidMatrixC = -1020, // Matrix C is not a valid OpenCL buffer + CLBlastInvalidVectorX = -1019, // Vector X is not a valid OpenCL buffer + CLBlastInvalidVectorY = -1018, // Vector Y is not a valid OpenCL buffer + CLBlastInvalidDimension = -1017, // Dimensions M, N, and K have to be larger than zero + CLBlastInvalidLeadDimA = -1016, // LD of A is smaller than the matrix's first dimension + CLBlastInvalidLeadDimB = -1015, // LD of B is smaller than the matrix's first dimension + CLBlastInvalidLeadDimC = -1014, // LD of C is smaller than the matrix's first dimension + CLBlastInvalidIncrementX = -1013, // Increment of vector X cannot be zero + CLBlastInvalidIncrementY = -1012, // Increment of vector Y cannot be zero + CLBlastInsufficientMemoryA = -1011, // Matrix A's OpenCL buffer is too small + CLBlastInsufficientMemoryB = -1010, // Matrix B's OpenCL buffer is too small + CLBlastInsufficientMemoryC = -1009, // Matrix C's OpenCL buffer is too small + CLBlastInsufficientMemoryX = -1008, // Vector X's OpenCL buffer is too small + CLBlastInsufficientMemoryY = -1007, // Vector Y's OpenCL buffer is too small + + // Custom additional status codes for CLBlast + CLBlastInsufficientMemoryTemp = -2050, // Temporary buffer provided to GEMM routine is too small + CLBlastInvalidBatchCount = -2049, // The batch count needs to be positive + CLBlastInvalidOverrideKernel = -2048, // Trying to override parameters for an invalid kernel + CLBlastMissingOverrideParameter = -2047, // Missing override parameter(s) for the target kernel + CLBlastInvalidLocalMemUsage = -2046, // Not enough local memory available on this device + CLBlastNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device + CLBlastNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device + CLBlastInvalidVectorScalar = -2043, // The unit-sized vector is not a valid OpenCL buffer + CLBlastInsufficientMemoryScalar = -2042, // The unit-sized vector's OpenCL buffer is too small + CLBlastDatabaseError = -2041, // Entry for the device was not found in the database + CLBlastUnknownError = -2040, // A catch-all error code representing an unspecified error + CLBlastUnexpectedError = -2039, // A catch-all error code representing an unexpected exception +} CLBlastStatusCode; + +// Matrix layout and transpose types +typedef enum CLBlastLayout_ { CLBlastLayoutRowMajor = 101, + CLBlastLayoutColMajor = 102 } CLBlastLayout; +typedef enum CLBlastTranspose_ { CLBlastTransposeNo = 111, CLBlastTransposeYes = 112, + CLBlastTransposeConjugate = 113 } CLBlastTranspose; +typedef enum CLBlastTriangle_ { CLBlastTriangleUpper = 121, + CLBlastTriangleLower = 122 } CLBlastTriangle; +typedef enum CLBlastDiagonal_ { CLBlastDiagonalNonUnit = 131, + CLBlastDiagonalUnit = 132 } CLBlastDiagonal; +typedef enum CLBlastSide_ { CLBlastSideLeft = 141, CLBlastSideRight = 142 } CLBlastSide; +typedef enum CLBlastKernelMode_ { CLBlastKernelModeCrossCorrelation = 151, CLBlastKernelModeConvolution = 152 } CLBlastKernelMode; + +// Precision enum (values in bits) +typedef enum CLBlastPrecision_ { CLBlastPrecisionHalf = 16, CLBlastPrecisionSingle = 32, + CLBlastPrecisionDouble = 64, CLBlastPrecisionComplexSingle = 3232, + CLBlastPrecisionComplexDouble = 6464 } CLBlastPrecision; + +// ================================================================================================= +// BLAS level-1 (vector-vector) routines +// ================================================================================================= + +// Generate givens plane rotation: SROTG/DROTG +CLBlastStatusCode PUBLIC_API CLBlastSrotg(cl_mem sa_buffer, const size_t sa_offset, + cl_mem sb_buffer, const size_t sb_offset, + cl_mem sc_buffer, const size_t sc_offset, + cl_mem ss_buffer, const size_t ss_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDrotg(cl_mem sa_buffer, const size_t sa_offset, + cl_mem sb_buffer, const size_t sb_offset, + cl_mem sc_buffer, const size_t sc_offset, + cl_mem ss_buffer, const size_t ss_offset, + cl_command_queue* queue, cl_event* event); + +// Generate modified givens plane rotation: SROTMG/DROTMG +CLBlastStatusCode PUBLIC_API CLBlastSrotmg(cl_mem sd1_buffer, const size_t sd1_offset, + cl_mem sd2_buffer, const size_t sd2_offset, + cl_mem sx1_buffer, const size_t sx1_offset, + const cl_mem sy1_buffer, const size_t sy1_offset, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDrotmg(cl_mem sd1_buffer, const size_t sd1_offset, + cl_mem sd2_buffer, const size_t sd2_offset, + cl_mem sx1_buffer, const size_t sx1_offset, + const cl_mem sy1_buffer, const size_t sy1_offset, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event); + +// Apply givens plane rotation: SROT/DROT +CLBlastStatusCode PUBLIC_API CLBlastSrot(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const float cos, + const float sin, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDrot(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const double cos, + const double sin, + cl_command_queue* queue, cl_event* event); + +// Apply modified givens plane rotation: SROTM/DROTM +CLBlastStatusCode PUBLIC_API CLBlastSrotm(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDrotm(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem sparam_buffer, const size_t sparam_offset, + cl_command_queue* queue, cl_event* event); + +// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP +CLBlastStatusCode PUBLIC_API CLBlastSswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL +CLBlastStatusCode PUBLIC_API CLBlastSscal(const size_t n, + const float alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDscal(const size_t n, + const double alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCscal(const size_t n, + const cl_float2 alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZscal(const size_t n, + const cl_double2 alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHscal(const size_t n, + const cl_half alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY +CLBlastStatusCode PUBLIC_API CLBlastScopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY +CLBlastStatusCode PUBLIC_API CLBlastSaxpy(const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDaxpy(const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCaxpy(const size_t n, + const cl_float2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZaxpy(const size_t n, + const cl_double2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHaxpy(const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Dot product of two vectors: SDOT/DDOT/HDOT +CLBlastStatusCode PUBLIC_API CLBlastSdot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDdot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHdot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Dot product of two complex vectors: CDOTU/ZDOTU +CLBlastStatusCode PUBLIC_API CLBlastCdotu(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZdotu(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC +CLBlastStatusCode PUBLIC_API CLBlastCdotc(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZdotc(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2 +CLBlastStatusCode PUBLIC_API CLBlastSnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastScnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDznrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM +CLBlastStatusCode PUBLIC_API CLBlastSasum(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDasum(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastScasum(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDzasum(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHasum(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM +CLBlastStatusCode PUBLIC_API CLBlastSsum(const size_t n, + cl_mem sum_buffer, const size_t sum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDsum(const size_t n, + cl_mem sum_buffer, const size_t sum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastScsum(const size_t n, + cl_mem sum_buffer, const size_t sum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDzsum(const size_t n, + cl_mem sum_buffer, const size_t sum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHsum(const size_t n, + cl_mem sum_buffer, const size_t sum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX +CLBlastStatusCode PUBLIC_API CLBlastiSamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiDamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiCamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiZamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiHamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Index of absolute minimum value in a vector (non-BLAS function): iSAMIN/iDAMIN/iCAMIN/iZAMIN/iHAMIN +CLBlastStatusCode PUBLIC_API CLBlastiSamin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiDamin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiCamin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiZamin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiHamin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX +CLBlastStatusCode PUBLIC_API CLBlastiSmax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiDmax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiCmax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiZmax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiHmax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN +CLBlastStatusCode PUBLIC_API CLBlastiSmin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiDmin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiCmin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiZmin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastiHmin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// ================================================================================================= +// BLAS level-2 (matrix-vector) routines +// ================================================================================================= + +// General matrix-vector multiplication: SGEMV/DGEMV/CGEMV/ZGEMV/HGEMV +CLBlastStatusCode PUBLIC_API CLBlastSgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_float2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_double2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_half beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV +CLBlastStatusCode PUBLIC_API CLBlastSgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_float2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_double2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_half beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Hermitian matrix-vector multiplication: CHEMV/ZHEMV +CLBlastStatusCode PUBLIC_API CLBlastChemv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_float2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZhemv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_double2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV +CLBlastStatusCode PUBLIC_API CLBlastChbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, const size_t k, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_float2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZhbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, const size_t k, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_double2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV +CLBlastStatusCode PUBLIC_API CLBlastChpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_float2 alpha, + const cl_mem ap_buffer, const size_t ap_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_float2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZhpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_double2 alpha, + const cl_mem ap_buffer, const size_t ap_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_double2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Symmetric matrix-vector multiplication: SSYMV/DSYMV/HSYMV +CLBlastStatusCode PUBLIC_API CLBlastSsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_half beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Symmetric banded matrix-vector multiplication: SSBMV/DSBMV/HSBMV +CLBlastStatusCode PUBLIC_API CLBlastSsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, const size_t k, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_half beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV +CLBlastStatusCode PUBLIC_API CLBlastSspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const float alpha, + const cl_mem ap_buffer, const size_t ap_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const double alpha, + const cl_mem ap_buffer, const size_t ap_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_half alpha, + const cl_mem ap_buffer, const size_t ap_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_half beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event); + +// Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV/HTRMV +CLBlastStatusCode PUBLIC_API CLBlastStrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV/HTBMV +CLBlastStatusCode PUBLIC_API CLBlastStbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, const size_t k, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, const size_t k, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, const size_t k, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, const size_t k, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, const size_t k, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV +CLBlastStatusCode PUBLIC_API CLBlastStpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem ap_buffer, const size_t ap_offset, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem ap_buffer, const size_t ap_offset, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem ap_buffer, const size_t ap_offset, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem ap_buffer, const size_t ap_offset, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem ap_buffer, const size_t ap_offset, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV +CLBlastStatusCode PUBLIC_API CLBlastStrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Solves a banded triangular system of equations: STBSV/DTBSV/CTBSV/ZTBSV +CLBlastStatusCode PUBLIC_API CLBlastStbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, const size_t k, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDtbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, const size_t k, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCtbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, const size_t k, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZtbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, const size_t k, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// Solves a packed triangular system of equations: STPSV/DTPSV/CTPSV/ZTPSV +CLBlastStatusCode PUBLIC_API CLBlastStpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem ap_buffer, const size_t ap_offset, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDtpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem ap_buffer, const size_t ap_offset, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCtpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem ap_buffer, const size_t ap_offset, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZtpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem ap_buffer, const size_t ap_offset, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + +// General rank-1 matrix update: SGER/DGER/HGER +CLBlastStatusCode PUBLIC_API CLBlastSger(const CLBlastLayout layout, + const size_t m, const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDger(const CLBlastLayout layout, + const size_t m, const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHger(const CLBlastLayout layout, + const size_t m, const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); + +// General rank-1 complex matrix update: CGERU/ZGERU +CLBlastStatusCode PUBLIC_API CLBlastCgeru(const CLBlastLayout layout, + const size_t m, const size_t n, + const cl_float2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZgeru(const CLBlastLayout layout, + const size_t m, const size_t n, + const cl_double2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); + +// General rank-1 complex conjugated matrix update: CGERC/ZGERC +CLBlastStatusCode PUBLIC_API CLBlastCgerc(const CLBlastLayout layout, + const size_t m, const size_t n, + const cl_float2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZgerc(const CLBlastLayout layout, + const size_t m, const size_t n, + const cl_double2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); + +// Hermitian rank-1 matrix update: CHER/ZHER +CLBlastStatusCode PUBLIC_API CLBlastCher(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZher(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); + +// Hermitian packed rank-1 matrix update: CHPR/ZHPR +CLBlastStatusCode PUBLIC_API CLBlastChpr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZhpr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event); + +// Hermitian rank-2 matrix update: CHER2/ZHER2 +CLBlastStatusCode PUBLIC_API CLBlastCher2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_float2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZher2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_double2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); + +// Hermitian packed rank-2 matrix update: CHPR2/ZHPR2 +CLBlastStatusCode PUBLIC_API CLBlastChpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_float2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZhpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_double2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event); + +// Symmetric rank-1 matrix update: SSYR/DSYR/HSYR +CLBlastStatusCode PUBLIC_API CLBlastSsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); + +// Symmetric packed rank-1 matrix update: SSPR/DSPR/HSPR +CLBlastStatusCode PUBLIC_API CLBlastSspr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDspr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHspr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event); + +// Symmetric rank-2 matrix update: SSYR2/DSYR2/HSYR2 +CLBlastStatusCode PUBLIC_API CLBlastSsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_command_queue* queue, cl_event* event); + +// Symmetric packed rank-2 matrix update: SSPR2/DSPR2/HSPR2 +CLBlastStatusCode PUBLIC_API CLBlastSspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_command_queue* queue, cl_event* event); + +// ================================================================================================= +// BLAS level-3 (matrix-matrix) routines +// ================================================================================================= + +// General matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM +CLBlastStatusCode PUBLIC_API CLBlastSgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); + +// Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM/HSYMM +CLBlastStatusCode PUBLIC_API CLBlastSsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const size_t m, const size_t n, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const size_t m, const size_t n, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const size_t m, const size_t n, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const size_t m, const size_t n, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const size_t m, const size_t n, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); + +// Hermitian matrix-matrix multiplication: CHEMM/ZHEMM +CLBlastStatusCode PUBLIC_API CLBlastChemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const size_t m, const size_t n, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZhemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const size_t m, const size_t n, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); + +// Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK/HSYRK +CLBlastStatusCode PUBLIC_API CLBlastSsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const size_t n, const size_t k, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const size_t n, const size_t k, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const size_t n, const size_t k, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); + +// Rank-K update of a hermitian matrix: CHERK/ZHERK +CLBlastStatusCode PUBLIC_API CLBlastCherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); + +// Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K/HSYR2K +CLBlastStatusCode PUBLIC_API CLBlastSsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const size_t n, const size_t k, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const size_t n, const size_t k, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const size_t n, const size_t k, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); + +// Rank-2K update of a hermitian matrix: CHER2K/ZHER2K +CLBlastStatusCode PUBLIC_API CLBlastCher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const size_t n, const size_t k, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const size_t n, const size_t k, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); + +// Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM/HTRMM +CLBlastStatusCode PUBLIC_API CLBlastStrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t m, const size_t n, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t m, const size_t n, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t m, const size_t n, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t m, const size_t n, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t m, const size_t n, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); + +// Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM +CLBlastStatusCode PUBLIC_API CLBlastStrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t m, const size_t n, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t m, const size_t n, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t m, const size_t n, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t m, const size_t n, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); + +// ================================================================================================= +// Extra non-BLAS routines (level-X) +// ================================================================================================= + +// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD +CLBlastStatusCode PUBLIC_API CLBlastShad(const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const float beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDhad(const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const double beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastChad(const size_t n, + const cl_float2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const cl_float2 beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZhad(const size_t n, + const cl_double2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const cl_double2 beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHhad(const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const cl_half beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event); + +// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY +CLBlastStatusCode PUBLIC_API CLBlastSomatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDomatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastComatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZomatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHomatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const size_t m, const size_t n, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_command_queue* queue, cl_event* event); + +// Im2col function (non-BLAS function): SIM2COL/DIM2COL/CIM2COL/ZIM2COL/HIM2COL +CLBlastStatusCode PUBLIC_API CLBlastSim2col(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, + const cl_mem im_buffer, const size_t im_offset, + cl_mem col_buffer, const size_t col_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDim2col(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, + const cl_mem im_buffer, const size_t im_offset, + cl_mem col_buffer, const size_t col_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCim2col(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, + const cl_mem im_buffer, const size_t im_offset, + cl_mem col_buffer, const size_t col_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZim2col(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, + const cl_mem im_buffer, const size_t im_offset, + cl_mem col_buffer, const size_t col_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHim2col(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, + const cl_mem im_buffer, const size_t im_offset, + cl_mem col_buffer, const size_t col_offset, + cl_command_queue* queue, cl_event* event); + +// Col2im function (non-BLAS function): SCOL2IM/DCOL2IM/CCOL2IM/ZCOL2IM/HCOL2IM +CLBlastStatusCode PUBLIC_API CLBlastScol2im(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, + const cl_mem col_buffer, const size_t col_offset, + cl_mem im_buffer, const size_t im_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDcol2im(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, + const cl_mem col_buffer, const size_t col_offset, + cl_mem im_buffer, const size_t im_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCcol2im(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, + const cl_mem col_buffer, const size_t col_offset, + cl_mem im_buffer, const size_t im_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZcol2im(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, + const cl_mem col_buffer, const size_t col_offset, + cl_mem im_buffer, const size_t im_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHcol2im(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, + const cl_mem col_buffer, const size_t col_offset, + cl_mem im_buffer, const size_t im_offset, + cl_command_queue* queue, cl_event* event); + +// Batched convolution as GEMM (non-BLAS function): SCONVGEMM/DCONVGEMM/HCONVGEMM +CLBlastStatusCode PUBLIC_API CLBlastSconvgemm(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, const size_t num_kernels, const size_t batch_count, + const cl_mem im_buffer, const size_t im_offset, + const cl_mem kernel_buffer, const size_t kernel_offset, + cl_mem result_buffer, const size_t result_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDconvgemm(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, const size_t num_kernels, const size_t batch_count, + const cl_mem im_buffer, const size_t im_offset, + const cl_mem kernel_buffer, const size_t kernel_offset, + cl_mem result_buffer, const size_t result_offset, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHconvgemm(const CLBlastKernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, const size_t num_kernels, const size_t batch_count, + const cl_mem im_buffer, const size_t im_offset, + const cl_mem kernel_buffer, const size_t kernel_offset, + cl_mem result_buffer, const size_t result_offset, + cl_command_queue* queue, cl_event* event); + +// Batched version of AXPY: SAXPYBATCHED/DAXPYBATCHED/CAXPYBATCHED/ZAXPYBATCHED/HAXPYBATCHED +CLBlastStatusCode PUBLIC_API CLBlastSaxpyBatched(const size_t n, + const float *alphas, + const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc, + cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDaxpyBatched(const size_t n, + const double *alphas, + const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc, + cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCaxpyBatched(const size_t n, + const cl_float2 *alphas, + const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc, + cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZaxpyBatched(const size_t n, + const cl_double2 *alphas, + const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc, + cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHaxpyBatched(const size_t n, + const cl_half *alphas, + const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc, + cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); + +// Batched version of GEMM: SGEMMBATCHED/DGEMMBATCHED/CGEMMBATCHED/ZGEMMBATCHED/HGEMMBATCHED +CLBlastStatusCode PUBLIC_API CLBlastSgemmBatched(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const float *alphas, + const cl_mem a_buffer, const size_t *a_offsets, const size_t a_ld, + const cl_mem b_buffer, const size_t *b_offsets, const size_t b_ld, + const float *betas, + cl_mem c_buffer, const size_t *c_offsets, const size_t c_ld, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDgemmBatched(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const double *alphas, + const cl_mem a_buffer, const size_t *a_offsets, const size_t a_ld, + const cl_mem b_buffer, const size_t *b_offsets, const size_t b_ld, + const double *betas, + cl_mem c_buffer, const size_t *c_offsets, const size_t c_ld, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCgemmBatched(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_float2 *alphas, + const cl_mem a_buffer, const size_t *a_offsets, const size_t a_ld, + const cl_mem b_buffer, const size_t *b_offsets, const size_t b_ld, + const cl_float2 *betas, + cl_mem c_buffer, const size_t *c_offsets, const size_t c_ld, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZgemmBatched(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_double2 *alphas, + const cl_mem a_buffer, const size_t *a_offsets, const size_t a_ld, + const cl_mem b_buffer, const size_t *b_offsets, const size_t b_ld, + const cl_double2 *betas, + cl_mem c_buffer, const size_t *c_offsets, const size_t c_ld, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHgemmBatched(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_half *alphas, + const cl_mem a_buffer, const size_t *a_offsets, const size_t a_ld, + const cl_mem b_buffer, const size_t *b_offsets, const size_t b_ld, + const cl_half *betas, + cl_mem c_buffer, const size_t *c_offsets, const size_t c_ld, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); + +// StridedBatched version of GEMM: SGEMMSTRIDEDBATCHED/DGEMMSTRIDEDBATCHED/CGEMMSTRIDEDBATCHED/ZGEMMSTRIDEDBATCHED/HGEMMSTRIDEDBATCHED +CLBlastStatusCode PUBLIC_API CLBlastSgemmStridedBatched(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDgemmStridedBatched(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastCgemmStridedBatched(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride, + const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZgemmStridedBatched(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride, + const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHgemmStridedBatched(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride, + const cl_half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride, + const size_t batch_count, + cl_command_queue* queue, cl_event* event); + +// ================================================================================================= +// General matrix-matrix multiplication with temporary buffer from user (optional, for advanced users): SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM +CLBlastStatusCode PUBLIC_API CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); +CLBlastStatusCode PUBLIC_API CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); +CLBlastStatusCode PUBLIC_API CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); +CLBlastStatusCode PUBLIC_API CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); +CLBlastStatusCode PUBLIC_API CLBlastHgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); + +// ================================================================================================= +// Retrieves the required size of the temporary buffer for the GEMM kernel: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM (optional) +CLBlastStatusCode PUBLIC_API CLBlastSGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const size_t a_offset, const size_t a_ld, + const size_t b_offset, const size_t b_ld, + const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, + size_t* temp_buffer_size); + +CLBlastStatusCode PUBLIC_API CLBlastDGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const size_t a_offset, const size_t a_ld, + const size_t b_offset, const size_t b_ld, + const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, + size_t* temp_buffer_size); + +CLBlastStatusCode PUBLIC_API CLBlastCGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const size_t a_offset, const size_t a_ld, + const size_t b_offset, const size_t b_ld, + const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, + size_t* temp_buffer_size); + +CLBlastStatusCode PUBLIC_API CLBlastZGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const size_t a_offset, const size_t a_ld, + const size_t b_offset, const size_t b_ld, + const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, + size_t* temp_buffer_size); + +CLBlastStatusCode PUBLIC_API CLBlastHGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const size_t a_offset, const size_t a_ld, + const size_t b_offset, const size_t b_ld, + const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, + size_t* temp_buffer_size); + +// ================================================================================================= + +// CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on +// for the same device. This cache can be cleared to free up system memory or in case of debugging. +CLBlastStatusCode PUBLIC_API CLBlastClearCache(); + +// The cache can also be pre-initialized for a specific device with all possible CLBlast kernels. +// Further CLBlast routine calls will then run at maximum speed. +CLBlastStatusCode PUBLIC_API CLBlastFillCache(const cl_device_id device); + +// ================================================================================================= + +// Overrides tuning parameters for a specific device-precision-kernel combination. The next time +// the target routine is called it will re-compile and use the new parameters from then on. +CLBlastStatusCode PUBLIC_API CLBlastOverrideParameters(const cl_device_id device, const char* kernel_name, + const CLBlastPrecision precision, const size_t num_parameters, + const char** parameters_names, const size_t* parameters_values); + +// ================================================================================================= + +#ifdef __cplusplus +} // extern "C" +#endif + +// CLBLAST_CLBLAST_C_H_ +#endif diff --git a/expose.cpp b/expose.cpp index 604031159..55186c62f 100644 --- a/expose.cpp +++ b/expose.cpp @@ -31,6 +31,14 @@ extern "C" std::string model = inputs.model_filename; file_format = check_file_format(model.c_str()); + //first digit is platform, second is devices + int platform = inputs.clblast_info/10; + int devices = inputs.clblast_info%10; + std::string platformenv = "KCPP_CBLAST_PLATFORM="+std::to_string(platform); + std::string deviceenv = "KCPP_CBLAST_DEVICES="+std::to_string(devices); + putenv(platformenv.c_str()); + putenv(deviceenv.c_str()); + if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3) { printf("\n---\nIdentified as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format); diff --git a/expose.h b/expose.h index 2f66c2d26..4f255c980 100644 --- a/expose.h +++ b/expose.h @@ -9,6 +9,7 @@ struct load_model_inputs const char *model_filename; const int n_parts_overwrite = -1; const bool use_mmap; + const int clblast_info = 0; }; struct generation_inputs { diff --git a/ggml.c b/ggml.c index 5fa143835..3b7e35c17 100644 --- a/ggml.c +++ b/ggml.c @@ -123,26 +123,10 @@ typedef void* thread_ret_t; } \ } while (0) -#if GGML_USE_CLBLAST -#ifndef GGML_USE_OPENBLAS -#define GGML_USE_OPENBLAS -#endif - -#define CL_TARGET_OPENCL_VERSION 110 -#include - -cl_platform_id platform; -cl_device_id device; -cl_context context; -cl_command_queue queue; -cl_event event; -bool cl_initialized = false; -#endif - #ifdef GGML_USE_ACCELERATE #include -#elif defined(GGML_USE_OPENBLAS) -#include +#elif GGML_USE_OPENBLAS +#include #endif #undef MIN @@ -6330,7 +6314,7 @@ static void ggml_compute_forward_rms_norm( // ggml_compute_forward_mul_mat -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // helper function to determine if it is better to use BLAS or not // for large matrices, BLAS is faster static bool ggml_compute_forward_mul_mat_use_blas( @@ -6355,85 +6339,6 @@ static bool ggml_compute_forward_mul_mat_use_blas( return false; } - -#ifdef GGML_USE_CLBLAST -static bool ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, const int m, const int n, const int k, const float alpha, const float *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc) { - cl_int err = 0; - - if (!cl_initialized) { - cl_uint num_platforms; - clGetPlatformIDs(0, NULL, &num_platforms); - cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id)); - clGetPlatformIDs(num_platforms, platforms, NULL); - platform = platforms[0]; - cl_uint num_devices; - clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); - cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id)); - clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); - device = devices[0]; - context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); - if (err != CL_SUCCESS) { - printf("Error creating OpenCL context: %d\n", err); - fflush(stdout); - } - queue = clCreateCommandQueue(context, device, 0, &err); - event = NULL; - - if (err != CL_SUCCESS) { - printf("Error creating OpenCL Command Queue: %d\n", err); - fflush(stdout); - } - - free(platforms); - free(devices); - cl_initialized = true; - } - - // Prepare buffers - cl_mem cl_buffer_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*k*sizeof(float), NULL, &err); - if (err != CL_SUCCESS) { - printf("Error creating OpenCL Buffer A: %d\n", err); - fflush(stdout); - } - cl_mem cl_buffer_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n*k*sizeof(float), NULL, &err); - if (err != CL_SUCCESS) { - printf("Error creating OpenCL Buffer B: %d\n", err); - fflush(stdout); - } - cl_mem cl_buffer_c = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, &err); - if (err != CL_SUCCESS) { - printf("Error creating OpenCL Buffer C: %d\n", err); - fflush(stdout); - } - - clEnqueueWriteBuffer(queue, cl_buffer_a, CL_TRUE, 0, m*k*sizeof(float), host_a, 0, NULL, NULL); - clEnqueueWriteBuffer(queue, cl_buffer_b, CL_TRUE, 0, n*k*sizeof(float), host_b, 0, NULL, NULL); - clEnqueueWriteBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL); - - // Call the SGEMM routine. - CLBlastStatusCode status = CLBlastSgemm(order, - trans_a, trans_b, - m, n, k, - alpha, - cl_buffer_a, 0, lda, - cl_buffer_b, 0, ldb, - beta, - cl_buffer_c, 0, ldc, - &queue, &event); - - // Wait for completion - if (status == CLBlastSuccess) { - clWaitForEvents(1, &event); - clReleaseEvent(event); - } - - clEnqueueReadBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL); - - clReleaseMemObject(cl_buffer_a); - clReleaseMemObject(cl_buffer_b); - clReleaseMemObject(cl_buffer_c); -} -#endif #endif static void ggml_compute_forward_mul_mat_f32( @@ -6449,7 +6354,7 @@ static void ggml_compute_forward_mul_mat_f32( const int64_t ne02 = src0->ne[2]; const int64_t ne03 = src0->ne[3]; -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) const int64_t ne10 = src1->ne[0]; #endif const int64_t ne11 = src1->ne[1]; @@ -6506,7 +6411,7 @@ static void ggml_compute_forward_mul_mat_f32( // nb01 >= nb00 - src0 is not transposed // compute by src0 rows -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { if (params->ith != 0) { return; @@ -6528,19 +6433,11 @@ static void ggml_compute_forward_mul_mat_f32( float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3); // zT = y * xT -#ifdef GGML_USE_CLBLAST - ggml_cl_sgemm_wrapper(CblasRowMajor, CblasNoTrans, CblasTrans, + do_blas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, ne11, ne01, ne10, 1.0f, y, ne10, x, ne10, 0.0f, d, ne01); -#else - cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, - ne11, ne01, ne10, - 1.0f, y, ne10, - x, ne10, - 0.0f, d, ne01); -#endif } } @@ -6673,7 +6570,7 @@ static void ggml_compute_forward_mul_mat_f16_f32( // nb01 >= nb00 - src0 is not transposed // compute by src0 rows -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { GGML_ASSERT(nb10 == sizeof(float)); @@ -6708,19 +6605,11 @@ static void ggml_compute_forward_mul_mat_f16_f32( float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3); // zT = y * xT -#ifdef GGML_USE_CLBLAST - ggml_cl_sgemm_wrapper(CblasRowMajor, CblasNoTrans, CblasTrans, + do_blas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, ne11, ne01, ne10, 1.0f, y, ne10, x, ne10, 0.0f, d, ne01); -#else - cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, - ne11, ne01, ne10, - 1.0f, y, ne10, - x, ne10, - 0.0f, d, ne01); -#endif } } @@ -6896,7 +6785,7 @@ static void ggml_compute_forward_mul_mat_q_f32( // nb01 >= nb00 - src0 is not transposed // compute by src0 rows -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { if (params->ith != 0) { return; @@ -6929,19 +6818,11 @@ static void ggml_compute_forward_mul_mat_q_f32( float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3); // zT = y * xT -#ifdef GGML_USE_CLBLAST - ggml_cl_sgemm_wrapper(CblasRowMajor, CblasNoTrans, CblasTrans, + do_blas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, ne11, ne01, ne10, 1.0f, y, ne10, x, ne10, 0.0f, d, ne01); -#else - cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, - ne11, ne01, ne10, - 1.0f, y, ne10, - x, ne10, - 0.0f, d, ne01); -#endif } } @@ -9672,7 +9553,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) size_t cur = 0; if (node->src0->type == GGML_TYPE_F16 && node->src1->type == GGML_TYPE_F32) { -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { node->n_tasks = 1; // TODO: this actually is doing nothing // the threads are still spinning @@ -9689,7 +9570,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) } else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) { cur = 0; } else if (quantize_fns[node->src0->type].vec_dot_q && node->src1->type == GGML_TYPE_F32) { -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { node->n_tasks = 1; cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]); @@ -10986,7 +10867,7 @@ int ggml_cpu_has_wasm_simd(void) { } int ggml_cpu_has_blas(void) { -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) return 1; #else return 0; diff --git a/ggml_blas_adapter.c b/ggml_blas_adapter.c new file mode 100644 index 000000000..0f6ca4d99 --- /dev/null +++ b/ggml_blas_adapter.c @@ -0,0 +1,124 @@ +//this is a drop-in for all CLBlast related code, to keep the main ggml.c unmodified +// we will imitate the function definition from OpenBLAS instead, replaced as necessary. + +//windows binaries for clblast obtained from https://github.com/CNugteren/CLBlast (apache license) +//windows binaries for opencl obtained from https://github.com/KhronosGroup/OpenCL-SDK (apache license) + +#include +#include +#include + +#if GGML_USE_CLBLAST + +#define CL_TARGET_OPENCL_VERSION 110 +#include + +cl_platform_id platform; +cl_device_id device; +cl_context context; +cl_command_queue queue; +cl_event event; +bool cl_initialized = false; + +static void ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, const int m, const int n, const int k, const float alpha, const float *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc) { + cl_int err = 0; + + if (!cl_initialized) { + char * KCPP_CBLAST_PLATFORM = getenv("KCPP_CBLAST_PLATFORM"); + char * KCPP_CBLAST_DEVICES = getenv("KCPP_CBLAST_DEVICES"); + int plat_num = (KCPP_CBLAST_PLATFORM == NULL ? 0 : atoi(KCPP_CBLAST_PLATFORM)); + int dev_num = (KCPP_CBLAST_DEVICES == NULL ? 0 : atoi(KCPP_CBLAST_DEVICES)); + printf("\nInitializing CLBlast (First Run)..."); + printf("\nSelected: Platform=%d, Device=%d (If invalid, program will crash)\n",plat_num,dev_num); + cl_uint num_platforms; + clGetPlatformIDs(0, NULL, &num_platforms); + cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id)); + clGetPlatformIDs(num_platforms, platforms, NULL); + platform = platforms[plat_num]; + cl_uint num_devices; + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); + cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id)); + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); + device = devices[dev_num]; + context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); + if (err != CL_SUCCESS) { + printf("Error creating OpenCL context: %d\n", err); + fflush(stdout); + } + queue = clCreateCommandQueue(context, device, 0, &err); + event = NULL; + + if (err != CL_SUCCESS) { + printf("Error creating OpenCL Command Queue: %d\n", err); + fflush(stdout); + } + + free(platforms); + free(devices); + cl_initialized = true; + } + + // Prepare buffers + cl_mem cl_buffer_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*k*sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("Error creating OpenCL Buffer A: %d\n", err); + fflush(stdout); + } + cl_mem cl_buffer_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n*k*sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("Error creating OpenCL Buffer B: %d\n", err); + fflush(stdout); + } + cl_mem cl_buffer_c = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("Error creating OpenCL Buffer C: %d\n", err); + fflush(stdout); + } + + clEnqueueWriteBuffer(queue, cl_buffer_a, CL_TRUE, 0, m*k*sizeof(float), host_a, 0, NULL, NULL); + clEnqueueWriteBuffer(queue, cl_buffer_b, CL_TRUE, 0, n*k*sizeof(float), host_b, 0, NULL, NULL); + clEnqueueWriteBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL); + + // Call the SGEMM routine. + CLBlastStatusCode status = CLBlastSgemm(order, + trans_a, trans_b, + m, n, k, + alpha, + cl_buffer_a, 0, lda, + cl_buffer_b, 0, ldb, + beta, + cl_buffer_c, 0, ldc, + &queue, &event); + + // Wait for completion + if (status == CLBlastSuccess) { + clWaitForEvents(1, &event); + clReleaseEvent(event); + } + + clEnqueueReadBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL); + + clReleaseMemObject(cl_buffer_a); + clReleaseMemObject(cl_buffer_b); + clReleaseMemObject(cl_buffer_c); +} + +#endif + +static void do_blas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, +OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc) +{ +#if GGML_USE_CLBLAST + ggml_cl_sgemm_wrapper(Order, TransA, TransB, + M, N, K, + alpha, A, lda, + B, ldb, + beta, C, ldc); +#else + cblas_sgemm(Order, TransA, TransB, + M, N, K, + alpha, A, lda, + B, ldb, + beta, C, ldc); +#endif +} \ No newline at end of file diff --git a/koboldcpp.dll b/koboldcpp.dll deleted file mode 100644 index e102fd991..000000000 Binary files a/koboldcpp.dll and /dev/null differ diff --git a/koboldcpp.py b/koboldcpp.py index c50f3920f..85b14e395 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -15,7 +15,8 @@ class load_model_inputs(ctypes.Structure): ("f16_kv", ctypes.c_bool), ("model_filename", ctypes.c_char_p), ("n_parts_overwrite", ctypes.c_int), - ("use_mmap", ctypes.c_bool)] + ("use_mmap", ctypes.c_bool), + ("clblast_info", ctypes.c_int)] class generation_inputs(ctypes.Structure): _fields_ = [("seed", ctypes.c_int), @@ -34,12 +35,15 @@ class generation_outputs(ctypes.Structure): handle = None use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir. +use_clblast = False #uses CLBlast instead def init_library(): - global handle, use_blas + global handle, use_blas, use_clblast libname = "" if use_blas: - libname = "koboldcpp_blas.dll" + libname = "koboldcpp_openblas.dll" + elif use_clblast: + libname = "koboldcpp_clblast.dll" else: libname = "koboldcpp.dll" @@ -63,6 +67,10 @@ def load_model(model_filename,batch_size=8,max_context_length=512,n_parts_overwr inputs.n_parts_overwrite = n_parts_overwrite inputs.f16_kv = True inputs.use_mmap = use_mmap + clblastids = 0 + if args.useclblast: + clblastids = int(args.useclblast[0])*10 + int(args.useclblast[1]) + inputs.clblast_info = clblastids ret = handle.load_model(inputs) return ret @@ -301,13 +309,19 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None): sys.exit(0) def main(args): - global use_blas - if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_blas.dll")): - print("Warning: libopenblas.dll or koboldcpp_blas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.") + global use_blas, use_clblast + if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")): + print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.") use_blas = False elif os.name != 'nt': print("Prebuilt OpenBLAS binaries only available for windows. Please manually build/link libopenblas from makefile with LLAMA_OPENBLAS=1") use_blas = False + elif args.useclblast: + if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_clblast.dll")): + print("Warning: clblast.dll or koboldcpp_clblast.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with CLBlast.") + else: + print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast.dll will be required.") + use_clblast = True elif not args.noblas: print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.") use_blas = True @@ -397,5 +411,6 @@ if __name__ == '__main__': parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true') parser.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true') parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true') + parser.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2) args = parser.parse_args() main(args) diff --git a/koboldcpp_blas.dll b/koboldcpp_blas.dll deleted file mode 100644 index c9a558381..000000000 Binary files a/koboldcpp_blas.dll and /dev/null differ diff --git a/lib/OpenCL.lib b/lib/OpenCL.lib new file mode 100644 index 000000000..60ab6e0b0 Binary files /dev/null and b/lib/OpenCL.lib differ diff --git a/lib/clblast.lib b/lib/clblast.lib new file mode 100644 index 000000000..9d01d714f Binary files /dev/null and b/lib/clblast.lib differ diff --git a/libopenblas.lib b/lib/libopenblas.lib similarity index 100% rename from libopenblas.lib rename to lib/libopenblas.lib diff --git a/llama_adapter.cpp b/llama_adapter.cpp index ae873efa6..647eb91a6 100644 --- a/llama_adapter.cpp +++ b/llama_adapter.cpp @@ -251,12 +251,10 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out last_n_tokens.push_back(embd_inp[input_consumed]); current_context_tokens.push_back(embd_inp[input_consumed]); ++input_consumed; -#ifndef GGML_USE_CLBLAST if ((int)embd.size() >= params.n_batch) { break; } -#endif } } } diff --git a/make_pyinstaller.bat b/make_pyinstaller.bat index 240b303e1..eb2b3bfe1 100644 --- a/make_pyinstaller.bat +++ b/make_pyinstaller.bat @@ -1 +1 @@ -pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_blas.dll;." --add-data "./libopenblas.dll;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file +pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file