integrated optional (experimentl) CLBlast support

This commit is contained in:
Concedo 2023-04-11 23:33:44 +08:00
parent c9f18082fd
commit 23c675b2e6
53 changed files with 22095 additions and 151 deletions

50
CL/Utils/Context.h Normal file
View file

@ -0,0 +1,50 @@
#pragma once
// OpenCL Utils includes
#include "OpenCLUtils_Export.h"
// OpenCL includes
#include <CL/cl.h>
// STL includes
#include <time.h>
UTILS_EXPORT
cl_context cl_util_get_context(const cl_uint plat_id, const cl_uint dev_id,
const cl_device_type type, cl_int* const error);
UTILS_EXPORT
cl_device_id cl_util_get_device(const cl_uint plat_id, const cl_uint dev_id,
const cl_device_type type, cl_int* const error);
UTILS_EXPORT
cl_int cl_util_print_device_info(const cl_device_id device);
UTILS_EXPORT
char* cl_util_get_device_info(const cl_device_id device,
const cl_device_info info, cl_int* const error);
UTILS_EXPORT
char* cl_util_get_platform_info(const cl_platform_id platform,
const cl_platform_info info,
cl_int* const error);
// build program and show log if build is not successful
UTILS_EXPORT
cl_int cl_util_build_program(const cl_program pr, const cl_device_id dev,
const char* const opt);
#define GET_CURRENT_TIMER(time) \
struct timespec time; \
timespec_get(&time, TIME_UTC); \
{ \
}
#define TIMER_DIFFERENCE(dt, time1, time2) \
{ \
dt = (time2.tv_sec - time1.tv_sec) * 1000000000 \
+ (time2.tv_nsec - time1.tv_nsec); \
}
#define START_TIMER GET_CURRENT_TIMER(start_timer1)
#define STOP_TIMER(dt) \
GET_CURRENT_TIMER(stop_timer2) \
TIMER_DIFFERENCE(dt, start_timer1, stop_timer2)

17
CL/Utils/Context.hpp Normal file
View file

@ -0,0 +1,17 @@
#pragma once
// OpenCL SDK includes
#include "OpenCLUtilsCpp_Export.h"
#include <CL/Utils/Error.hpp>
// OpenCL includes
#include <CL/opencl.hpp>
namespace cl {
namespace util {
Context UTILSCPP_EXPORT get_context(cl_uint plat_id, cl_uint dev_id,
cl_device_type type,
cl_int* error = nullptr);
}
}

84
CL/Utils/Detail.hpp Normal file
View file

@ -0,0 +1,84 @@
#pragma once
// STL includes
#include <stddef.h>
#include <utility> // std::forward, std::integer_sequence
#include <tuple> // std::tuple, std::get
#include <initializer_list> // std::initializer_list
namespace cl {
namespace util {
namespace detail {
// Borrowed from:
// https://www.fluentcpp.com/2019/03/05/for_each_arg-applying-a-function-to-each-argument-of-a-function-in-cpp/
template <class F, class... Args> F for_each_arg(F f, Args&&... args)
{
(void)std::initializer_list<int>{ (
(void)f(std::forward<Args>(args)), 0)... };
return f;
}
namespace impl {
// Borrowed from: https://stackoverflow.com/a/16387374/1476661
template <typename T, typename F, int... Is>
void for_each_in_tuple(T&& t, F&& f,
std::integer_sequence<int, Is...>)
{
auto l = {
(std::forward<F>(f)(std::get<Is>(std::forward<T>(t))), 0)...
};
(void)l;
}
}
template <typename... Ts, typename F>
void for_each_in_tuple(std::tuple<Ts...> const& t, F&& f)
{
impl::for_each_in_tuple(
t, std::forward<F>(f),
std::make_integer_sequence<int, sizeof...(Ts)>());
}
namespace impl {
// Borrowed from
// https://codereview.stackexchange.com/questions/193420/apply-a-function-to-each-element-of-a-tuple-map-a-tuple
template <class F, typename Tuple, std::size_t... Is>
auto transform_tuple(Tuple&& t, F&& f, std::index_sequence<Is...>)
{
return std::make_tuple(std::forward<F>(f)(std::get<Is>(t))...);
}
}
template <class F, typename... Args>
auto transform_tuple(const std::tuple<Args...>& t, F&& f)
{
return impl::transform_tuple(
t, std::forward<F>(f),
std::make_index_sequence<sizeof...(Args)>{});
}
namespace impl {
// Borrowed from
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3658.html
// with modifications of Casey Carter at
// https://stackoverflow.com/a/51365112/1476661
template <typename F, typename Tuple, std::size_t... I>
auto apply(F&& f, Tuple&& args, std::index_sequence<I...>)
-> decltype(std::forward<F>(f)(
std::get<I>(std::forward<Tuple>(args))...))
{
return std::forward<F>(f)(
std::get<I>(std::forward<Tuple>(args))...);
}
}
template <typename F, typename Tuple,
typename Indices = std::make_index_sequence<
std::tuple_size<std::remove_reference_t<Tuple>>::value>>
auto apply(F&& f, Tuple&& args)
-> decltype(impl::apply(std::forward<F>(f),
std::forward<Tuple>(args), Indices()))
{
return impl::apply(std::forward<F>(f), std::forward<Tuple>(args),
Indices());
}
}
}
}

21
CL/Utils/Device.hpp Normal file
View file

@ -0,0 +1,21 @@
#pragma once
#include "OpenCLUtilsCpp_Export.h"
#include <CL/Utils/Error.hpp>
#include <CL/opencl.hpp>
namespace cl {
namespace util {
bool UTILSCPP_EXPORT opencl_c_version_contains(
const cl::Device& device, const cl::string& version_fragment);
bool UTILSCPP_EXPORT supports_extension(const cl::Device& device,
const cl::string& extension);
#ifdef CL_VERSION_3_0
bool UTILSCPP_EXPORT supports_feature(const cl::Device& device,
const cl::string& feature_name);
#endif
}
}

88
CL/Utils/Error.h Normal file
View file

@ -0,0 +1,88 @@
#pragma once
// OpenCL Utils includes
#include "OpenCLUtils_Export.h"
// OpenCL Utils includes
#include <CL/Utils/ErrorCodes.h>
// STL includes
#include <stdio.h> // fprintf
// OpenCL includes
#include <CL/cl.h>
// RET = function returns error code
// PAR = functions sets error code in the paremeter
#ifdef _DEBUG
#define OCLERROR_RET(func, err, label) \
do \
{ \
err = func; \
if (err != CL_SUCCESS) \
{ \
cl_util_print_error(err); \
fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \
__FILE__, #func); \
goto label; \
} \
} while (0)
#define OCLERROR_PAR(func, err, label) \
do \
{ \
func; \
if (err != CL_SUCCESS) \
{ \
cl_util_print_error(err); \
fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \
__FILE__, #func); \
goto label; \
} \
} while (0)
#define MEM_CHECK(func, err, label) \
do \
{ \
if ((func) == NULL) \
{ \
err = CL_OUT_OF_HOST_MEMORY; \
cl_util_print_error(err); \
fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \
__FILE__, #func); \
goto label; \
} \
} while (0)
#else
#define OCLERROR_RET(func, err, label) \
do \
{ \
err = func; \
if (err != CL_SUCCESS) goto label; \
} while (0)
#define OCLERROR_PAR(func, err, label) \
do \
{ \
func; \
if (err != CL_SUCCESS) goto label; \
} while (0)
#define MEM_CHECK(func, err, label) \
do \
{ \
if ((func) == NULL) \
{ \
err = CL_OUT_OF_HOST_MEMORY; \
goto label; \
} \
} while (0)
#endif
UTILS_EXPORT
void cl_util_print_error(cl_int error);

70
CL/Utils/Error.hpp Normal file
View file

@ -0,0 +1,70 @@
#pragma once
// OpenCL Utils includes
#include "OpenCLUtilsCpp_Export.h"
// OpenCL Utils includes
#include <CL/Utils/ErrorCodes.h>
// OpenCL includes
#include <CL/opencl.hpp>
namespace cl {
namespace util {
#if defined(CL_HPP_ENABLE_EXCEPTIONS)
/*! \brief Exception class
*
* This may be thrown by SDK utility functions when
* CL_HPP_ENABLE_EXCEPTIONS is defined.
*/
class Error : public std::exception {
private:
int err_;
const char* errStr_;
public:
/*! \brief Create a new SDK error exception for a given error code
* and corresponding message.
*
* \param err error code value.
*
* \param errStr a descriptive string that must remain in scope until
* handling of the exception has concluded. If set, it
* will be returned by what().
*/
Error(cl_int err, const char* errStr = NULL): err_(err), errStr_(errStr)
{}
~Error() throw() {}
/*! \brief Get error string associated with exception
*
* \return A memory pointer to the error message string.
*/
virtual const char* what() const throw()
{
if (errStr_ == NULL)
{
return "empty";
}
else
{
return errStr_;
}
}
/*! \brief Get error code associated with exception
*
* \return The error code.
*/
cl_int err(void) const { return err_; }
};
#endif
namespace detail {
UTILSCPP_EXPORT cl_int errHandler(cl_int err, cl_int* errPtr,
const char* errStr = nullptr);
}
}
}

5
CL/Utils/ErrorCodes.h Normal file
View file

@ -0,0 +1,5 @@
#pragma once
#define CL_UTIL_INDEX_OUT_OF_RANGE -2000
#define CL_UTIL_DEVICE_NOT_INTEROPERABLE -2001
#define CL_UTIL_FILE_OPERATION_ERROR -2002

13
CL/Utils/Event.h Normal file
View file

@ -0,0 +1,13 @@
#pragma once
// OpenCL Utils includes
#include "OpenCLUtils_Export.h"
// OpenCL includes
#include <CL/cl.h>
UTILS_EXPORT
cl_ulong cl_util_get_event_duration(const cl_event event,
const cl_profiling_info start,
const cl_profiling_info end,
cl_int* const error);

21
CL/Utils/Event.hpp Normal file
View file

@ -0,0 +1,21 @@
#pragma once
// OpenCL SDK includes
#include "OpenCLUtilsCpp_Export.h"
// STL includes
#include <chrono>
// OpenCL includes
#include <CL/opencl.hpp>
namespace cl {
namespace util {
template <cl_int From, cl_int To, typename Dur = std::chrono::nanoseconds>
auto get_duration(cl::Event& ev)
{
return std::chrono::duration_cast<Dur>(std::chrono::nanoseconds{
ev.getProfilingInfo<To>() - ev.getProfilingInfo<From>() });
}
}
}

42
CL/Utils/File.h Normal file
View file

@ -0,0 +1,42 @@
#pragma once
// OpenCL Utils includes
#include "OpenCLUtils_Export.h"
// OpenCL includes
#include <CL/cl.h>
// read all the text file contents securely in ANSI C89
// return pointer to C-string with file contents
// can handle streams with no known size and no support for fseek
// based on https://stackoverflow.com/questions/14002954/ by Nominal Animal
UTILS_EXPORT
char* cl_util_read_text_file(const char* const filename, size_t* const length,
cl_int* const error);
// read all the binary file contents securely in ANSI C89
// return pointer to file contents
// can handle streams with no known size and no support for fseek
// based on https://stackoverflow.com/questions/14002954/ by Nominal Animal
UTILS_EXPORT
unsigned char* cl_util_read_binary_file(const char* const filename,
size_t* const length,
cl_int* const error);
// write binaries of OpenCL compiled program
// binaries are written as separate files for each device
// with file name "(program_file_name)_(name of device).bin"
// based on variant of Logan
// http://logan.tw/posts/2014/11/22/pre-compile-the-opencl-kernel-program-part-2/
UTILS_EXPORT
cl_int cl_util_write_binaries(const cl_program program,
const char* const program_file_name);
// read binaries of OpenCL compiled program
// from files of file names "(program_file_name)_(name of device).bin"
UTILS_EXPORT
cl_program cl_util_read_binaries(const cl_context context,
const cl_device_id* const devices,
const cl_uint num_devices,
const char* const program_file_name,
cl_int* const error);

49
CL/Utils/File.hpp Normal file
View file

@ -0,0 +1,49 @@
#pragma once
// OpenCL SDK includes
#include <CL/Utils/Utils.hpp>
// STL includes
#include <fstream>
#include <string>
namespace cl {
namespace util {
// Scott Meyers, Effective STL, Addison-Wesley Professional, 2001, Item 29
// with error handling
UTILSCPP_EXPORT
std::string read_text_file(const char* const filename, cl_int* const error)
{
std::ifstream in(filename);
if (in.good())
{
try
{
std::string red((std::istreambuf_iterator<char>(in)),
std::istreambuf_iterator<char>());
if (in.good() && in.eof())
{
if (error != nullptr) *error = CL_SUCCESS;
return red;
}
else
{
detail::errHandler(CL_UTIL_FILE_OPERATION_ERROR, error,
"File read error!");
return std::string();
}
} catch (std::bad_alloc& ex)
{
detail::errHandler(CL_OUT_OF_RESOURCES, error,
"Bad allocation!");
return std::string();
}
}
else
{
detail::errHandler(CL_INVALID_VALUE, error, "No file!");
return std::string();
}
}
}
}

View file

@ -0,0 +1,18 @@
#pragma once
#include "OpenCLUtilsCpp_Export.h"
#include <CL/Utils/Error.hpp>
#include <CL/opencl.hpp>
namespace cl {
namespace util {
vector<cl_context_properties>
UTILSCPP_EXPORT get_interop_context_properties(const cl::Device& plat,
cl_int* error = nullptr);
Context UTILSCPP_EXPORT get_interop_context(int plat_id, int dev_id,
cl_device_type type,
cl_int* error = nullptr);
}
}

View file

@ -0,0 +1,42 @@
#ifndef UTILSCPP_EXPORT_H
#define UTILSCPP_EXPORT_H
#ifdef OPENCLUTILSCPP_STATIC_DEFINE
# define UTILSCPP_EXPORT
# define OPENCLUTILSCPP_NO_EXPORT
#else
# ifndef UTILSCPP_EXPORT
# ifdef OpenCLUtilsCpp_EXPORTS
/* We are building this library */
# define UTILSCPP_EXPORT
# else
/* We are using this library */
# define UTILSCPP_EXPORT
# endif
# endif
# ifndef OPENCLUTILSCPP_NO_EXPORT
# define OPENCLUTILSCPP_NO_EXPORT
# endif
#endif
#ifndef OPENCLUTILSCPP_DEPRECATED
# define OPENCLUTILSCPP_DEPRECATED __declspec(deprecated)
#endif
#ifndef OPENCLUTILSCPP_DEPRECATED_EXPORT
# define OPENCLUTILSCPP_DEPRECATED_EXPORT UTILSCPP_EXPORT OPENCLUTILSCPP_DEPRECATED
#endif
#ifndef OPENCLUTILSCPP_DEPRECATED_NO_EXPORT
# define OPENCLUTILSCPP_DEPRECATED_NO_EXPORT OPENCLUTILSCPP_NO_EXPORT OPENCLUTILSCPP_DEPRECATED
#endif
#if 0 /* DEFINE_NO_DEPRECATED */
# ifndef OPENCLUTILSCPP_NO_DEPRECATED
# define OPENCLUTILSCPP_NO_DEPRECATED
# endif
#endif
#endif /* UTILSCPP_EXPORT_H */

View file

@ -0,0 +1,42 @@
#ifndef UTILS_EXPORT_H
#define UTILS_EXPORT_H
#ifdef OPENCLUTILS_STATIC_DEFINE
# define UTILS_EXPORT
# define OPENCLUTILS_NO_EXPORT
#else
# ifndef UTILS_EXPORT
# ifdef OpenCLUtils_EXPORTS
/* We are building this library */
# define UTILS_EXPORT
# else
/* We are using this library */
# define UTILS_EXPORT
# endif
# endif
# ifndef OPENCLUTILS_NO_EXPORT
# define OPENCLUTILS_NO_EXPORT
# endif
#endif
#ifndef OPENCLUTILS_DEPRECATED
# define OPENCLUTILS_DEPRECATED __declspec(deprecated)
#endif
#ifndef OPENCLUTILS_DEPRECATED_EXPORT
# define OPENCLUTILS_DEPRECATED_EXPORT UTILS_EXPORT OPENCLUTILS_DEPRECATED
#endif
#ifndef OPENCLUTILS_DEPRECATED_NO_EXPORT
# define OPENCLUTILS_DEPRECATED_NO_EXPORT OPENCLUTILS_NO_EXPORT OPENCLUTILS_DEPRECATED
#endif
#if 0 /* DEFINE_NO_DEPRECATED */
# ifndef OPENCLUTILS_NO_DEPRECATED
# define OPENCLUTILS_NO_DEPRECATED
# endif
#endif
#endif /* UTILS_EXPORT_H */

16
CL/Utils/Platform.hpp Normal file
View file

@ -0,0 +1,16 @@
#pragma once
#include "OpenCLUtilsCpp_Export.h"
#include <CL/Utils/Error.hpp>
#include <CL/opencl.hpp>
namespace cl {
namespace util {
bool UTILSCPP_EXPORT supports_extension(const cl::Platform& platform,
const cl::string& extension);
bool UTILSCPP_EXPORT platform_version_contains(
const cl::Platform& platform, const cl::string& version_fragment);
}
}

11
CL/Utils/Utils.h Normal file
View file

@ -0,0 +1,11 @@
#pragma once
// OpenCL Utils includes
#include "OpenCLUtils_Export.h"
#include <CL/Utils/Error.h>
#include <CL/Utils/File.h>
#include <CL/Utils/Context.h>
// OpenCL includes
#include <CL/cl.h>

14
CL/Utils/Utils.hpp Normal file
View file

@ -0,0 +1,14 @@
#pragma once
// OpenCL Utils includes
#include "OpenCLUtils_Export.h"
#include <CL/Utils/Detail.hpp>
#include <CL/Utils/Error.hpp>
#include <CL/Utils/Platform.hpp>
#include <CL/Utils/Device.hpp>
#include <CL/Utils/Context.hpp>
#include <CL/Utils/Event.hpp>
// OpenCL includes
#include <CL/opencl.hpp>

1936
CL/cl.h Normal file

File diff suppressed because it is too large Load diff

18
CL/cl2.hpp Normal file
View file

@ -0,0 +1,18 @@
//
// Copyright (c) 2020 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <CL/opencl.hpp>
#pragma message("cl2.hpp has been renamed to opencl.hpp to make it clear that it supports all versions of OpenCL. Please include opencl.hpp directly.")

154
CL/cl_d3d10.h Normal file
View file

@ -0,0 +1,154 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_D3D10_H
#define __OPENCL_CL_D3D10_H
#if defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( push )
#pragma warning( disable : 4201 )
#pragma warning( disable : 5105 )
#endif
#endif
#include <d3d10.h>
#if defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( pop )
#endif
#endif
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d10_sharing */
#define cl_khr_d3d10_sharing 1
typedef cl_uint cl_d3d10_device_source_khr;
typedef cl_uint cl_d3d10_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D10_DEVICE_KHR -1002
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
/* cl_d3d10_device_source_nv */
#define CL_D3D10_DEVICE_KHR 0x4010
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
/* cl_d3d10_device_set_nv */
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
/* cl_context_info */
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
/* cl_mem_info */
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
/* cl_image_info */
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
/******************************************************************************/
typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
cl_platform_id platform,
cl_d3d10_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d10_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
typedef cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
/***************************************************************
* cl_intel_sharing_format_query_d3d10
***************************************************************/
#define cl_intel_sharing_format_query_d3d10 1
/* when cl_khr_d3d10_sharing is supported */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSupportedD3D10TextureFormatsINTEL(
cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint num_entries,
DXGI_FORMAT* d3d10_formats,
cl_uint* num_texture_formats) ;
typedef cl_int (CL_API_CALL *
clGetSupportedD3D10TextureFormatsINTEL_fn)(
cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint num_entries,
DXGI_FORMAT* d3d10_formats,
cl_uint* num_texture_formats) ;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D10_H */

156
CL/cl_d3d11.h Normal file
View file

@ -0,0 +1,156 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_D3D11_H
#define __OPENCL_CL_D3D11_H
#if defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( push )
#pragma warning( disable : 4201 )
#pragma warning( disable : 5105 )
#endif
#endif
#include <d3d11.h>
#if defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( pop )
#endif
#endif
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d11_sharing */
#define cl_khr_d3d11_sharing 1
typedef cl_uint cl_d3d11_device_source_khr;
typedef cl_uint cl_d3d11_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D11_DEVICE_KHR -1006
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
/* cl_d3d11_device_source */
#define CL_D3D11_DEVICE_KHR 0x4019
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
/* cl_d3d11_device_set */
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
/* cl_context_info */
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
/* cl_mem_info */
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
/* cl_image_info */
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
/******************************************************************************/
typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
cl_platform_id platform,
cl_d3d11_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d11_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
/***************************************************************
* cl_intel_sharing_format_query_d3d11
***************************************************************/
#define cl_intel_sharing_format_query_d3d11 1
/* when cl_khr_d3d11_sharing is supported */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSupportedD3D11TextureFormatsINTEL(
cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint plane,
cl_uint num_entries,
DXGI_FORMAT* d3d11_formats,
cl_uint* num_texture_formats) ;
typedef cl_int (CL_API_CALL *
clGetSupportedD3D11TextureFormatsINTEL_fn)(
cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint plane,
cl_uint num_entries,
DXGI_FORMAT* d3d11_formats,
cl_uint* num_texture_formats) ;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D11_H */

268
CL/cl_dx9_media_sharing.h Normal file
View file

@ -0,0 +1,268 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************/
/* cl_khr_dx9_media_sharing */
#define cl_khr_dx9_media_sharing 1
typedef cl_uint cl_dx9_media_adapter_type_khr;
typedef cl_uint cl_dx9_media_adapter_set_khr;
#if defined(_WIN32)
#if defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( push )
#pragma warning( disable : 4201 )
#pragma warning( disable : 5105 )
#endif
#endif
#include <d3d9.h>
#if defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( pop )
#endif
#endif
typedef struct _cl_dx9_surface_info_khr
{
IDirect3DSurface9 *resource;
HANDLE shared_handle;
} cl_dx9_surface_info_khr;
#endif
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
/* cl_media_adapter_type_khr */
#define CL_ADAPTER_D3D9_KHR 0x2020
#define CL_ADAPTER_D3D9EX_KHR 0x2021
#define CL_ADAPTER_DXVA_KHR 0x2022
/* cl_media_adapter_set_khr */
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
/* cl_context_info */
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
/* cl_mem_info */
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
/* cl_image_info */
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
/******************************************************************************/
typedef cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
cl_platform_id platform,
cl_uint num_media_adapters,
cl_dx9_media_adapter_type_khr * media_adapter_type,
void * media_adapters,
cl_dx9_media_adapter_set_khr media_adapter_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
cl_context context,
cl_mem_flags flags,
cl_dx9_media_adapter_type_khr adapter_type,
void * surface_info,
cl_uint plane,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
/***************************************
* cl_intel_dx9_media_sharing extension *
****************************************/
#define cl_intel_dx9_media_sharing 1
typedef cl_uint cl_dx9_device_source_intel;
typedef cl_uint cl_dx9_device_set_intel;
/* error codes */
#define CL_INVALID_DX9_DEVICE_INTEL -1010
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
/* cl_dx9_device_source_intel */
#define CL_D3D9_DEVICE_INTEL 0x4022
#define CL_D3D9EX_DEVICE_INTEL 0x4070
#define CL_DXVA_DEVICE_INTEL 0x4071
/* cl_dx9_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
/* cl_context_info */
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
/* cl_mem_info */
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
/* cl_image_info */
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
/******************************************************************************/
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromDX9INTEL(
cl_platform_id platform,
cl_dx9_device_source_intel dx9_device_source,
void* dx9_object,
cl_dx9_device_set_intel dx9_device_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1;
typedef cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
cl_platform_id platform,
cl_dx9_device_source_intel dx9_device_source,
void* dx9_object,
cl_dx9_device_set_intel dx9_device_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromDX9MediaSurfaceINTEL(
cl_context context,
cl_mem_flags flags,
IDirect3DSurface9* resource,
HANDLE sharedHandle,
UINT plane,
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1;
typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
cl_context context,
cl_mem_flags flags,
IDirect3DSurface9* resource,
HANDLE sharedHandle,
UINT plane,
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireDX9ObjectsINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseDX9ObjectsINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
/***************************************************************
* cl_intel_sharing_format_query_dx9
***************************************************************/
#define cl_intel_sharing_format_query_dx9 1
/* when cl_khr_dx9_media_sharing or cl_intel_dx9_media_sharing is supported */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSupportedDX9MediaSurfaceFormatsINTEL(
cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint plane,
cl_uint num_entries,
D3DFORMAT* dx9_formats,
cl_uint* num_surface_formats) ;
typedef cl_int (CL_API_CALL *
clGetSupportedDX9MediaSurfaceFormatsINTEL_fn)(
cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint plane,
cl_uint num_entries,
D3DFORMAT* dx9_formats,
cl_uint* num_surface_formats) ;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */

View file

@ -0,0 +1,18 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#include <CL/cl_dx9_media_sharing.h>
#pragma message("The Intel DX9 media sharing extensions have been moved into cl_dx9_media_sharing.h. Please include cl_dx9_media_sharing.h directly.")

120
CL/cl_egl.h Normal file
View file

@ -0,0 +1,120 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_EGL_H
#define __OPENCL_CL_EGL_H
#include <CL/cl.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
/* Error type for clCreateFromEGLImageKHR */
#define CL_INVALID_EGL_OBJECT_KHR -1093
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
/* CLeglImageKHR is an opaque handle to an EGLImage */
typedef void* CLeglImageKHR;
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
typedef void* CLeglDisplayKHR;
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
typedef void* CLeglSyncKHR;
/* properties passed to clCreateFromEGLImageKHR */
typedef intptr_t cl_egl_image_properties_khr;
#define cl_khr_egl_image 1
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromEGLImageKHR(cl_context context,
CLeglDisplayKHR egldisplay,
CLeglImageKHR eglimage,
cl_mem_flags flags,
const cl_egl_image_properties_khr * properties,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
cl_context context,
CLeglDisplayKHR egldisplay,
CLeglImageKHR eglimage,
cl_mem_flags flags,
const cl_egl_image_properties_khr * properties,
cl_int * errcode_ret);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
#define cl_khr_egl_event 1
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromEGLSyncKHR(cl_context context,
CLeglSyncKHR sync,
CLeglDisplayKHR display,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
cl_context context,
CLeglSyncKHR sync,
CLeglDisplayKHR display,
cl_int * errcode_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_EGL_H */

2634
CL/cl_ext.h Normal file

File diff suppressed because it is too large Load diff

19
CL/cl_ext_intel.h Normal file
View file

@ -0,0 +1,19 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
#include <CL/cl_ext.h>
#pragma message("The Intel extensions have been moved into cl_ext.h. Please include cl_ext.h directly.")

194
CL/cl_gl.h Normal file
View file

@ -0,0 +1,194 @@
/*******************************************************************************
* Copyright (c) 2008-2021 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#include <CL/cl.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
typedef struct __GLsync *cl_GLsync;
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
#define CL_GL_OBJECT_BUFFER 0x2000
#define CL_GL_OBJECT_TEXTURE2D 0x2001
#define CL_GL_OBJECT_TEXTURE3D 0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
#ifdef CL_VERSION_1_2
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
#define CL_GL_OBJECT_TEXTURE1D 0x200F
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
#endif
/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET 0x2004
#define CL_GL_MIPMAP_LEVEL 0x2005
#ifdef CL_VERSION_1_2
#define CL_GL_NUM_SAMPLES 0x2012
#endif
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(cl_context context,
cl_mem_flags flags,
cl_GLuint bufobj,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture(cl_context context,
cl_mem_flags flags,
cl_GLenum target,
cl_GLint miplevel,
cl_GLuint texture,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
#endif
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(cl_context context,
cl_mem_flags flags,
cl_GLuint renderbuffer,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem memobj,
cl_gl_object_type * gl_object_type,
cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(cl_mem memobj,
cl_gl_texture_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
/* Deprecated OpenCL 1.1 APIs */
extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context context,
cl_mem_flags flags,
cl_GLenum target,
cl_GLint miplevel,
cl_GLuint texture,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context context,
cl_mem_flags flags,
cl_GLenum target,
cl_GLint miplevel,
cl_GLuint texture,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
/* cl_khr_gl_sharing extension */
#define cl_khr_gl_sharing 1
typedef cl_uint cl_gl_context_info;
/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
/* Additional cl_context_properties */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
typedef cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret);
/*
* cl_khr_gl_event extension
*/
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context context,
cl_GLsync sync,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1;
/***************************************************************
* cl_intel_sharing_format_query_gl
***************************************************************/
#define cl_intel_sharing_format_query_gl 1
/* when cl_khr_gl_sharing is supported */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSupportedGLTextureFormatsINTEL(
cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint num_entries,
cl_GLenum* gl_formats,
cl_uint* num_texture_formats) ;
typedef cl_int (CL_API_CALL *
clGetSupportedGLTextureFormatsINTEL_fn)(
cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint num_entries,
cl_GLenum* gl_formats,
cl_uint* num_texture_formats) ;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_H */

18
CL/cl_gl_ext.h Normal file
View file

@ -0,0 +1,18 @@
/*******************************************************************************
* Copyright (c) 2008-2021 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#include <CL/cl_gl.h>
#pragma message("All OpenGL-related extensions have been moved into cl_gl.h. Please include cl_gl.h directly.")

440
CL/cl_half.h Normal file
View file

@ -0,0 +1,440 @@
/*******************************************************************************
* Copyright (c) 2019-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
/**
* This is a header-only utility library that provides OpenCL host code with
* routines for converting to/from cl_half values.
*
* Example usage:
*
* #include <CL/cl_half.h>
* ...
* cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
* cl_float f = cl_half_to_float(h);
*/
#ifndef OPENCL_CL_HALF_H
#define OPENCL_CL_HALF_H
#include <CL/cl_platform.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Rounding mode used when converting to cl_half.
*/
typedef enum
{
CL_HALF_RTE, // round to nearest even
CL_HALF_RTZ, // round towards zero
CL_HALF_RTP, // round towards positive infinity
CL_HALF_RTN, // round towards negative infinity
} cl_half_rounding_mode;
/* Private utility macros. */
#define CL_HALF_EXP_MASK 0x7C00
#define CL_HALF_MAX_FINITE_MAG 0x7BFF
/*
* Utility to deal with values that overflow when converting to half precision.
*/
static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
uint16_t sign)
{
if (rounding_mode == CL_HALF_RTZ)
{
// Round overflow towards zero -> largest finite number (preserving sign)
return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
}
else if (rounding_mode == CL_HALF_RTP && sign)
{
// Round negative overflow towards positive infinity -> most negative finite number
return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
}
else if (rounding_mode == CL_HALF_RTN && !sign)
{
// Round positive overflow towards negative infinity -> largest finite number
return CL_HALF_MAX_FINITE_MAG;
}
// Overflow to infinity
return (sign << 15) | CL_HALF_EXP_MASK;
}
/*
* Utility to deal with values that underflow when converting to half precision.
*/
static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
uint16_t sign)
{
if (rounding_mode == CL_HALF_RTP && !sign)
{
// Round underflow towards positive infinity -> smallest positive value
return (sign << 15) | 1;
}
else if (rounding_mode == CL_HALF_RTN && sign)
{
// Round underflow towards negative infinity -> largest negative value
return (sign << 15) | 1;
}
// Flush to zero
return (sign << 15);
}
/**
* Convert a cl_float to a cl_half.
*/
static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
{
// Type-punning to get direct access to underlying bits
union
{
cl_float f;
uint32_t i;
} f32;
f32.f = f;
// Extract sign bit
uint16_t sign = f32.i >> 31;
// Extract FP32 exponent and mantissa
uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
// Remove FP32 exponent bias
int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
// Add FP16 exponent bias
uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
// Position of the bit that will become the FP16 mantissa LSB
uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
// Check for NaN / infinity
if (f_exp == 0xFF)
{
if (f_mant)
{
// NaN -> propagate mantissa and silence it
uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
h_mant |= 0x200;
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
}
else
{
// Infinity -> zero mantissa
return (sign << 15) | CL_HALF_EXP_MASK;
}
}
// Check for zero
if (!f_exp && !f_mant)
{
return (sign << 15);
}
// Check for overflow
if (exp >= CL_HALF_MAX_EXP)
{
return cl_half_handle_overflow(rounding_mode, sign);
}
// Check for underflow
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
{
return cl_half_handle_underflow(rounding_mode, sign);
}
// Check for value that will become denormal
if (exp < -14)
{
// Denormal -> include the implicit 1 from the FP32 mantissa
h_exp = 0;
f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
// Mantissa shift amount depends on exponent
lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
}
// Generate FP16 mantissa by shifting FP32 mantissa
uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
// Check whether we need to round
uint32_t halfway = 1 << (lsb_pos - 1);
uint32_t mask = (halfway << 1) - 1;
switch (rounding_mode)
{
case CL_HALF_RTE:
if ((f_mant & mask) > halfway)
{
// More than halfway -> round up
h_mant += 1;
}
else if ((f_mant & mask) == halfway)
{
// Exactly halfway -> round to nearest even
if (h_mant & 0x1)
h_mant += 1;
}
break;
case CL_HALF_RTZ:
// Mantissa has already been truncated -> do nothing
break;
case CL_HALF_RTP:
if ((f_mant & mask) && !sign)
{
// Round positive numbers up
h_mant += 1;
}
break;
case CL_HALF_RTN:
if ((f_mant & mask) && sign)
{
// Round negative numbers down
h_mant += 1;
}
break;
}
// Check for mantissa overflow
if (h_mant & 0x400)
{
h_exp += 1;
h_mant = 0;
}
return (sign << 15) | (h_exp << 10) | h_mant;
}
/**
* Convert a cl_double to a cl_half.
*/
static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
{
// Type-punning to get direct access to underlying bits
union
{
cl_double d;
uint64_t i;
} f64;
f64.d = d;
// Extract sign bit
uint16_t sign = f64.i >> 63;
// Extract FP64 exponent and mantissa
uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
// Remove FP64 exponent bias
int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
// Add FP16 exponent bias
uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
// Position of the bit that will become the FP16 mantissa LSB
uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
// Check for NaN / infinity
if (d_exp == 0x7FF)
{
if (d_mant)
{
// NaN -> propagate mantissa and silence it
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
h_mant |= 0x200;
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
}
else
{
// Infinity -> zero mantissa
return (sign << 15) | CL_HALF_EXP_MASK;
}
}
// Check for zero
if (!d_exp && !d_mant)
{
return (sign << 15);
}
// Check for overflow
if (exp >= CL_HALF_MAX_EXP)
{
return cl_half_handle_overflow(rounding_mode, sign);
}
// Check for underflow
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
{
return cl_half_handle_underflow(rounding_mode, sign);
}
// Check for value that will become denormal
if (exp < -14)
{
// Include the implicit 1 from the FP64 mantissa
h_exp = 0;
d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
// Mantissa shift amount depends on exponent
lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
}
// Generate FP16 mantissa by shifting FP64 mantissa
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
// Check whether we need to round
uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
uint64_t mask = (halfway << 1) - 1;
switch (rounding_mode)
{
case CL_HALF_RTE:
if ((d_mant & mask) > halfway)
{
// More than halfway -> round up
h_mant += 1;
}
else if ((d_mant & mask) == halfway)
{
// Exactly halfway -> round to nearest even
if (h_mant & 0x1)
h_mant += 1;
}
break;
case CL_HALF_RTZ:
// Mantissa has already been truncated -> do nothing
break;
case CL_HALF_RTP:
if ((d_mant & mask) && !sign)
{
// Round positive numbers up
h_mant += 1;
}
break;
case CL_HALF_RTN:
if ((d_mant & mask) && sign)
{
// Round negative numbers down
h_mant += 1;
}
break;
}
// Check for mantissa overflow
if (h_mant & 0x400)
{
h_exp += 1;
h_mant = 0;
}
return (sign << 15) | (h_exp << 10) | h_mant;
}
/**
* Convert a cl_half to a cl_float.
*/
static inline cl_float cl_half_to_float(cl_half h)
{
// Type-punning to get direct access to underlying bits
union
{
cl_float f;
uint32_t i;
} f32;
// Extract sign bit
uint16_t sign = h >> 15;
// Extract FP16 exponent and mantissa
uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = h & 0x3FF;
// Remove FP16 exponent bias
int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
// Add FP32 exponent bias
uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
// Check for NaN / infinity
if (h_exp == 0x1F)
{
if (h_mant)
{
// NaN -> propagate mantissa and silence it
uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
f_mant |= 0x400000;
f32.i = (sign << 31) | 0x7F800000 | f_mant;
return f32.f;
}
else
{
// Infinity -> zero mantissa
f32.i = (sign << 31) | 0x7F800000;
return f32.f;
}
}
// Check for zero / denormal
if (h_exp == 0)
{
if (h_mant == 0)
{
// Zero -> zero exponent
f_exp = 0;
}
else
{
// Denormal -> normalize it
// - Shift mantissa to make most-significant 1 implicit
// - Adjust exponent accordingly
uint32_t shift = 0;
while ((h_mant & 0x400) == 0)
{
h_mant <<= 1;
shift++;
}
h_mant &= 0x3FF;
f_exp -= shift - 1;
}
}
f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
return f32.f;
}
#undef CL_HALF_EXP_MASK
#undef CL_HALF_MAX_FINITE_MAG
#ifdef __cplusplus
}
#endif
#endif /* OPENCL_CL_HALF_H */

1294
CL/cl_icd.h Normal file

File diff suppressed because it is too large Load diff

62
CL/cl_layer.h Normal file
View file

@ -0,0 +1,62 @@
/*
* Copyright (c) 2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* OpenCL is a trademark of Apple Inc. used under license by Khronos.
*/
#ifndef OPENCL_CL_LAYER_H
#define OPENCL_CL_LAYER_H
#include <CL/cl_icd.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef cl_uint cl_layer_info;
typedef cl_uint cl_layer_api_version;
#define CL_LAYER_API_VERSION 0x4240
#define CL_LAYER_NAME 0x4241
#define CL_LAYER_API_VERSION_100 100
extern CL_API_ENTRY cl_int CL_API_CALL
clGetLayerInfo(cl_layer_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
typedef cl_int
(CL_API_CALL *pfn_clGetLayerInfo)(cl_layer_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
extern CL_API_ENTRY cl_int CL_API_CALL
clInitLayer(cl_uint num_entries,
const cl_icd_dispatch *target_dispatch,
cl_uint *num_entries_ret,
const cl_icd_dispatch **layer_dispatch_ret);
typedef cl_int
(CL_API_CALL *pfn_clInitLayer)(cl_uint num_entries,
const cl_icd_dispatch *target_dispatch,
cl_uint *num_entries_ret,
const cl_icd_dispatch **layer_dispatch_ret);
#ifdef __cplusplus
}
#endif
#endif /* OPENCL_CL_LAYER_H */

1412
CL/cl_platform.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,163 @@
/*******************************************************************************
* Copyright (c) 2008-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <va/va.h>
#ifdef __cplusplus
extern "C" {
#endif
/***************************************************************
* cl_intel_sharing_format_query_va_api
***************************************************************/
#define cl_intel_sharing_format_query_va_api 1
/* when cl_intel_va_api_media_sharing is supported */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSupportedVA_APIMediaSurfaceFormatsINTEL(
cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint plane,
cl_uint num_entries,
VAImageFormat* va_api_formats,
cl_uint* num_surface_formats) ;
typedef cl_int (CL_API_CALL *
clGetSupportedVA_APIMediaSurfaceFormatsINTEL_fn)(
cl_context context,
cl_mem_flags flags,
cl_mem_object_type image_type,
cl_uint plane,
cl_uint num_entries,
VAImageFormat* va_api_formats,
cl_uint* num_surface_formats) ;
/******************************************
* cl_intel_va_api_media_sharing extension *
*******************************************/
#define cl_intel_va_api_media_sharing 1
/* error codes */
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
/* cl_va_api_device_source_intel */
#define CL_VA_API_DISPLAY_INTEL 0x4094
/* cl_va_api_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
/* cl_context_info */
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
/* cl_mem_info */
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
/* cl_image_info */
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
typedef cl_uint cl_va_api_device_source_intel;
typedef cl_uint cl_va_api_device_set_intel;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
cl_platform_id platform,
cl_va_api_device_source_intel media_adapter_type,
void* media_adapter,
cl_va_api_device_set_intel media_adapter_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
cl_platform_id platform,
cl_va_api_device_source_intel media_adapter_type,
void* media_adapter,
cl_va_api_device_set_intel media_adapter_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromVA_APIMediaSurfaceINTEL(
cl_context context,
cl_mem_flags flags,
VASurfaceID* surface,
cl_uint plane,
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
cl_context context,
cl_mem_flags flags,
VASurfaceID* surface,
cl_uint plane,
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
typedef cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
typedef cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */

81
CL/cl_version.h Normal file
View file

@ -0,0 +1,81 @@
/*******************************************************************************
* Copyright (c) 2018-2020 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __CL_VERSION_H
#define __CL_VERSION_H
/* Detect which version to target */
#if !defined(CL_TARGET_OPENCL_VERSION)
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 300 (OpenCL 3.0)")
#define CL_TARGET_OPENCL_VERSION 300
#endif
#if CL_TARGET_OPENCL_VERSION != 100 && \
CL_TARGET_OPENCL_VERSION != 110 && \
CL_TARGET_OPENCL_VERSION != 120 && \
CL_TARGET_OPENCL_VERSION != 200 && \
CL_TARGET_OPENCL_VERSION != 210 && \
CL_TARGET_OPENCL_VERSION != 220 && \
CL_TARGET_OPENCL_VERSION != 300
#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 300 (OpenCL 3.0)")
#undef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 300
#endif
/* OpenCL Version */
#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0)
#define CL_VERSION_3_0 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
#define CL_VERSION_2_2 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
#define CL_VERSION_2_1 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
#define CL_VERSION_2_0 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
#define CL_VERSION_1_2 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
#define CL_VERSION_1_1 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
#define CL_VERSION_1_0 1
#endif
/* Allow deprecated APIs for older OpenCL versions. */
#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS)
#define CL_USE_DEPRECATED_OPENCL_2_2_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
#endif
#endif /* __CL_VERSION_H */

201
CL/license.txt Normal file
View file

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

32
CL/opencl.h Normal file
View file

@ -0,0 +1,32 @@
/*******************************************************************************
* Copyright (c) 2008-2021 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef __OPENCL_H
#define __OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <CL/cl_ext.h>
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_H */

10372
CL/opencl.hpp Normal file

File diff suppressed because it is too large Load diff

View file

@ -97,7 +97,7 @@ ifdef LLAMA_OPENBLAS
LDFLAGS += -lopenblas
endif
ifdef LLAMA_CLBLAST
CFLAGS += -DGGML_USE_CLBLAST
CFLAGS += -DGGML_USE_CLBLAST -DGGML_USE_OPENBLAS
LDFLAGS += -lclblast -lOpenCL
endif
ifdef LLAMA_GPROF
@ -121,11 +121,18 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif
BLAS_BUILD =
OPENBLAS_BUILD =
ifeq ($(OS),Windows_NT)
BLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o libopenblas.lib -shared -o koboldcpp_blas.dll $(LDFLAGS)
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas.dll $(LDFLAGS)
else
BLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use openblas, please install it seperately, then link it manually with LLAMA_OPENBLAS=1. This is just a reminder, not an error.'
OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use openblas, please install it seperately, then link it manually with LLAMA_OPENBLAS=1. This is just a reminder, not an error.'
endif
CLBLAST_BUILD =
ifeq ($(OS),Windows_NT)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/OpenCL.lib lib/clblast.lib -shared -o koboldcpp_clblast.dll $(LDFLAGS)
else
CLBLAST_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use CLBlast, please install it seperately, then link it manually with LLAMA_CLBLAST=1. This is just a reminder, not an error.'
endif
#
@ -143,7 +150,7 @@ $(info I CC: $(CCV))
$(info I CXX: $(CXXV))
$(info )
default: llamalib llamalib_blas
default: llamalib llamalib_openblas llamalib_clblast
#
# Build library
@ -152,8 +159,11 @@ default: llamalib llamalib_blas
ggml.o: ggml.c ggml.h
$(CC) $(CFLAGS) -c ggml.c -o ggml.o
ggml_blas.o: ggml.c ggml.h
$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_blas.o
ggml_openblas.o: ggml.c ggml.h
$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas.o
ggml_clblast.o: ggml.c ggml.h
$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -DGGML_USE_CLBLAST -c ggml.c -o ggml_clblast.o
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
$(CC) $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o
@ -174,7 +184,7 @@ gpttype_adapter.o:
$(CXX) $(CXXFLAGS) -c gpttype_adapter.cpp -o gpttype_adapter.o
clean:
rm -vf *.o main quantize quantize-stats perplexity embedding main.exe quantize.exe koboldcpp.dll koboldcpp_blas.dll gptj.exe gpt2.exe
rm -vf *.o main quantize quantize-stats perplexity embedding main.exe quantize.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_clblast.dll gptj.exe gpt2.exe
main: examples/main/main.cpp ggml.o llama.o common.o
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
@ -185,8 +195,11 @@ main: examples/main/main.cpp ggml.o llama.o common.o
llamalib: ggml.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(CXX) $(CXXFLAGS) ggml.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o -shared -o koboldcpp.dll $(LDFLAGS)
llamalib_blas: ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(BLAS_BUILD)
llamalib_openblas: ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(OPENBLAS_BUILD)
llamalib_clblast: ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(CLBLAST_BUILD)
quantize: examples/quantize/quantize.cpp ggml.o llama.o
$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)

View file

@ -1,3 +1,5 @@
#pragma once
#ifndef CBLAS_H
#define CBLAS_H

BIN
clblast.dll Normal file

Binary file not shown.

1707
clblast_c.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -31,6 +31,14 @@ extern "C"
std::string model = inputs.model_filename;
file_format = check_file_format(model.c_str());
//first digit is platform, second is devices
int platform = inputs.clblast_info/10;
int devices = inputs.clblast_info%10;
std::string platformenv = "KCPP_CBLAST_PLATFORM="+std::to_string(platform);
std::string deviceenv = "KCPP_CBLAST_DEVICES="+std::to_string(devices);
putenv(platformenv.c_str());
putenv(deviceenv.c_str());
if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3)
{
printf("\n---\nIdentified as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);

View file

@ -9,6 +9,7 @@ struct load_model_inputs
const char *model_filename;
const int n_parts_overwrite = -1;
const bool use_mmap;
const int clblast_info = 0;
};
struct generation_inputs
{

145
ggml.c
View file

@ -123,26 +123,10 @@ typedef void* thread_ret_t;
} \
} while (0)
#if GGML_USE_CLBLAST
#ifndef GGML_USE_OPENBLAS
#define GGML_USE_OPENBLAS
#endif
#define CL_TARGET_OPENCL_VERSION 110
#include <clblast_c.h>
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_event event;
bool cl_initialized = false;
#endif
#ifdef GGML_USE_ACCELERATE
#include <Accelerate/Accelerate.h>
#elif defined(GGML_USE_OPENBLAS)
#include <cblas.h>
#elif GGML_USE_OPENBLAS
#include <ggml_blas_adapter.c>
#endif
#undef MIN
@ -6330,7 +6314,7 @@ static void ggml_compute_forward_rms_norm(
// ggml_compute_forward_mul_mat
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
// helper function to determine if it is better to use BLAS or not
// for large matrices, BLAS is faster
static bool ggml_compute_forward_mul_mat_use_blas(
@ -6355,85 +6339,6 @@ static bool ggml_compute_forward_mul_mat_use_blas(
return false;
}
#ifdef GGML_USE_CLBLAST
static bool ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, const int m, const int n, const int k, const float alpha, const float *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc) {
cl_int err = 0;
if (!cl_initialized) {
cl_uint num_platforms;
clGetPlatformIDs(0, NULL, &num_platforms);
cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
clGetPlatformIDs(num_platforms, platforms, NULL);
platform = platforms[0];
cl_uint num_devices;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
device = devices[0];
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if (err != CL_SUCCESS) {
printf("Error creating OpenCL context: %d\n", err);
fflush(stdout);
}
queue = clCreateCommandQueue(context, device, 0, &err);
event = NULL;
if (err != CL_SUCCESS) {
printf("Error creating OpenCL Command Queue: %d\n", err);
fflush(stdout);
}
free(platforms);
free(devices);
cl_initialized = true;
}
// Prepare buffers
cl_mem cl_buffer_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*k*sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("Error creating OpenCL Buffer A: %d\n", err);
fflush(stdout);
}
cl_mem cl_buffer_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n*k*sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("Error creating OpenCL Buffer B: %d\n", err);
fflush(stdout);
}
cl_mem cl_buffer_c = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("Error creating OpenCL Buffer C: %d\n", err);
fflush(stdout);
}
clEnqueueWriteBuffer(queue, cl_buffer_a, CL_TRUE, 0, m*k*sizeof(float), host_a, 0, NULL, NULL);
clEnqueueWriteBuffer(queue, cl_buffer_b, CL_TRUE, 0, n*k*sizeof(float), host_b, 0, NULL, NULL);
clEnqueueWriteBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
// Call the SGEMM routine.
CLBlastStatusCode status = CLBlastSgemm(order,
trans_a, trans_b,
m, n, k,
alpha,
cl_buffer_a, 0, lda,
cl_buffer_b, 0, ldb,
beta,
cl_buffer_c, 0, ldc,
&queue, &event);
// Wait for completion
if (status == CLBlastSuccess) {
clWaitForEvents(1, &event);
clReleaseEvent(event);
}
clEnqueueReadBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
clReleaseMemObject(cl_buffer_a);
clReleaseMemObject(cl_buffer_b);
clReleaseMemObject(cl_buffer_c);
}
#endif
#endif
static void ggml_compute_forward_mul_mat_f32(
@ -6449,7 +6354,7 @@ static void ggml_compute_forward_mul_mat_f32(
const int64_t ne02 = src0->ne[2];
const int64_t ne03 = src0->ne[3];
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
const int64_t ne10 = src1->ne[0];
#endif
const int64_t ne11 = src1->ne[1];
@ -6506,7 +6411,7 @@ static void ggml_compute_forward_mul_mat_f32(
// nb01 >= nb00 - src0 is not transposed
// compute by src0 rows
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
if (params->ith != 0) {
return;
@ -6528,19 +6433,11 @@ static void ggml_compute_forward_mul_mat_f32(
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
// zT = y * xT
#ifdef GGML_USE_CLBLAST
ggml_cl_sgemm_wrapper(CblasRowMajor, CblasNoTrans, CblasTrans,
do_blas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01);
#else
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01);
#endif
}
}
@ -6673,7 +6570,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
// nb01 >= nb00 - src0 is not transposed
// compute by src0 rows
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
GGML_ASSERT(nb10 == sizeof(float));
@ -6708,19 +6605,11 @@ static void ggml_compute_forward_mul_mat_f16_f32(
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
// zT = y * xT
#ifdef GGML_USE_CLBLAST
ggml_cl_sgemm_wrapper(CblasRowMajor, CblasNoTrans, CblasTrans,
do_blas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01);
#else
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01);
#endif
}
}
@ -6896,7 +6785,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
// nb01 >= nb00 - src0 is not transposed
// compute by src0 rows
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
if (params->ith != 0) {
return;
@ -6929,19 +6818,11 @@ static void ggml_compute_forward_mul_mat_q_f32(
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
// zT = y * xT
#ifdef GGML_USE_CLBLAST
ggml_cl_sgemm_wrapper(CblasRowMajor, CblasNoTrans, CblasTrans,
do_blas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01);
#else
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01);
#endif
}
}
@ -9672,7 +9553,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
size_t cur = 0;
if (node->src0->type == GGML_TYPE_F16 && node->src1->type == GGML_TYPE_F32) {
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
node->n_tasks = 1; // TODO: this actually is doing nothing
// the threads are still spinning
@ -9689,7 +9570,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
} else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) {
cur = 0;
} else if (quantize_fns[node->src0->type].vec_dot_q && node->src1->type == GGML_TYPE_F32) {
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
node->n_tasks = 1;
cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]);
@ -10986,7 +10867,7 @@ int ggml_cpu_has_wasm_simd(void) {
}
int ggml_cpu_has_blas(void) {
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
return 1;
#else
return 0;

124
ggml_blas_adapter.c Normal file
View file

@ -0,0 +1,124 @@
//this is a drop-in for all CLBlast related code, to keep the main ggml.c unmodified
// we will imitate the function definition from OpenBLAS instead, replaced as necessary.
//windows binaries for clblast obtained from https://github.com/CNugteren/CLBlast (apache license)
//windows binaries for opencl obtained from https://github.com/KhronosGroup/OpenCL-SDK (apache license)
#include <cblas.h>
#include <stdio.h>
#include <stdlib.h>
#if GGML_USE_CLBLAST
#define CL_TARGET_OPENCL_VERSION 110
#include <clblast_c.h>
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_event event;
bool cl_initialized = false;
static void ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, const int m, const int n, const int k, const float alpha, const float *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc) {
cl_int err = 0;
if (!cl_initialized) {
char * KCPP_CBLAST_PLATFORM = getenv("KCPP_CBLAST_PLATFORM");
char * KCPP_CBLAST_DEVICES = getenv("KCPP_CBLAST_DEVICES");
int plat_num = (KCPP_CBLAST_PLATFORM == NULL ? 0 : atoi(KCPP_CBLAST_PLATFORM));
int dev_num = (KCPP_CBLAST_DEVICES == NULL ? 0 : atoi(KCPP_CBLAST_DEVICES));
printf("\nInitializing CLBlast (First Run)...");
printf("\nSelected: Platform=%d, Device=%d (If invalid, program will crash)\n",plat_num,dev_num);
cl_uint num_platforms;
clGetPlatformIDs(0, NULL, &num_platforms);
cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
clGetPlatformIDs(num_platforms, platforms, NULL);
platform = platforms[plat_num];
cl_uint num_devices;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
device = devices[dev_num];
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if (err != CL_SUCCESS) {
printf("Error creating OpenCL context: %d\n", err);
fflush(stdout);
}
queue = clCreateCommandQueue(context, device, 0, &err);
event = NULL;
if (err != CL_SUCCESS) {
printf("Error creating OpenCL Command Queue: %d\n", err);
fflush(stdout);
}
free(platforms);
free(devices);
cl_initialized = true;
}
// Prepare buffers
cl_mem cl_buffer_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*k*sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("Error creating OpenCL Buffer A: %d\n", err);
fflush(stdout);
}
cl_mem cl_buffer_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n*k*sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("Error creating OpenCL Buffer B: %d\n", err);
fflush(stdout);
}
cl_mem cl_buffer_c = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("Error creating OpenCL Buffer C: %d\n", err);
fflush(stdout);
}
clEnqueueWriteBuffer(queue, cl_buffer_a, CL_TRUE, 0, m*k*sizeof(float), host_a, 0, NULL, NULL);
clEnqueueWriteBuffer(queue, cl_buffer_b, CL_TRUE, 0, n*k*sizeof(float), host_b, 0, NULL, NULL);
clEnqueueWriteBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
// Call the SGEMM routine.
CLBlastStatusCode status = CLBlastSgemm(order,
trans_a, trans_b,
m, n, k,
alpha,
cl_buffer_a, 0, lda,
cl_buffer_b, 0, ldb,
beta,
cl_buffer_c, 0, ldc,
&queue, &event);
// Wait for completion
if (status == CLBlastSuccess) {
clWaitForEvents(1, &event);
clReleaseEvent(event);
}
clEnqueueReadBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
clReleaseMemObject(cl_buffer_a);
clReleaseMemObject(cl_buffer_b);
clReleaseMemObject(cl_buffer_c);
}
#endif
static void do_blas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc)
{
#if GGML_USE_CLBLAST
ggml_cl_sgemm_wrapper(Order, TransA, TransB,
M, N, K,
alpha, A, lda,
B, ldb,
beta, C, ldc);
#else
cblas_sgemm(Order, TransA, TransB,
M, N, K,
alpha, A, lda,
B, ldb,
beta, C, ldc);
#endif
}

Binary file not shown.

View file

@ -15,7 +15,8 @@ class load_model_inputs(ctypes.Structure):
("f16_kv", ctypes.c_bool),
("model_filename", ctypes.c_char_p),
("n_parts_overwrite", ctypes.c_int),
("use_mmap", ctypes.c_bool)]
("use_mmap", ctypes.c_bool),
("clblast_info", ctypes.c_int)]
class generation_inputs(ctypes.Structure):
_fields_ = [("seed", ctypes.c_int),
@ -34,12 +35,15 @@ class generation_outputs(ctypes.Structure):
handle = None
use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir.
use_clblast = False #uses CLBlast instead
def init_library():
global handle, use_blas
global handle, use_blas, use_clblast
libname = ""
if use_blas:
libname = "koboldcpp_blas.dll"
libname = "koboldcpp_openblas.dll"
elif use_clblast:
libname = "koboldcpp_clblast.dll"
else:
libname = "koboldcpp.dll"
@ -63,6 +67,10 @@ def load_model(model_filename,batch_size=8,max_context_length=512,n_parts_overwr
inputs.n_parts_overwrite = n_parts_overwrite
inputs.f16_kv = True
inputs.use_mmap = use_mmap
clblastids = 0
if args.useclblast:
clblastids = int(args.useclblast[0])*10 + int(args.useclblast[1])
inputs.clblast_info = clblastids
ret = handle.load_model(inputs)
return ret
@ -301,13 +309,19 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
sys.exit(0)
def main(args):
global use_blas
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_blas.dll")):
print("Warning: libopenblas.dll or koboldcpp_blas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
global use_blas, use_clblast
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")):
print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
use_blas = False
elif os.name != 'nt':
print("Prebuilt OpenBLAS binaries only available for windows. Please manually build/link libopenblas from makefile with LLAMA_OPENBLAS=1")
use_blas = False
elif args.useclblast:
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_clblast.dll")):
print("Warning: clblast.dll or koboldcpp_clblast.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with CLBlast.")
else:
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast.dll will be required.")
use_clblast = True
elif not args.noblas:
print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.")
use_blas = True
@ -397,5 +411,6 @@ if __name__ == '__main__':
parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
parser.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
parser.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
args = parser.parse_args()
main(args)

Binary file not shown.

BIN
lib/OpenCL.lib Normal file

Binary file not shown.

BIN
lib/clblast.lib Normal file

Binary file not shown.

View file

@ -251,12 +251,10 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
last_n_tokens.push_back(embd_inp[input_consumed]);
current_context_tokens.push_back(embd_inp[input_consumed]);
++input_consumed;
#ifndef GGML_USE_CLBLAST
if ((int)embd.size() >= params.n_batch)
{
break;
}
#endif
}
}
}

View file

@ -1 +1 @@
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_blas.dll;." --add-data "./libopenblas.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"