integrated optional (experimental) CLBlast support
This commit is contained in:
parent
c9f18082fd
commit
23c675b2e6
53 changed files with 22095 additions and 151 deletions
50
CL/Utils/Context.h
Normal file
50
CL/Utils/Context.h
Normal file
|
@ -0,0 +1,50 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/cl.h>
|
||||
|
||||
// STL includes
|
||||
#include <time.h>
|
||||
|
||||
UTILS_EXPORT
|
||||
cl_context cl_util_get_context(const cl_uint plat_id, const cl_uint dev_id,
|
||||
const cl_device_type type, cl_int* const error);
|
||||
UTILS_EXPORT
|
||||
cl_device_id cl_util_get_device(const cl_uint plat_id, const cl_uint dev_id,
|
||||
const cl_device_type type, cl_int* const error);
|
||||
|
||||
UTILS_EXPORT
|
||||
cl_int cl_util_print_device_info(const cl_device_id device);
|
||||
|
||||
UTILS_EXPORT
|
||||
char* cl_util_get_device_info(const cl_device_id device,
|
||||
const cl_device_info info, cl_int* const error);
|
||||
UTILS_EXPORT
|
||||
char* cl_util_get_platform_info(const cl_platform_id platform,
|
||||
const cl_platform_info info,
|
||||
cl_int* const error);
|
||||
|
||||
// build program and show log if build is not successful
|
||||
UTILS_EXPORT
|
||||
cl_int cl_util_build_program(const cl_program pr, const cl_device_id dev,
|
||||
const char* const opt);
|
||||
|
||||
// Declares a `struct timespec` named `time` in the enclosing scope and fills
// it with the current calendar time (TIME_UTC base) via timespec_get().
// Because it introduces a declaration it can be used only once per name and
// scope.
#define GET_CURRENT_TIMER(time)                                                \
    struct timespec time;                                                      \
    timespec_get(&time, TIME_UTC);

// Stores in `dt` the time elapsed between `time1` and `time2` in nanoseconds.
// Every argument is parenthesised so that expressions such as `*ptr` or
// `cond ? a : b` can be passed, and the arithmetic is carried out in
// `long long` so the seconds-to-nanoseconds multiplication cannot overflow on
// platforms where time_t/long are 32 bits wide.
#define TIMER_DIFFERENCE(dt, time1, time2)                                     \
    {                                                                          \
        (dt) = ((long long)(time2).tv_sec - (long long)(time1).tv_sec)         \
                * 1000000000LL                                                 \
            + ((long long)(time2).tv_nsec - (long long)(time1).tv_nsec);       \
    }

// Convenience pair: START_TIMER records the start time in `start_timer1`,
// STOP_TIMER(dt) records the end time in `stop_timer2` and writes the elapsed
// nanoseconds to `dt`. Both names are declared in the enclosing scope, so each
// macro can appear at most once per scope.
#define START_TIMER GET_CURRENT_TIMER(start_timer1)
#define STOP_TIMER(dt)                                                         \
    GET_CURRENT_TIMER(stop_timer2)                                             \
    TIMER_DIFFERENCE(dt, start_timer1, stop_timer2)
|
17
CL/Utils/Context.hpp
Normal file
17
CL/Utils/Context.hpp
Normal file
|
@ -0,0 +1,17 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL SDK includes
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
|
||||
#include <CL/Utils/Error.hpp>
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
Context UTILSCPP_EXPORT get_context(cl_uint plat_id, cl_uint dev_id,
|
||||
cl_device_type type,
|
||||
cl_int* error = nullptr);
|
||||
}
|
||||
}
|
84
CL/Utils/Detail.hpp
Normal file
84
CL/Utils/Detail.hpp
Normal file
|
@ -0,0 +1,84 @@
|
|||
#pragma once
|
||||
|
||||
// STL includes
|
||||
#include <stddef.h>
#include <initializer_list> // std::initializer_list
#include <tuple> // std::tuple, std::get
#include <type_traits> // std::remove_reference_t
#include <utility> // std::forward, std::integer_sequence
|
||||
|
||||
namespace cl {
namespace util {
namespace detail {

    // Invokes `f` once for every argument, in left-to-right order, and
    // returns the callable so that state accumulated inside it can be
    // inspected by the caller. Works with zero arguments.
    // Borrowed from:
    // https://www.fluentcpp.com/2019/03/05/for_each_arg-applying-a-function-to-each-argument-of-a-function-in-cpp/
    template <class F, class... Args> F for_each_arg(F f, Args&&... args)
    {
        // The braced list guarantees left-to-right evaluation; the (void)
        // cast protects against overloaded comma operators.
        (void)std::initializer_list<int>{ (
            (void)f(std::forward<Args>(args)), 0)... };
        return f;
    }

    namespace impl {
        // Index-expanding helper of for_each_in_tuple.
        // Borrowed from: https://stackoverflow.com/a/16387374/1476661
        template <typename T, typename F, int... Is>
        void for_each_in_tuple(T&& t, F&& f,
                               std::integer_sequence<int, Is...>)
        {
            // Both parameters are unused when the index pack is empty.
            (void)t;
            (void)f;
            // An explicitly typed initializer_list (instead of `auto l = {…}`)
            // stays well-formed for an empty pack, i.e. for std::tuple<>.
            // `f` is invoked as an lvalue: it is called once per element, so
            // forwarding it as an rvalue more than once would be unsafe.
            (void)std::initializer_list<int>{ (
                (void)f(std::get<Is>(std::forward<T>(t))), 0)... };
        }
    }

    // Calls `f` with each element of tuple `t`, first element first.
    // An empty tuple is accepted and results in no calls.
    template <typename... Ts, typename F>
    void for_each_in_tuple(std::tuple<Ts...> const& t, F&& f)
    {
        impl::for_each_in_tuple(
            t, std::forward<F>(f),
            std::make_integer_sequence<int, sizeof...(Ts)>());
    }

    namespace impl {
        // Index-expanding helper of transform_tuple.
        // Borrowed from
        // https://codereview.stackexchange.com/questions/193420/apply-a-function-to-each-element-of-a-tuple-map-a-tuple
        template <class F, typename Tuple, std::size_t... Is>
        auto transform_tuple(Tuple&& t, F&& f, std::index_sequence<Is...>)
        {
            // Both parameters are unused when the index pack is empty.
            (void)t;
            (void)f;
            // `f` is invoked as an lvalue because it is called once per
            // element. The calls are function arguments of make_tuple, so
            // their relative order is unspecified.
            return std::make_tuple(f(std::get<Is>(t))...);
        }
    }

    // Returns a new tuple (built with std::make_tuple, so results are stored
    // by decayed value) whose i-th element is f(std::get<i>(t)).
    template <class F, typename... Args>
    auto transform_tuple(const std::tuple<Args...>& t, F&& f)
    {
        return impl::transform_tuple(
            t, std::forward<F>(f),
            std::make_index_sequence<sizeof...(Args)>{});
    }

    namespace impl {
        // Index-expanding helper of apply.
        // Borrowed from
        // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3658.html
        // with modifications of Casey Carter at
        // https://stackoverflow.com/a/51365112/1476661
        template <typename F, typename Tuple, std::size_t... I>
        auto apply(F&& f, Tuple&& args, std::index_sequence<I...>)
            -> decltype(std::forward<F>(f)(
                std::get<I>(std::forward<Tuple>(args))...))
        {
            return std::forward<F>(f)(
                std::get<I>(std::forward<Tuple>(args))...);
        }
    }

    // Calls `f` with the elements of tuple `args` unpacked as individual
    // arguments and returns the result of that call. `f` is invoked exactly
    // once, so it is perfectly forwarded.
    template <typename F, typename Tuple,
              typename Indices = std::make_index_sequence<
                  std::tuple_size<std::remove_reference_t<Tuple>>::value>>
    auto apply(F&& f, Tuple&& args)
        -> decltype(impl::apply(std::forward<F>(f),
                                std::forward<Tuple>(args), Indices()))
    {
        return impl::apply(std::forward<F>(f), std::forward<Tuple>(args),
                           Indices());
    }

} // namespace detail
} // namespace util
} // namespace cl
|
21
CL/Utils/Device.hpp
Normal file
21
CL/Utils/Device.hpp
Normal file
|
@ -0,0 +1,21 @@
|
|||
#pragma once
|
||||
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
#include <CL/Utils/Error.hpp>
|
||||
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
bool UTILSCPP_EXPORT opencl_c_version_contains(
|
||||
const cl::Device& device, const cl::string& version_fragment);
|
||||
|
||||
bool UTILSCPP_EXPORT supports_extension(const cl::Device& device,
|
||||
const cl::string& extension);
|
||||
|
||||
#ifdef CL_VERSION_3_0
|
||||
bool UTILSCPP_EXPORT supports_feature(const cl::Device& device,
|
||||
const cl::string& feature_name);
|
||||
#endif
|
||||
}
|
||||
}
|
88
CL/Utils/Error.h
Normal file
88
CL/Utils/Error.h
Normal file
|
@ -0,0 +1,88 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include <CL/Utils/ErrorCodes.h>
|
||||
|
||||
// STL includes
|
||||
#include <stdio.h> // fprintf
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/cl.h>
|
||||
|
||||
// RET = function returns error code
// PAR = function sets error code in the parameter
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
||||
#define OCLERROR_RET(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
err = func; \
|
||||
if (err != CL_SUCCESS) \
|
||||
{ \
|
||||
cl_util_print_error(err); \
|
||||
fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \
|
||||
__FILE__, #func); \
|
||||
goto label; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define OCLERROR_PAR(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
func; \
|
||||
if (err != CL_SUCCESS) \
|
||||
{ \
|
||||
cl_util_print_error(err); \
|
||||
fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \
|
||||
__FILE__, #func); \
|
||||
goto label; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define MEM_CHECK(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
if ((func) == NULL) \
|
||||
{ \
|
||||
err = CL_OUT_OF_HOST_MEMORY; \
|
||||
cl_util_print_error(err); \
|
||||
fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \
|
||||
__FILE__, #func); \
|
||||
goto label; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define OCLERROR_RET(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
err = func; \
|
||||
if (err != CL_SUCCESS) goto label; \
|
||||
} while (0)
|
||||
|
||||
#define OCLERROR_PAR(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
func; \
|
||||
if (err != CL_SUCCESS) goto label; \
|
||||
} while (0)
|
||||
|
||||
#define MEM_CHECK(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
if ((func) == NULL) \
|
||||
{ \
|
||||
err = CL_OUT_OF_HOST_MEMORY; \
|
||||
goto label; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
UTILS_EXPORT
|
||||
void cl_util_print_error(cl_int error);
|
70
CL/Utils/Error.hpp
Normal file
70
CL/Utils/Error.hpp
Normal file
|
@ -0,0 +1,70 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include <CL/Utils/ErrorCodes.h>
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
#if defined(CL_HPP_ENABLE_EXCEPTIONS)
|
||||
/*! \brief Exception class
|
||||
*
|
||||
* This may be thrown by SDK utility functions when
|
||||
* CL_HPP_ENABLE_EXCEPTIONS is defined.
|
||||
*/
|
||||
class Error : public std::exception {
|
||||
private:
|
||||
int err_;
|
||||
const char* errStr_;
|
||||
|
||||
public:
|
||||
/*! \brief Create a new SDK error exception for a given error code
|
||||
* and corresponding message.
|
||||
*
|
||||
* \param err error code value.
|
||||
*
|
||||
* \param errStr a descriptive string that must remain in scope until
|
||||
* handling of the exception has concluded. If set, it
|
||||
* will be returned by what().
|
||||
*/
|
||||
Error(cl_int err, const char* errStr = NULL): err_(err), errStr_(errStr)
|
||||
{}
|
||||
|
||||
~Error() throw() {}
|
||||
|
||||
/*! \brief Get error string associated with exception
|
||||
*
|
||||
* \return A memory pointer to the error message string.
|
||||
*/
|
||||
virtual const char* what() const throw()
|
||||
{
|
||||
if (errStr_ == NULL)
|
||||
{
|
||||
return "empty";
|
||||
}
|
||||
else
|
||||
{
|
||||
return errStr_;
|
||||
}
|
||||
}
|
||||
|
||||
/*! \brief Get error code associated with exception
|
||||
*
|
||||
* \return The error code.
|
||||
*/
|
||||
cl_int err(void) const { return err_; }
|
||||
};
|
||||
#endif
|
||||
|
||||
namespace detail {
|
||||
UTILSCPP_EXPORT cl_int errHandler(cl_int err, cl_int* errPtr,
|
||||
const char* errStr = nullptr);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
5
CL/Utils/ErrorCodes.h
Normal file
5
CL/Utils/ErrorCodes.h
Normal file
|
@ -0,0 +1,5 @@
|
|||
#pragma once
|
||||
|
||||
/* Status codes specific to the OpenCL utility library, numbered from -2000
 * downwards. */
#define CL_UTIL_INDEX_OUT_OF_RANGE       -2000
#define CL_UTIL_DEVICE_NOT_INTEROPERABLE -2001
#define CL_UTIL_FILE_OPERATION_ERROR     -2002
|
13
CL/Utils/Event.h
Normal file
13
CL/Utils/Event.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/cl.h>
|
||||
|
||||
UTILS_EXPORT
|
||||
cl_ulong cl_util_get_event_duration(const cl_event event,
|
||||
const cl_profiling_info start,
|
||||
const cl_profiling_info end,
|
||||
cl_int* const error);
|
21
CL/Utils/Event.hpp
Normal file
21
CL/Utils/Event.hpp
Normal file
|
@ -0,0 +1,21 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL SDK includes
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
|
||||
// STL includes
|
||||
#include <chrono>
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
template <cl_int From, cl_int To, typename Dur = std::chrono::nanoseconds>
|
||||
auto get_duration(cl::Event& ev)
|
||||
{
|
||||
return std::chrono::duration_cast<Dur>(std::chrono::nanoseconds{
|
||||
ev.getProfilingInfo<To>() - ev.getProfilingInfo<From>() });
|
||||
}
|
||||
}
|
||||
}
|
42
CL/Utils/File.h
Normal file
42
CL/Utils/File.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/cl.h>
|
||||
|
||||
// read all the text file contents securely in ANSI C89
|
||||
// return pointer to C-string with file contents
|
||||
// can handle streams with no known size and no support for fseek
|
||||
// based on https://stackoverflow.com/questions/14002954/ by Nominal Animal
|
||||
UTILS_EXPORT
|
||||
char* cl_util_read_text_file(const char* const filename, size_t* const length,
|
||||
cl_int* const error);
|
||||
|
||||
// read all the binary file contents securely in ANSI C89
|
||||
// return pointer to file contents
|
||||
// can handle streams with no known size and no support for fseek
|
||||
// based on https://stackoverflow.com/questions/14002954/ by Nominal Animal
|
||||
UTILS_EXPORT
|
||||
unsigned char* cl_util_read_binary_file(const char* const filename,
|
||||
size_t* const length,
|
||||
cl_int* const error);
|
||||
|
||||
// write binaries of OpenCL compiled program
|
||||
// binaries are written as separate files for each device
|
||||
// with file name "(program_file_name)_(name of device).bin"
|
||||
// based on variant of Logan
|
||||
// http://logan.tw/posts/2014/11/22/pre-compile-the-opencl-kernel-program-part-2/
|
||||
UTILS_EXPORT
|
||||
cl_int cl_util_write_binaries(const cl_program program,
|
||||
const char* const program_file_name);
|
||||
|
||||
// read binaries of OpenCL compiled program
|
||||
// from files of file names "(program_file_name)_(name of device).bin"
|
||||
UTILS_EXPORT
|
||||
cl_program cl_util_read_binaries(const cl_context context,
|
||||
const cl_device_id* const devices,
|
||||
const cl_uint num_devices,
|
||||
const char* const program_file_name,
|
||||
cl_int* const error);
|
49
CL/Utils/File.hpp
Normal file
49
CL/Utils/File.hpp
Normal file
|
@ -0,0 +1,49 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL SDK includes
|
||||
#include <CL/Utils/Utils.hpp>
|
||||
|
||||
// STL includes
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
// Scott Meyers, Effective STL, Addison-Wesley Professional, 2001, Item 29
|
||||
// with error handling
|
||||
UTILSCPP_EXPORT
|
||||
std::string read_text_file(const char* const filename, cl_int* const error)
|
||||
{
|
||||
std::ifstream in(filename);
|
||||
if (in.good())
|
||||
{
|
||||
try
|
||||
{
|
||||
std::string red((std::istreambuf_iterator<char>(in)),
|
||||
std::istreambuf_iterator<char>());
|
||||
if (in.good() && in.eof())
|
||||
{
|
||||
if (error != nullptr) *error = CL_SUCCESS;
|
||||
return red;
|
||||
}
|
||||
else
|
||||
{
|
||||
detail::errHandler(CL_UTIL_FILE_OPERATION_ERROR, error,
|
||||
"File read error!");
|
||||
return std::string();
|
||||
}
|
||||
} catch (std::bad_alloc& ex)
|
||||
{
|
||||
detail::errHandler(CL_OUT_OF_RESOURCES, error,
|
||||
"Bad allocation!");
|
||||
return std::string();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
detail::errHandler(CL_INVALID_VALUE, error, "No file!");
|
||||
return std::string();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
18
CL/Utils/InteropContext.hpp
Normal file
18
CL/Utils/InteropContext.hpp
Normal file
|
@ -0,0 +1,18 @@
|
|||
#pragma once
|
||||
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
#include <CL/Utils/Error.hpp>
|
||||
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
vector<cl_context_properties>
|
||||
UTILSCPP_EXPORT get_interop_context_properties(const cl::Device& plat,
|
||||
cl_int* error = nullptr);
|
||||
|
||||
Context UTILSCPP_EXPORT get_interop_context(int plat_id, int dev_id,
|
||||
cl_device_type type,
|
||||
cl_int* error = nullptr);
|
||||
}
|
||||
}
|
42
CL/Utils/OpenCLUtilsCpp_Export.h
Normal file
42
CL/Utils/OpenCLUtilsCpp_Export.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
|
||||
#ifndef UTILSCPP_EXPORT_H
#define UTILSCPP_EXPORT_H

/* Export/visibility macros of the OpenCLUtilsCpp library. In this
 * configuration UTILSCPP_EXPORT and OPENCLUTILSCPP_NO_EXPORT expand to nothing
 * in every branch. */
#ifdef OPENCLUTILSCPP_STATIC_DEFINE
#  define UTILSCPP_EXPORT
#  define OPENCLUTILSCPP_NO_EXPORT
#else
#  ifndef UTILSCPP_EXPORT
#    ifdef OpenCLUtilsCpp_EXPORTS
        /* We are building this library */
#      define UTILSCPP_EXPORT
#    else
        /* We are using this library */
#      define UTILSCPP_EXPORT
#    endif
#  endif

#  ifndef OPENCLUTILSCPP_NO_EXPORT
#    define OPENCLUTILSCPP_NO_EXPORT
#  endif
#endif

/* Deprecation marker. __declspec(deprecated) is understood only by
 * MSVC-compatible compilers, so select the spelling per compiler and fall back
 * to nothing on unknown toolchains instead of producing a syntax error. */
#ifndef OPENCLUTILSCPP_DEPRECATED
#  if defined(_MSC_VER)
#    define OPENCLUTILSCPP_DEPRECATED __declspec(deprecated)
#  elif defined(__GNUC__) || defined(__clang__)
#    define OPENCLUTILSCPP_DEPRECATED __attribute__((__deprecated__))
#  else
#    define OPENCLUTILSCPP_DEPRECATED
#  endif
#endif

#ifndef OPENCLUTILSCPP_DEPRECATED_EXPORT
#  define OPENCLUTILSCPP_DEPRECATED_EXPORT UTILSCPP_EXPORT OPENCLUTILSCPP_DEPRECATED
#endif

#ifndef OPENCLUTILSCPP_DEPRECATED_NO_EXPORT
#  define OPENCLUTILSCPP_DEPRECATED_NO_EXPORT OPENCLUTILSCPP_NO_EXPORT OPENCLUTILSCPP_DEPRECATED
#endif

#if 0 /* DEFINE_NO_DEPRECATED */
#  ifndef OPENCLUTILSCPP_NO_DEPRECATED
#    define OPENCLUTILSCPP_NO_DEPRECATED
#  endif
#endif

#endif /* UTILSCPP_EXPORT_H */
|
42
CL/Utils/OpenCLUtils_Export.h
Normal file
42
CL/Utils/OpenCLUtils_Export.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
|
||||
#ifndef UTILS_EXPORT_H
#define UTILS_EXPORT_H

/* Export/visibility macros of the OpenCLUtils library. In this configuration
 * UTILS_EXPORT and OPENCLUTILS_NO_EXPORT expand to nothing in every branch. */
#ifdef OPENCLUTILS_STATIC_DEFINE
#  define UTILS_EXPORT
#  define OPENCLUTILS_NO_EXPORT
#else
#  ifndef UTILS_EXPORT
#    ifdef OpenCLUtils_EXPORTS
        /* We are building this library */
#      define UTILS_EXPORT
#    else
        /* We are using this library */
#      define UTILS_EXPORT
#    endif
#  endif

#  ifndef OPENCLUTILS_NO_EXPORT
#    define OPENCLUTILS_NO_EXPORT
#  endif
#endif

/* Deprecation marker. __declspec(deprecated) is understood only by
 * MSVC-compatible compilers, so select the spelling per compiler and fall back
 * to nothing on unknown toolchains instead of producing a syntax error. */
#ifndef OPENCLUTILS_DEPRECATED
#  if defined(_MSC_VER)
#    define OPENCLUTILS_DEPRECATED __declspec(deprecated)
#  elif defined(__GNUC__) || defined(__clang__)
#    define OPENCLUTILS_DEPRECATED __attribute__((__deprecated__))
#  else
#    define OPENCLUTILS_DEPRECATED
#  endif
#endif

#ifndef OPENCLUTILS_DEPRECATED_EXPORT
#  define OPENCLUTILS_DEPRECATED_EXPORT UTILS_EXPORT OPENCLUTILS_DEPRECATED
#endif

#ifndef OPENCLUTILS_DEPRECATED_NO_EXPORT
#  define OPENCLUTILS_DEPRECATED_NO_EXPORT OPENCLUTILS_NO_EXPORT OPENCLUTILS_DEPRECATED
#endif

#if 0 /* DEFINE_NO_DEPRECATED */
#  ifndef OPENCLUTILS_NO_DEPRECATED
#    define OPENCLUTILS_NO_DEPRECATED
#  endif
#endif

#endif /* UTILS_EXPORT_H */
|
16
CL/Utils/Platform.hpp
Normal file
16
CL/Utils/Platform.hpp
Normal file
|
@ -0,0 +1,16 @@
|
|||
#pragma once
|
||||
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
#include <CL/Utils/Error.hpp>
|
||||
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
bool UTILSCPP_EXPORT supports_extension(const cl::Platform& platform,
|
||||
const cl::string& extension);
|
||||
|
||||
bool UTILSCPP_EXPORT platform_version_contains(
|
||||
const cl::Platform& platform, const cl::string& version_fragment);
|
||||
}
|
||||
}
|
11
CL/Utils/Utils.h
Normal file
11
CL/Utils/Utils.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
#include <CL/Utils/Error.h>
|
||||
#include <CL/Utils/File.h>
|
||||
#include <CL/Utils/Context.h>
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/cl.h>
|
14
CL/Utils/Utils.hpp
Normal file
14
CL/Utils/Utils.hpp
Normal file
|
@ -0,0 +1,14 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
#include <CL/Utils/Detail.hpp>
|
||||
#include <CL/Utils/Error.hpp>
|
||||
#include <CL/Utils/Platform.hpp>
|
||||
#include <CL/Utils/Device.hpp>
|
||||
#include <CL/Utils/Context.hpp>
|
||||
#include <CL/Utils/Event.hpp>
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/opencl.hpp>
|
18
CL/cl2.hpp
Normal file
18
CL/cl2.hpp
Normal file
|
@ -0,0 +1,18 @@
|
|||
//
|
||||
// Copyright (c) 2020 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include <CL/opencl.hpp>
|
||||
#pragma message("cl2.hpp has been renamed to opencl.hpp to make it clear that it supports all versions of OpenCL. Please include opencl.hpp directly.")
|
154
CL/cl_d3d10.h
Normal file
154
CL/cl_d3d10.h
Normal file
|
@ -0,0 +1,154 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_D3D10_H
|
||||
#define __OPENCL_CL_D3D10_H
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( push )
|
||||
#pragma warning( disable : 4201 )
|
||||
#pragma warning( disable : 5105 )
|
||||
#endif
|
||||
#endif
|
||||
#include <d3d10.h>
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( pop )
|
||||
#endif
|
||||
#endif
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d10_sharing */
|
||||
#define cl_khr_d3d10_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d10_device_source_khr;
|
||||
typedef cl_uint cl_d3d10_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D10_DEVICE_KHR -1002
|
||||
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
|
||||
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
|
||||
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
|
||||
|
||||
/* cl_d3d10_device_source_nv */
|
||||
#define CL_D3D10_DEVICE_KHR 0x4010
|
||||
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
|
||||
|
||||
/* cl_d3d10_device_set_nv */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
|
||||
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
|
||||
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
|
||||
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d10_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d10_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_sharing_format_query_d3d10
|
||||
***************************************************************/
|
||||
#define cl_intel_sharing_format_query_d3d10 1
|
||||
|
||||
/* when cl_khr_d3d10_sharing is supported */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetSupportedD3D10TextureFormatsINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint num_entries,
|
||||
DXGI_FORMAT* d3d10_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
typedef cl_int (CL_API_CALL *
|
||||
clGetSupportedD3D10TextureFormatsINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint num_entries,
|
||||
DXGI_FORMAT* d3d10_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D10_H */
|
||||
|
156
CL/cl_d3d11.h
Normal file
156
CL/cl_d3d11.h
Normal file
|
@ -0,0 +1,156 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_D3D11_H
|
||||
#define __OPENCL_CL_D3D11_H
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( push )
|
||||
#pragma warning( disable : 4201 )
|
||||
#pragma warning( disable : 5105 )
|
||||
#endif
|
||||
#endif
|
||||
#include <d3d11.h>
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( pop )
|
||||
#endif
|
||||
#endif
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d11_sharing */
|
||||
#define cl_khr_d3d11_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d11_device_source_khr;
|
||||
typedef cl_uint cl_d3d11_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D11_DEVICE_KHR -1006
|
||||
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
|
||||
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
|
||||
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
|
||||
|
||||
/* cl_d3d11_device_source */
|
||||
#define CL_D3D11_DEVICE_KHR 0x4019
|
||||
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
|
||||
|
||||
/* cl_d3d11_device_set */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
|
||||
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
|
||||
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
|
||||
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d11_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d11_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_sharing_format_query_d3d11
|
||||
***************************************************************/
|
||||
#define cl_intel_sharing_format_query_d3d11 1
|
||||
|
||||
/* when cl_khr_d3d11_sharing is supported */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetSupportedD3D11TextureFormatsINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
DXGI_FORMAT* d3d11_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
typedef cl_int (CL_API_CALL *
|
||||
clGetSupportedD3D11TextureFormatsINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
DXGI_FORMAT* d3d11_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D11_H */
|
||||
|
268
CL/cl_dx9_media_sharing.h
Normal file
268
CL/cl_dx9_media_sharing.h
Normal file
|
@ -0,0 +1,268 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
/* cl_khr_dx9_media_sharing */
|
||||
#define cl_khr_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_media_adapter_type_khr;
|
||||
typedef cl_uint cl_dx9_media_adapter_set_khr;
|
||||
|
||||
#if defined(_WIN32)
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( push )
|
||||
#pragma warning( disable : 4201 )
|
||||
#pragma warning( disable : 5105 )
|
||||
#endif
|
||||
#endif
|
||||
#include <d3d9.h>
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( pop )
|
||||
#endif
|
||||
#endif
|
||||
/* Bundles a Direct3D 9 surface pointer with an associated HANDLE.
 * NOTE(review): presumably this is the structure passed through the
 * `void * surface_info` argument of clCreateFromDX9MediaSurfaceKHR_fn --
 * confirm against the cl_khr_dx9_media_sharing extension specification. */
typedef struct _cl_dx9_surface_info_khr
|
||||
{
|
||||
IDirect3DSurface9 *resource; /* Direct3D 9 surface interface pointer */
|
||||
HANDLE shared_handle; /* handle paired with the surface; NOTE(review): presumably the D3D9 shared handle for `resource` -- confirm */
|
||||
} cl_dx9_surface_info_khr;
|
||||
#endif
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
|
||||
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
|
||||
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
|
||||
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
|
||||
|
||||
/* cl_dx9_media_adapter_type_khr */
|
||||
#define CL_ADAPTER_D3D9_KHR 0x2020
|
||||
#define CL_ADAPTER_D3D9EX_KHR 0x2021
|
||||
#define CL_ADAPTER_DXVA_KHR 0x2022
|
||||
|
||||
/* cl_dx9_media_adapter_set_khr */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
|
||||
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
|
||||
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
|
||||
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
|
||||
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
|
||||
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_uint num_media_adapters,
|
||||
cl_dx9_media_adapter_type_khr * media_adapter_type,
|
||||
void * media_adapters,
|
||||
cl_dx9_media_adapter_set_khr media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_dx9_media_adapter_type_khr adapter_type,
|
||||
void * surface_info,
|
||||
cl_uint plane,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
/***************************************
|
||||
* cl_intel_dx9_media_sharing extension *
|
||||
****************************************/
|
||||
|
||||
#define cl_intel_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_device_source_intel;
|
||||
typedef cl_uint cl_dx9_device_set_intel;
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_DX9_DEVICE_INTEL -1010
|
||||
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
|
||||
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
|
||||
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
|
||||
|
||||
/* cl_dx9_device_source_intel */
|
||||
#define CL_D3D9_DEVICE_INTEL 0x4022
|
||||
#define CL_D3D9EX_DEVICE_INTEL 0x4070
|
||||
#define CL_DXVA_DEVICE_INTEL 0x4071
|
||||
|
||||
/* cl_dx9_device_set_intel */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
|
||||
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
|
||||
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
|
||||
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
|
||||
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
|
||||
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
|
||||
/******************************************************************************/
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceIDsFromDX9INTEL(
|
||||
cl_platform_id platform,
|
||||
cl_dx9_device_source_intel dx9_device_source,
|
||||
void* dx9_object,
|
||||
cl_dx9_device_set_intel dx9_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_dx9_device_source_intel dx9_device_source,
|
||||
void* dx9_object,
|
||||
cl_dx9_device_set_intel dx9_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromDX9MediaSurfaceINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
IDirect3DSurface9* resource,
|
||||
HANDLE sharedHandle,
|
||||
UINT plane,
|
||||
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
IDirect3DSurface9* resource,
|
||||
HANDLE sharedHandle,
|
||||
UINT plane,
|
||||
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireDX9ObjectsINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseDX9ObjectsINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_sharing_format_query_dx9
|
||||
***************************************************************/
|
||||
#define cl_intel_sharing_format_query_dx9 1
|
||||
|
||||
/* when cl_khr_dx9_media_sharing or cl_intel_dx9_media_sharing is supported */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetSupportedDX9MediaSurfaceFormatsINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
D3DFORMAT* dx9_formats,
|
||||
cl_uint* num_surface_formats) ;
|
||||
|
||||
typedef cl_int (CL_API_CALL *
|
||||
clGetSupportedDX9MediaSurfaceFormatsINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
D3DFORMAT* dx9_formats,
|
||||
cl_uint* num_surface_formats) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */
|
||||
|
18
CL/cl_dx9_media_sharing_intel.h
Normal file
18
CL/cl_dx9_media_sharing_intel.h
Normal file
|
@ -0,0 +1,18 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#include <CL/cl_dx9_media_sharing.h>
|
||||
#pragma message("The Intel DX9 media sharing extensions have been moved into cl_dx9_media_sharing.h. Please include cl_dx9_media_sharing.h directly.")
|
120
CL/cl_egl.h
Normal file
120
CL/cl_egl.h
Normal file
|
@ -0,0 +1,120 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_EGL_H
|
||||
#define __OPENCL_CL_EGL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
|
||||
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
|
||||
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
|
||||
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
|
||||
|
||||
/* Error type for clCreateFromEGLImageKHR */
|
||||
#define CL_INVALID_EGL_OBJECT_KHR -1093
|
||||
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
|
||||
|
||||
/* CLeglImageKHR is an opaque handle to an EGLImage */
|
||||
typedef void* CLeglImageKHR;
|
||||
|
||||
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
|
||||
typedef void* CLeglDisplayKHR;
|
||||
|
||||
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
|
||||
typedef void* CLeglSyncKHR;
|
||||
|
||||
/* properties passed to clCreateFromEGLImageKHR */
|
||||
typedef intptr_t cl_egl_image_properties_khr;
|
||||
|
||||
|
||||
#define cl_khr_egl_image 1
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromEGLImageKHR(cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
#define cl_khr_egl_event 1
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromEGLSyncKHR(cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_EGL_H */
|
2634
CL/cl_ext.h
Normal file
2634
CL/cl_ext.h
Normal file
File diff suppressed because it is too large
Load diff
19
CL/cl_ext_intel.h
Normal file
19
CL/cl_ext_intel.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include <CL/cl_ext.h>
|
||||
#pragma message("The Intel extensions have been moved into cl_ext.h. Please include cl_ext.h directly.")
|
194
CL/cl_gl.h
Normal file
194
CL/cl_gl.h
Normal file
|
@ -0,0 +1,194 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2021 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_H
|
||||
#define __OPENCL_CL_GL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_gl_object_type;
|
||||
typedef cl_uint cl_gl_texture_info;
|
||||
typedef cl_uint cl_gl_platform_info;
|
||||
typedef struct __GLsync *cl_GLsync;
|
||||
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||
#endif
|
||||
|
||||
/* cl_gl_texture_info */
|
||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define CL_GL_NUM_SAMPLES 0x2012
|
||||
#endif
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLBuffer(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint bufobj,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#endif
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLRenderbuffer(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint renderbuffer,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLObjectInfo(cl_mem memobj,
|
||||
cl_gl_object_type * gl_object_type,
|
||||
cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLTextureInfo(cl_mem memobj,
|
||||
cl_gl_texture_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Deprecated OpenCL 1.1 APIs */
|
||||
extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
/* cl_khr_gl_sharing extension */
|
||||
|
||||
#define cl_khr_gl_sharing 1
|
||||
|
||||
typedef cl_uint cl_gl_context_info;
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
||||
|
||||
/* cl_gl_context_info */
|
||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
||||
|
||||
/* Additional cl_context_properties */
|
||||
#define CL_GL_CONTEXT_KHR 0x2008
|
||||
#define CL_EGL_DISPLAY_KHR 0x2009
|
||||
#define CL_GLX_DISPLAY_KHR 0x200A
|
||||
#define CL_WGL_HDC_KHR 0x200B
|
||||
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLContextInfoKHR(const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret);
|
||||
|
||||
/*
|
||||
* cl_khr_gl_event extension
|
||||
*/
|
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromGLsyncKHR(cl_context context,
|
||||
cl_GLsync sync,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_sharing_format_query_gl
|
||||
***************************************************************/
|
||||
#define cl_intel_sharing_format_query_gl 1
|
||||
|
||||
/* when cl_khr_gl_sharing is supported */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetSupportedGLTextureFormatsINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint num_entries,
|
||||
cl_GLenum* gl_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
typedef cl_int (CL_API_CALL *
|
||||
clGetSupportedGLTextureFormatsINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint num_entries,
|
||||
cl_GLenum* gl_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_H */
|
18
CL/cl_gl_ext.h
Normal file
18
CL/cl_gl_ext.h
Normal file
|
@ -0,0 +1,18 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2021 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#include <CL/cl_gl.h>
|
||||
#pragma message("All OpenGL-related extensions have been moved into cl_gl.h. Please include cl_gl.h directly.")
|
440
CL/cl_half.h
Normal file
440
CL/cl_half.h
Normal file
|
@ -0,0 +1,440 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2019-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
/**
|
||||
* This is a header-only utility library that provides OpenCL host code with
|
||||
* routines for converting to/from cl_half values.
|
||||
*
|
||||
* Example usage:
|
||||
*
|
||||
* #include <CL/cl_half.h>
|
||||
* ...
|
||||
* cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
|
||||
* cl_float f = cl_half_to_float(h);
|
||||
*/
|
||||
|
||||
#ifndef OPENCL_CL_HALF_H
|
||||
#define OPENCL_CL_HALF_H
|
||||
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Rounding mode used when converting to cl_half.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
CL_HALF_RTE, // round to nearest representable value; exact ties go to the even mantissa
|
||||
CL_HALF_RTZ, // round towards zero (truncate the discarded bits)
|
||||
CL_HALF_RTP, // round towards positive infinity
|
||||
CL_HALF_RTN, // round towards negative infinity
|
||||
} cl_half_rounding_mode;
|
||||
|
||||
|
||||
/* Private utility macros. */
|
||||
#define CL_HALF_EXP_MASK 0x7C00
|
||||
#define CL_HALF_MAX_FINITE_MAG 0x7BFF
|
||||
|
||||
|
||||
/*
|
||||
* Utility to deal with values that overflow when converting to half precision.
|
||||
*/
|
||||
static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
|
||||
uint16_t sign)
|
||||
{
|
||||
if (rounding_mode == CL_HALF_RTZ)
|
||||
{
|
||||
// Round overflow towards zero -> largest finite number (preserving sign)
|
||||
return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
|
||||
}
|
||||
else if (rounding_mode == CL_HALF_RTP && sign)
|
||||
{
|
||||
// Round negative overflow towards positive infinity -> most negative finite number
|
||||
return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
|
||||
}
|
||||
else if (rounding_mode == CL_HALF_RTN && !sign)
|
||||
{
|
||||
// Round positive overflow towards negative infinity -> largest finite number
|
||||
return CL_HALF_MAX_FINITE_MAG;
|
||||
}
|
||||
|
||||
// Overflow to infinity
|
||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Utility to deal with values that underflow when converting to half precision.
|
||||
*/
|
||||
static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
|
||||
uint16_t sign)
|
||||
{
|
||||
if (rounding_mode == CL_HALF_RTP && !sign)
|
||||
{
|
||||
// Round underflow towards positive infinity -> smallest positive value
|
||||
return (sign << 15) | 1;
|
||||
}
|
||||
else if (rounding_mode == CL_HALF_RTN && sign)
|
||||
{
|
||||
// Round underflow towards negative infinity -> largest negative value
|
||||
return (sign << 15) | 1;
|
||||
}
|
||||
|
||||
// Flush to zero
|
||||
return (sign << 15);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a cl_float to a cl_half.
|
||||
*/
|
||||
static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
|
||||
{
|
||||
// Type-punning to get direct access to underlying bits
|
||||
union
|
||||
{
|
||||
cl_float f;
|
||||
uint32_t i;
|
||||
} f32;
|
||||
f32.f = f;
|
||||
|
||||
// Extract sign bit
|
||||
uint16_t sign = f32.i >> 31;
|
||||
|
||||
// Extract FP32 exponent and mantissa
|
||||
uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
|
||||
uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
|
||||
|
||||
// Remove FP32 exponent bias
|
||||
int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
|
||||
|
||||
// Add FP16 exponent bias
|
||||
uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
|
||||
|
||||
// Position of the bit that will become the FP16 mantissa LSB
|
||||
uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
|
||||
|
||||
// Check for NaN / infinity
|
||||
if (f_exp == 0xFF)
|
||||
{
|
||||
if (f_mant)
|
||||
{
|
||||
// NaN -> propagate mantissa and silence it
|
||||
uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
|
||||
h_mant |= 0x200;
|
||||
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Infinity -> zero mantissa
|
||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for zero
|
||||
if (!f_exp && !f_mant)
|
||||
{
|
||||
return (sign << 15);
|
||||
}
|
||||
|
||||
// Check for overflow
|
||||
if (exp >= CL_HALF_MAX_EXP)
|
||||
{
|
||||
return cl_half_handle_overflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for underflow
|
||||
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
|
||||
{
|
||||
return cl_half_handle_underflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for value that will become denormal
|
||||
if (exp < -14)
|
||||
{
|
||||
// Denormal -> include the implicit 1 from the FP32 mantissa
|
||||
h_exp = 0;
|
||||
f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
|
||||
|
||||
// Mantissa shift amount depends on exponent
|
||||
lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
|
||||
}
|
||||
|
||||
// Generate FP16 mantissa by shifting FP32 mantissa
|
||||
uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
|
||||
|
||||
// Check whether we need to round
|
||||
uint32_t halfway = 1 << (lsb_pos - 1);
|
||||
uint32_t mask = (halfway << 1) - 1;
|
||||
switch (rounding_mode)
|
||||
{
|
||||
case CL_HALF_RTE:
|
||||
if ((f_mant & mask) > halfway)
|
||||
{
|
||||
// More than halfway -> round up
|
||||
h_mant += 1;
|
||||
}
|
||||
else if ((f_mant & mask) == halfway)
|
||||
{
|
||||
// Exactly halfway -> round to nearest even
|
||||
if (h_mant & 0x1)
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTZ:
|
||||
// Mantissa has already been truncated -> do nothing
|
||||
break;
|
||||
case CL_HALF_RTP:
|
||||
if ((f_mant & mask) && !sign)
|
||||
{
|
||||
// Round positive numbers up
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTN:
|
||||
if ((f_mant & mask) && sign)
|
||||
{
|
||||
// Round negative numbers down
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for mantissa overflow
|
||||
if (h_mant & 0x400)
|
||||
{
|
||||
h_exp += 1;
|
||||
h_mant = 0;
|
||||
}
|
||||
|
||||
return (sign << 15) | (h_exp << 10) | h_mant;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a cl_double to a cl_half.
|
||||
*/
|
||||
static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
|
||||
{
|
||||
// Type-punning to get direct access to underlying bits
|
||||
union
|
||||
{
|
||||
cl_double d;
|
||||
uint64_t i;
|
||||
} f64;
|
||||
f64.d = d;
|
||||
|
||||
// Extract sign bit
|
||||
uint16_t sign = f64.i >> 63;
|
||||
|
||||
// Extract FP64 exponent and mantissa
|
||||
uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
|
||||
uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
|
||||
|
||||
// Remove FP64 exponent bias
|
||||
int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
|
||||
|
||||
// Add FP16 exponent bias
|
||||
uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
|
||||
|
||||
// Position of the bit that will become the FP16 mantissa LSB
|
||||
uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
|
||||
|
||||
// Check for NaN / infinity
|
||||
if (d_exp == 0x7FF)
|
||||
{
|
||||
if (d_mant)
|
||||
{
|
||||
// NaN -> propagate mantissa and silence it
|
||||
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
|
||||
h_mant |= 0x200;
|
||||
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Infinity -> zero mantissa
|
||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for zero
|
||||
if (!d_exp && !d_mant)
|
||||
{
|
||||
return (sign << 15);
|
||||
}
|
||||
|
||||
// Check for overflow
|
||||
if (exp >= CL_HALF_MAX_EXP)
|
||||
{
|
||||
return cl_half_handle_overflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for underflow
|
||||
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
|
||||
{
|
||||
return cl_half_handle_underflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for value that will become denormal
|
||||
if (exp < -14)
|
||||
{
|
||||
// Include the implicit 1 from the FP64 mantissa
|
||||
h_exp = 0;
|
||||
d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
|
||||
|
||||
// Mantissa shift amount depends on exponent
|
||||
lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
|
||||
}
|
||||
|
||||
// Generate FP16 mantissa by shifting FP64 mantissa
|
||||
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
|
||||
|
||||
// Check whether we need to round
|
||||
uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
|
||||
uint64_t mask = (halfway << 1) - 1;
|
||||
switch (rounding_mode)
|
||||
{
|
||||
case CL_HALF_RTE:
|
||||
if ((d_mant & mask) > halfway)
|
||||
{
|
||||
// More than halfway -> round up
|
||||
h_mant += 1;
|
||||
}
|
||||
else if ((d_mant & mask) == halfway)
|
||||
{
|
||||
// Exactly halfway -> round to nearest even
|
||||
if (h_mant & 0x1)
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTZ:
|
||||
// Mantissa has already been truncated -> do nothing
|
||||
break;
|
||||
case CL_HALF_RTP:
|
||||
if ((d_mant & mask) && !sign)
|
||||
{
|
||||
// Round positive numbers up
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTN:
|
||||
if ((d_mant & mask) && sign)
|
||||
{
|
||||
// Round negative numbers down
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for mantissa overflow
|
||||
if (h_mant & 0x400)
|
||||
{
|
||||
h_exp += 1;
|
||||
h_mant = 0;
|
||||
}
|
||||
|
||||
return (sign << 15) | (h_exp << 10) | h_mant;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a cl_half to a cl_float.
|
||||
*/
|
||||
static inline cl_float cl_half_to_float(cl_half h)
|
||||
{
|
||||
// Type-punning to get direct access to underlying bits
|
||||
union
|
||||
{
|
||||
cl_float f;
|
||||
uint32_t i;
|
||||
} f32;
|
||||
|
||||
// Extract sign bit
|
||||
uint16_t sign = h >> 15;
|
||||
|
||||
// Extract FP16 exponent and mantissa
|
||||
uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||
uint16_t h_mant = h & 0x3FF;
|
||||
|
||||
// Remove FP16 exponent bias
|
||||
int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
|
||||
|
||||
// Add FP32 exponent bias
|
||||
uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
|
||||
|
||||
// Check for NaN / infinity
|
||||
if (h_exp == 0x1F)
|
||||
{
|
||||
if (h_mant)
|
||||
{
|
||||
// NaN -> propagate mantissa and silence it
|
||||
uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
|
||||
f_mant |= 0x400000;
|
||||
f32.i = (sign << 31) | 0x7F800000 | f_mant;
|
||||
return f32.f;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Infinity -> zero mantissa
|
||||
f32.i = (sign << 31) | 0x7F800000;
|
||||
return f32.f;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for zero / denormal
|
||||
if (h_exp == 0)
|
||||
{
|
||||
if (h_mant == 0)
|
||||
{
|
||||
// Zero -> zero exponent
|
||||
f_exp = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Denormal -> normalize it
|
||||
// - Shift mantissa to make most-significant 1 implicit
|
||||
// - Adjust exponent accordingly
|
||||
uint32_t shift = 0;
|
||||
while ((h_mant & 0x400) == 0)
|
||||
{
|
||||
h_mant <<= 1;
|
||||
shift++;
|
||||
}
|
||||
h_mant &= 0x3FF;
|
||||
f_exp -= shift - 1;
|
||||
}
|
||||
}
|
||||
|
||||
f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
|
||||
return f32.f;
|
||||
}
|
||||
|
||||
|
||||
#undef CL_HALF_EXP_MASK
|
||||
#undef CL_HALF_MAX_FINITE_MAG
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* OPENCL_CL_HALF_H */
|
1294
CL/cl_icd.h
Normal file
1294
CL/cl_icd.h
Normal file
File diff suppressed because it is too large
Load diff
62
CL/cl_layer.h
Normal file
62
CL/cl_layer.h
Normal file
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Copyright (c) 2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* OpenCL is a trademark of Apple Inc. used under license by Khronos.
|
||||
*/
|
||||
|
||||
#ifndef OPENCL_CL_LAYER_H
|
||||
#define OPENCL_CL_LAYER_H
|
||||
|
||||
#include <CL/cl_icd.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_layer_info;
|
||||
typedef cl_uint cl_layer_api_version;
|
||||
#define CL_LAYER_API_VERSION 0x4240
|
||||
#define CL_LAYER_NAME 0x4241
|
||||
#define CL_LAYER_API_VERSION_100 100
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetLayerInfo(cl_layer_info param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *pfn_clGetLayerInfo)(cl_layer_info param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clInitLayer(cl_uint num_entries,
|
||||
const cl_icd_dispatch *target_dispatch,
|
||||
cl_uint *num_entries_ret,
|
||||
const cl_icd_dispatch **layer_dispatch_ret);
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *pfn_clInitLayer)(cl_uint num_entries,
|
||||
const cl_icd_dispatch *target_dispatch,
|
||||
cl_uint *num_entries_ret,
|
||||
const cl_icd_dispatch **layer_dispatch_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* OPENCL_CL_LAYER_H */
|
1412
CL/cl_platform.h
Normal file
1412
CL/cl_platform.h
Normal file
File diff suppressed because it is too large
Load diff
163
CL/cl_va_api_media_sharing_intel.h
Normal file
163
CL/cl_va_api_media_sharing_intel.h
Normal file
|
@ -0,0 +1,163 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
#include <va/va.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_sharing_format_query_va_api
|
||||
***************************************************************/
|
||||
#define cl_intel_sharing_format_query_va_api 1
|
||||
|
||||
/* when cl_intel_va_api_media_sharing is supported */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetSupportedVA_APIMediaSurfaceFormatsINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
VAImageFormat* va_api_formats,
|
||||
cl_uint* num_surface_formats) ;
|
||||
|
||||
typedef cl_int (CL_API_CALL *
|
||||
clGetSupportedVA_APIMediaSurfaceFormatsINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
VAImageFormat* va_api_formats,
|
||||
cl_uint* num_surface_formats) ;
|
||||
|
||||
/******************************************
|
||||
* cl_intel_va_api_media_sharing extension *
|
||||
*******************************************/
|
||||
|
||||
#define cl_intel_va_api_media_sharing 1
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
|
||||
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
|
||||
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
|
||||
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
|
||||
|
||||
/* cl_va_api_device_source_intel */
|
||||
#define CL_VA_API_DISPLAY_INTEL 0x4094
|
||||
|
||||
/* cl_va_api_device_set_intel */
|
||||
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
|
||||
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
|
||||
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
|
||||
|
||||
typedef cl_uint cl_va_api_device_source_intel;
|
||||
typedef cl_uint cl_va_api_device_set_intel;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
|
||||
cl_platform_id platform,
|
||||
cl_va_api_device_source_intel media_adapter_type,
|
||||
void* media_adapter,
|
||||
cl_va_api_device_set_intel media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_va_api_device_source_intel media_adapter_type,
|
||||
void* media_adapter,
|
||||
cl_va_api_device_set_intel media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromVA_APIMediaSurfaceINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
VASurfaceID* surface,
|
||||
cl_uint plane,
|
||||
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
VASurfaceID* surface,
|
||||
cl_uint plane,
|
||||
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
|
||||
|
81
CL/cl_version.h
Normal file
81
CL/cl_version.h
Normal file
|
@ -0,0 +1,81 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2018-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __CL_VERSION_H
|
||||
#define __CL_VERSION_H
|
||||
|
||||
/*
 * Detect which version to target.
 *
 * If the including code has not defined CL_TARGET_OPENCL_VERSION, or has
 * defined it to a value outside the supported set listed below, a
 * compile-time message is emitted and the target falls back to 300
 * (OpenCL 3.0).
 */
#if !defined(CL_TARGET_OPENCL_VERSION)
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 300 (OpenCL 3.0)")
#define CL_TARGET_OPENCL_VERSION 300
#endif
#if CL_TARGET_OPENCL_VERSION != 100 && \
    CL_TARGET_OPENCL_VERSION != 110 && \
    CL_TARGET_OPENCL_VERSION != 120 && \
    CL_TARGET_OPENCL_VERSION != 200 && \
    CL_TARGET_OPENCL_VERSION != 210 && \
    CL_TARGET_OPENCL_VERSION != 220 && \
    CL_TARGET_OPENCL_VERSION != 300
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 300 (OpenCL 3.0)")
#undef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 300
#endif
|
||||
|
||||
|
||||
/* OpenCL Version */
|
||||
#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0)
|
||||
#define CL_VERSION_3_0 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
|
||||
#define CL_VERSION_2_2 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
|
||||
#define CL_VERSION_2_1 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
|
||||
#define CL_VERSION_2_0 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
|
||||
#define CL_VERSION_1_2 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
|
||||
#define CL_VERSION_1_1 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
|
||||
#define CL_VERSION_1_0 1
|
||||
#endif
|
||||
|
||||
/* Allow deprecated APIs for older OpenCL versions. */
|
||||
#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_2_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||
#endif
|
||||
|
||||
#endif /* __CL_VERSION_H */
|
201
CL/license.txt
Normal file
201
CL/license.txt
Normal file
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
32
CL/opencl.h
Normal file
32
CL/opencl.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2021 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_H
|
||||
#define __OPENCL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_gl.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_H */
|
10372
CL/opencl.hpp
Normal file
10372
CL/opencl.hpp
Normal file
File diff suppressed because it is too large
Load diff
33
Makefile
33
Makefile
|
@ -97,7 +97,7 @@ ifdef LLAMA_OPENBLAS
|
|||
LDFLAGS += -lopenblas
|
||||
endif
|
||||
ifdef LLAMA_CLBLAST
|
||||
CFLAGS += -DGGML_USE_CLBLAST
|
||||
CFLAGS += -DGGML_USE_CLBLAST -DGGML_USE_OPENBLAS
|
||||
LDFLAGS += -lclblast -lOpenCL
|
||||
endif
|
||||
ifdef LLAMA_GPROF
|
||||
|
@ -121,11 +121,18 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
|
|||
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
|
||||
endif
|
||||
|
||||
BLAS_BUILD =
|
||||
OPENBLAS_BUILD =
|
||||
ifeq ($(OS),Windows_NT)
|
||||
BLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o libopenblas.lib -shared -o koboldcpp_blas.dll $(LDFLAGS)
|
||||
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas.dll $(LDFLAGS)
|
||||
else
|
||||
BLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use openblas, please install it seperately, then link it manually with LLAMA_OPENBLAS=1. This is just a reminder, not an error.'
|
||||
OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use openblas, please install it seperately, then link it manually with LLAMA_OPENBLAS=1. This is just a reminder, not an error.'
|
||||
endif
|
||||
|
||||
CLBLAST_BUILD =
|
||||
ifeq ($(OS),Windows_NT)
|
||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/OpenCL.lib lib/clblast.lib -shared -o koboldcpp_clblast.dll $(LDFLAGS)
|
||||
else
|
||||
CLBLAST_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use CLBlast, please install it seperately, then link it manually with LLAMA_CLBLAST=1. This is just a reminder, not an error.'
|
||||
endif
|
||||
|
||||
#
|
||||
|
@ -143,7 +150,7 @@ $(info I CC: $(CCV))
|
|||
$(info I CXX: $(CXXV))
|
||||
$(info )
|
||||
|
||||
default: llamalib llamalib_blas
|
||||
default: llamalib llamalib_openblas llamalib_clblast
|
||||
|
||||
#
|
||||
# Build library
|
||||
|
@ -152,8 +159,11 @@ default: llamalib llamalib_blas
|
|||
ggml.o: ggml.c ggml.h
|
||||
$(CC) $(CFLAGS) -c ggml.c -o ggml.o
|
||||
|
||||
ggml_blas.o: ggml.c ggml.h
|
||||
$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_blas.o
|
||||
ggml_openblas.o: ggml.c ggml.h
|
||||
$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas.o
|
||||
|
||||
ggml_clblast.o: ggml.c ggml.h
|
||||
$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -DGGML_USE_CLBLAST -c ggml.c -o ggml_clblast.o
|
||||
|
||||
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
|
||||
$(CC) $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o
|
||||
|
@ -174,7 +184,7 @@ gpttype_adapter.o:
|
|||
$(CXX) $(CXXFLAGS) -c gpttype_adapter.cpp -o gpttype_adapter.o
|
||||
|
||||
clean:
|
||||
rm -vf *.o main quantize quantize-stats perplexity embedding main.exe quantize.exe koboldcpp.dll koboldcpp_blas.dll gptj.exe gpt2.exe
|
||||
rm -vf *.o main quantize quantize-stats perplexity embedding main.exe quantize.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_clblast.dll gptj.exe gpt2.exe
|
||||
|
||||
main: examples/main/main.cpp ggml.o llama.o common.o
|
||||
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
|
||||
|
@ -185,8 +195,11 @@ main: examples/main/main.cpp ggml.o llama.o common.o
|
|||
llamalib: ggml.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
|
||||
$(CXX) $(CXXFLAGS) ggml.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o -shared -o koboldcpp.dll $(LDFLAGS)
|
||||
|
||||
llamalib_blas: ggml_blas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
|
||||
$(BLAS_BUILD)
|
||||
llamalib_openblas: ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
|
||||
$(OPENBLAS_BUILD)
|
||||
|
||||
llamalib_clblast: ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
|
||||
$(CLBLAST_BUILD)
|
||||
|
||||
quantize: examples/quantize/quantize.cpp ggml.o llama.o
|
||||
$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)
|
||||
|
|
2
cblas.h
2
cblas.h
|
@ -1,3 +1,5 @@
|
|||
#pragma once
|
||||
|
||||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
|
|
BIN
clblast.dll
Normal file
BIN
clblast.dll
Normal file
Binary file not shown.
1707
clblast_c.h
Normal file
1707
clblast_c.h
Normal file
File diff suppressed because it is too large
Load diff
|
@ -31,6 +31,14 @@ extern "C"
|
|||
std::string model = inputs.model_filename;
|
||||
file_format = check_file_format(model.c_str());
|
||||
|
||||
//first digit is platform, second is devices
|
||||
int platform = inputs.clblast_info/10;
|
||||
int devices = inputs.clblast_info%10;
|
||||
std::string platformenv = "KCPP_CBLAST_PLATFORM="+std::to_string(platform);
|
||||
std::string deviceenv = "KCPP_CBLAST_DEVICES="+std::to_string(devices);
|
||||
putenv(platformenv.c_str());
|
||||
putenv(deviceenv.c_str());
|
||||
|
||||
if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3)
|
||||
{
|
||||
printf("\n---\nIdentified as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
|
||||
|
|
1
expose.h
1
expose.h
|
@ -9,6 +9,7 @@ struct load_model_inputs
|
|||
const char *model_filename;
|
||||
const int n_parts_overwrite = -1;
|
||||
const bool use_mmap;
|
||||
const int clblast_info = 0;
|
||||
};
|
||||
struct generation_inputs
|
||||
{
|
||||
|
|
145
ggml.c
145
ggml.c
|
@ -123,26 +123,10 @@ typedef void* thread_ret_t;
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
#if GGML_USE_CLBLAST
|
||||
#ifndef GGML_USE_OPENBLAS
|
||||
#define GGML_USE_OPENBLAS
|
||||
#endif
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 110
|
||||
#include <clblast_c.h>
|
||||
|
||||
cl_platform_id platform;
|
||||
cl_device_id device;
|
||||
cl_context context;
|
||||
cl_command_queue queue;
|
||||
cl_event event;
|
||||
bool cl_initialized = false;
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_ACCELERATE
|
||||
#include <Accelerate/Accelerate.h>
|
||||
#elif defined(GGML_USE_OPENBLAS)
|
||||
#include <cblas.h>
|
||||
#elif GGML_USE_OPENBLAS
|
||||
#include <ggml_blas_adapter.c>
|
||||
#endif
|
||||
|
||||
#undef MIN
|
||||
|
@ -6330,7 +6314,7 @@ static void ggml_compute_forward_rms_norm(
|
|||
|
||||
// ggml_compute_forward_mul_mat
|
||||
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||
// helper function to determine if it is better to use BLAS or not
|
||||
// for large matrices, BLAS is faster
|
||||
static bool ggml_compute_forward_mul_mat_use_blas(
|
||||
|
@ -6355,85 +6339,6 @@ static bool ggml_compute_forward_mul_mat_use_blas(
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef GGML_USE_CLBLAST
|
||||
static bool ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, const int m, const int n, const int k, const float alpha, const float *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc) {
|
||||
cl_int err = 0;
|
||||
|
||||
if (!cl_initialized) {
|
||||
cl_uint num_platforms;
|
||||
clGetPlatformIDs(0, NULL, &num_platforms);
|
||||
cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
|
||||
clGetPlatformIDs(num_platforms, platforms, NULL);
|
||||
platform = platforms[0];
|
||||
cl_uint num_devices;
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
|
||||
cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
|
||||
device = devices[0];
|
||||
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error creating OpenCL context: %d\n", err);
|
||||
fflush(stdout);
|
||||
}
|
||||
queue = clCreateCommandQueue(context, device, 0, &err);
|
||||
event = NULL;
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error creating OpenCL Command Queue: %d\n", err);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
free(platforms);
|
||||
free(devices);
|
||||
cl_initialized = true;
|
||||
}
|
||||
|
||||
// Prepare buffers
|
||||
cl_mem cl_buffer_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*k*sizeof(float), NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error creating OpenCL Buffer A: %d\n", err);
|
||||
fflush(stdout);
|
||||
}
|
||||
cl_mem cl_buffer_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n*k*sizeof(float), NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error creating OpenCL Buffer B: %d\n", err);
|
||||
fflush(stdout);
|
||||
}
|
||||
cl_mem cl_buffer_c = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error creating OpenCL Buffer C: %d\n", err);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
clEnqueueWriteBuffer(queue, cl_buffer_a, CL_TRUE, 0, m*k*sizeof(float), host_a, 0, NULL, NULL);
|
||||
clEnqueueWriteBuffer(queue, cl_buffer_b, CL_TRUE, 0, n*k*sizeof(float), host_b, 0, NULL, NULL);
|
||||
clEnqueueWriteBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
|
||||
|
||||
// Call the SGEMM routine.
|
||||
CLBlastStatusCode status = CLBlastSgemm(order,
|
||||
trans_a, trans_b,
|
||||
m, n, k,
|
||||
alpha,
|
||||
cl_buffer_a, 0, lda,
|
||||
cl_buffer_b, 0, ldb,
|
||||
beta,
|
||||
cl_buffer_c, 0, ldc,
|
||||
&queue, &event);
|
||||
|
||||
// Wait for completion
|
||||
if (status == CLBlastSuccess) {
|
||||
clWaitForEvents(1, &event);
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
clEnqueueReadBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
|
||||
|
||||
clReleaseMemObject(cl_buffer_a);
|
||||
clReleaseMemObject(cl_buffer_b);
|
||||
clReleaseMemObject(cl_buffer_c);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static void ggml_compute_forward_mul_mat_f32(
|
||||
|
@ -6449,7 +6354,7 @@ static void ggml_compute_forward_mul_mat_f32(
|
|||
const int64_t ne02 = src0->ne[2];
|
||||
const int64_t ne03 = src0->ne[3];
|
||||
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||
const int64_t ne10 = src1->ne[0];
|
||||
#endif
|
||||
const int64_t ne11 = src1->ne[1];
|
||||
|
@ -6506,7 +6411,7 @@ static void ggml_compute_forward_mul_mat_f32(
|
|||
// nb01 >= nb00 - src0 is not transposed
|
||||
// compute by src0 rows
|
||||
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
|
||||
if (params->ith != 0) {
|
||||
return;
|
||||
|
@ -6528,19 +6433,11 @@ static void ggml_compute_forward_mul_mat_f32(
|
|||
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
||||
|
||||
// zT = y * xT
|
||||
#ifdef GGML_USE_CLBLAST
|
||||
ggml_cl_sgemm_wrapper(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
do_blas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
ne11, ne01, ne10,
|
||||
1.0f, y, ne10,
|
||||
x, ne10,
|
||||
0.0f, d, ne01);
|
||||
#else
|
||||
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
ne11, ne01, ne10,
|
||||
1.0f, y, ne10,
|
||||
x, ne10,
|
||||
0.0f, d, ne01);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6673,7 +6570,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
|||
// nb01 >= nb00 - src0 is not transposed
|
||||
// compute by src0 rows
|
||||
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
|
||||
GGML_ASSERT(nb10 == sizeof(float));
|
||||
|
||||
|
@ -6708,19 +6605,11 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
|||
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
||||
|
||||
// zT = y * xT
|
||||
#ifdef GGML_USE_CLBLAST
|
||||
ggml_cl_sgemm_wrapper(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
do_blas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
ne11, ne01, ne10,
|
||||
1.0f, y, ne10,
|
||||
x, ne10,
|
||||
0.0f, d, ne01);
|
||||
#else
|
||||
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
ne11, ne01, ne10,
|
||||
1.0f, y, ne10,
|
||||
x, ne10,
|
||||
0.0f, d, ne01);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6896,7 +6785,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
|
|||
// nb01 >= nb00 - src0 is not transposed
|
||||
// compute by src0 rows
|
||||
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
|
||||
if (params->ith != 0) {
|
||||
return;
|
||||
|
@ -6929,19 +6818,11 @@ static void ggml_compute_forward_mul_mat_q_f32(
|
|||
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
||||
|
||||
// zT = y * xT
|
||||
#ifdef GGML_USE_CLBLAST
|
||||
ggml_cl_sgemm_wrapper(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
do_blas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
ne11, ne01, ne10,
|
||||
1.0f, y, ne10,
|
||||
x, ne10,
|
||||
0.0f, d, ne01);
|
||||
#else
|
||||
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
ne11, ne01, ne10,
|
||||
1.0f, y, ne10,
|
||||
x, ne10,
|
||||
0.0f, d, ne01);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9672,7 +9553,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||
size_t cur = 0;
|
||||
|
||||
if (node->src0->type == GGML_TYPE_F16 && node->src1->type == GGML_TYPE_F32) {
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
|
||||
node->n_tasks = 1; // TODO: this actually is doing nothing
|
||||
// the threads are still spinning
|
||||
|
@ -9689,7 +9570,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||
} else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) {
|
||||
cur = 0;
|
||||
} else if (quantize_fns[node->src0->type].vec_dot_q && node->src1->type == GGML_TYPE_F32) {
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
|
||||
node->n_tasks = 1;
|
||||
cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]);
|
||||
|
@ -10986,7 +10867,7 @@ int ggml_cpu_has_wasm_simd(void) {
|
|||
}
|
||||
|
||||
int ggml_cpu_has_blas(void) {
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
||||
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
|
|
124
ggml_blas_adapter.c
Normal file
124
ggml_blas_adapter.c
Normal file
|
@ -0,0 +1,124 @@
|
|||
// This file is a drop-in home for all CLBlast-related code, so that the main ggml.c can stay unmodified.
|
||||
// do_blas_sgemm below mirrors the argument list of OpenBLAS's cblas_sgemm and forwards to CLBlast or to cblas_sgemm depending on GGML_USE_CLBLAST.
|
||||
|
||||
//windows binaries for clblast obtained from https://github.com/CNugteren/CLBlast (apache license)
|
||||
//windows binaries for opencl obtained from https://github.com/KhronosGroup/OpenCL-SDK (apache license)
|
||||
|
||||
#include <cblas.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if GGML_USE_CLBLAST
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 110
|
||||
#include <clblast_c.h>
|
||||
|
||||
cl_platform_id platform;
|
||||
cl_device_id device;
|
||||
cl_context context;
|
||||
cl_command_queue queue;
|
||||
cl_event event;
|
||||
bool cl_initialized = false;
|
||||
|
||||
static void ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, const int m, const int n, const int k, const float alpha, const float *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc) {
|
||||
cl_int err = 0;
|
||||
|
||||
if (!cl_initialized) {
|
||||
char * KCPP_CBLAST_PLATFORM = getenv("KCPP_CBLAST_PLATFORM");
|
||||
char * KCPP_CBLAST_DEVICES = getenv("KCPP_CBLAST_DEVICES");
|
||||
int plat_num = (KCPP_CBLAST_PLATFORM == NULL ? 0 : atoi(KCPP_CBLAST_PLATFORM));
|
||||
int dev_num = (KCPP_CBLAST_DEVICES == NULL ? 0 : atoi(KCPP_CBLAST_DEVICES));
|
||||
printf("\nInitializing CLBlast (First Run)...");
|
||||
printf("\nSelected: Platform=%d, Device=%d (If invalid, program will crash)\n",plat_num,dev_num);
|
||||
cl_uint num_platforms;
|
||||
clGetPlatformIDs(0, NULL, &num_platforms);
|
||||
cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
|
||||
clGetPlatformIDs(num_platforms, platforms, NULL);
|
||||
platform = platforms[plat_num];
|
||||
cl_uint num_devices;
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
|
||||
cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
|
||||
device = devices[dev_num];
|
||||
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error creating OpenCL context: %d\n", err);
|
||||
fflush(stdout);
|
||||
}
|
||||
queue = clCreateCommandQueue(context, device, 0, &err);
|
||||
event = NULL;
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error creating OpenCL Command Queue: %d\n", err);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
free(platforms);
|
||||
free(devices);
|
||||
cl_initialized = true;
|
||||
}
|
||||
|
||||
// Prepare buffers
|
||||
cl_mem cl_buffer_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*k*sizeof(float), NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error creating OpenCL Buffer A: %d\n", err);
|
||||
fflush(stdout);
|
||||
}
|
||||
cl_mem cl_buffer_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n*k*sizeof(float), NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error creating OpenCL Buffer B: %d\n", err);
|
||||
fflush(stdout);
|
||||
}
|
||||
cl_mem cl_buffer_c = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("Error creating OpenCL Buffer C: %d\n", err);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
clEnqueueWriteBuffer(queue, cl_buffer_a, CL_TRUE, 0, m*k*sizeof(float), host_a, 0, NULL, NULL);
|
||||
clEnqueueWriteBuffer(queue, cl_buffer_b, CL_TRUE, 0, n*k*sizeof(float), host_b, 0, NULL, NULL);
|
||||
clEnqueueWriteBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
|
||||
|
||||
// Call the SGEMM routine.
|
||||
CLBlastStatusCode status = CLBlastSgemm(order,
|
||||
trans_a, trans_b,
|
||||
m, n, k,
|
||||
alpha,
|
||||
cl_buffer_a, 0, lda,
|
||||
cl_buffer_b, 0, ldb,
|
||||
beta,
|
||||
cl_buffer_c, 0, ldc,
|
||||
&queue, &event);
|
||||
|
||||
// Wait for completion
|
||||
if (status == CLBlastSuccess) {
|
||||
clWaitForEvents(1, &event);
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
|
||||
clEnqueueReadBuffer(queue, cl_buffer_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
|
||||
|
||||
clReleaseMemObject(cl_buffer_a);
|
||||
clReleaseMemObject(cl_buffer_b);
|
||||
clReleaseMemObject(cl_buffer_c);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Single SGEMM entry point with the same argument list as cblas_sgemm.
// When GGML_USE_CLBLAST is set the call is routed to the CLBlast wrapper,
// otherwise it goes straight to cblas_sgemm. All arguments are forwarded
// untouched in both cases.
static void do_blas_sgemm(
    OPENBLAS_CONST enum CBLAS_ORDER Order,
    OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
    OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
    OPENBLAS_CONST blasint M,
    OPENBLAS_CONST blasint N,
    OPENBLAS_CONST blasint K,
    OPENBLAS_CONST float alpha,
    OPENBLAS_CONST float *A,
    OPENBLAS_CONST blasint lda,
    OPENBLAS_CONST float *B,
    OPENBLAS_CONST blasint ldb,
    OPENBLAS_CONST float beta,
    float *C,
    OPENBLAS_CONST blasint ldc)
{
#if GGML_USE_CLBLAST
    // GPU path (OpenCL via CLBlast)
    ggml_cl_sgemm_wrapper(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
#else
    // CPU path (OpenBLAS)
    cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
#endif
}
|
BIN
koboldcpp.dll
BIN
koboldcpp.dll
Binary file not shown.
27
koboldcpp.py
27
koboldcpp.py
|
@ -15,7 +15,8 @@ class load_model_inputs(ctypes.Structure):
|
|||
("f16_kv", ctypes.c_bool),
|
||||
("model_filename", ctypes.c_char_p),
|
||||
("n_parts_overwrite", ctypes.c_int),
|
||||
("use_mmap", ctypes.c_bool)]
|
||||
("use_mmap", ctypes.c_bool),
|
||||
("clblast_info", ctypes.c_int)]
|
||||
|
||||
class generation_inputs(ctypes.Structure):
|
||||
_fields_ = [("seed", ctypes.c_int),
|
||||
|
@ -34,12 +35,15 @@ class generation_outputs(ctypes.Structure):
|
|||
|
||||
handle = None
|
||||
use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir.
|
||||
use_clblast = False #uses CLBlast instead
|
||||
|
||||
def init_library():
|
||||
global handle, use_blas
|
||||
global handle, use_blas, use_clblast
|
||||
libname = ""
|
||||
if use_blas:
|
||||
libname = "koboldcpp_blas.dll"
|
||||
libname = "koboldcpp_openblas.dll"
|
||||
elif use_clblast:
|
||||
libname = "koboldcpp_clblast.dll"
|
||||
else:
|
||||
libname = "koboldcpp.dll"
|
||||
|
||||
|
@ -63,6 +67,10 @@ def load_model(model_filename,batch_size=8,max_context_length=512,n_parts_overwr
|
|||
inputs.n_parts_overwrite = n_parts_overwrite
|
||||
inputs.f16_kv = True
|
||||
inputs.use_mmap = use_mmap
|
||||
clblastids = 0
|
||||
if args.useclblast:
|
||||
clblastids = int(args.useclblast[0])*10 + int(args.useclblast[1])
|
||||
inputs.clblast_info = clblastids
|
||||
ret = handle.load_model(inputs)
|
||||
return ret
|
||||
|
||||
|
@ -301,13 +309,19 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
|
|||
sys.exit(0)
|
||||
|
||||
def main(args):
|
||||
global use_blas
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_blas.dll")):
|
||||
print("Warning: libopenblas.dll or koboldcpp_blas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
|
||||
global use_blas, use_clblast
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")):
|
||||
print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
|
||||
use_blas = False
|
||||
elif os.name != 'nt':
|
||||
print("Prebuilt OpenBLAS binaries only available for windows. Please manually build/link libopenblas from makefile with LLAMA_OPENBLAS=1")
|
||||
use_blas = False
|
||||
elif args.useclblast:
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_clblast.dll")):
|
||||
print("Warning: clblast.dll or koboldcpp_clblast.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with CLBlast.")
|
||||
else:
|
||||
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast.dll will be required.")
|
||||
use_clblast = True
|
||||
elif not args.noblas:
|
||||
print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.")
|
||||
use_blas = True
|
||||
|
@ -397,5 +411,6 @@ if __name__ == '__main__':
|
|||
parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
|
||||
parser.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
|
||||
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
|
||||
parser.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
|
||||
args = parser.parse_args()
|
||||
main(args)
|
||||
|
|
Binary file not shown.
BIN
lib/OpenCL.lib
Normal file
BIN
lib/OpenCL.lib
Normal file
Binary file not shown.
BIN
lib/clblast.lib
Normal file
BIN
lib/clblast.lib
Normal file
Binary file not shown.
|
@ -251,12 +251,10 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
|
|||
last_n_tokens.push_back(embd_inp[input_consumed]);
|
||||
current_context_tokens.push_back(embd_inp[input_consumed]);
|
||||
++input_consumed;
|
||||
#ifndef GGML_USE_CLBLAST
|
||||
if ((int)embd.size() >= params.n_batch)
|
||||
{
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1 +1 @@
|
|||
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_blas.dll;." --add-data "./libopenblas.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
|
||||
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
|
Loading…
Add table
Add a link
Reference in a new issue