Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0.

parent acfc5478ff
commit 4cdaa3c9cb

97 changed files with 13550 additions and 26 deletions
kompute/src/Algorithm.cpp | 450 (new file)
@@ -0,0 +1,450 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
Algorithm::~Algorithm()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destructor started");
|
||||
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
bool
|
||||
Algorithm::isInit()
|
||||
{
|
||||
return this->mPipeline && this->mPipelineCache && this->mPipelineLayout &&
|
||||
this->mDescriptorPool && this->mDescriptorSet &&
|
||||
this->mDescriptorSetLayout && this->mShaderModule;
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::destroy()
|
||||
{
|
||||
// We don't have to free memory on destroy as it's freed by the
// commandBuffer destructor
// if (this->mPushConstantsData) {
//     free(this->mPushConstantsData);
// }
// if (this->mSpecializationConstantsData) {
//     free(this->mSpecializationConstantsData);
// }
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_WARN("Kompute Algorithm destroy function reached with null "
|
||||
"Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreePipeline && this->mPipeline) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline");
|
||||
if (!this->mPipeline) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipeline,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipeline = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreePipelineCache && this->mPipelineCache) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
|
||||
if (!this->mPipelineCache) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline cache but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipelineCache,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipelineCache = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreePipelineLayout && this->mPipelineLayout) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
|
||||
if (!this->mPipelineLayout) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline layout but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipelineLayout,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipelineLayout = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreeShaderModule && this->mShaderModule) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying shader module");
|
||||
if (!this->mShaderModule) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader "
|
||||
"module but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mShaderModule,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mShaderModule = nullptr;
|
||||
}
|
||||
|
||||
freeParameters();
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::freeParameters()
|
||||
{
|
||||
if (this->mFreeDescriptorSetLayout && this->mDescriptorSetLayout) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout");
|
||||
if (!this->mDescriptorSetLayout) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"descriptor set layout but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mDescriptorSetLayout,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDescriptorSetLayout = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::createParameters()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm createParameters started");
|
||||
if (!*this->mDescriptorPool) {
|
||||
KP_LOG_ERROR("Kompute Algorithm can not create descriptor pool");
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
descriptorSetBindings.push_back(
|
||||
vk::DescriptorSetLayoutBinding(i, // Binding index
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
1, // Descriptor count
|
||||
vk::ShaderStageFlagBits::eCompute));
|
||||
}
|
||||
|
||||
// This is the component that is fed into the pipeline
|
||||
vk::DescriptorSetLayoutCreateInfo descriptorSetLayoutInfo(
|
||||
vk::DescriptorSetLayoutCreateFlags(),
|
||||
static_cast<uint32_t>(descriptorSetBindings.size()),
|
||||
descriptorSetBindings.data());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm creating descriptor set layout");
|
||||
this->mDescriptorSetLayout = std::make_shared<vk::DescriptorSetLayout>();
|
||||
vk::Result result = this->mDevice->createDescriptorSetLayout(
|
||||
&descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get());
|
||||
|
||||
if (result != vk::Result::eSuccess) {
|
||||
KP_LOG_ERROR("Failed to create descriptor set layout. Error code: {}", vk::to_string(result));
|
||||
} else {
|
||||
this->mFreeDescriptorSetLayout = true;
|
||||
KP_LOG_DEBUG("Successfully allocated descriptor set layout.");
|
||||
}
|
||||
|
||||
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
|
||||
*this->mDescriptorPool,
|
||||
1, // Descriptor set layout count
|
||||
this->mDescriptorSetLayout.get());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets");
|
||||
this->mDescriptorSet = std::make_shared<vk::DescriptorSet>();
|
||||
result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
|
||||
this->mDescriptorSet.get());
|
||||
|
||||
if (result != vk::Result::eSuccess) {
|
||||
KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result));
|
||||
} else {
|
||||
this->mFreeDescriptorSet = true;
|
||||
KP_LOG_DEBUG("Successfully allocated descriptor sets.");
|
||||
}
|
||||
|
||||
this->mFreeDescriptorSet = true;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
|
||||
|
||||
vk::DescriptorBufferInfo descriptorBufferInfo =
|
||||
this->mTensors[i]->constructDescriptorBufferInfo();
|
||||
|
||||
computeWriteDescriptorSets.push_back(
|
||||
vk::WriteDescriptorSet(*this->mDescriptorSet,
|
||||
i, // Destination binding
|
||||
0, // Destination array element
|
||||
1, // Descriptor count
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
nullptr, // Descriptor image info
|
||||
&descriptorBufferInfo));
|
||||
|
||||
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm successfully run init");
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::updateParameters()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm updateParameters started");
|
||||
if (!*this->mDescriptorPool) {
|
||||
KP_LOG_ERROR("Kompute Algorithm can not create descriptor pool");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
|
||||
*this->mDescriptorPool,
|
||||
1, // Descriptor set layout count
|
||||
this->mDescriptorSetLayout.get());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets");
|
||||
this->mDescriptorSet = std::make_shared<vk::DescriptorSet>();
|
||||
vk::Result result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
|
||||
this->mDescriptorSet.get());
|
||||
|
||||
if (result != vk::Result::eSuccess) {
|
||||
KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result));
|
||||
} else {
|
||||
this->mFreeDescriptorSet = true;
|
||||
KP_LOG_DEBUG("Successfully allocated descriptor sets.");
|
||||
}
|
||||
|
||||
this->mFreeDescriptorSet = true;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
|
||||
|
||||
vk::DescriptorBufferInfo descriptorBufferInfo =
|
||||
this->mTensors[i]->constructDescriptorBufferInfo();
|
||||
|
||||
computeWriteDescriptorSets.push_back(
|
||||
vk::WriteDescriptorSet(*this->mDescriptorSet,
|
||||
i, // Destination binding
|
||||
0, // Destination array element
|
||||
1, // Descriptor count
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
nullptr, // Descriptor image info
|
||||
&descriptorBufferInfo));
|
||||
|
||||
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm successfully run init");
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::createShaderModule()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm createShaderModule started");
|
||||
|
||||
vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(),
|
||||
sizeof(uint32_t) *
|
||||
this->mSpirv.size(),
|
||||
this->mSpirv.data());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}",
|
||||
this->mSpirv.size());
|
||||
this->mFreeShaderModule = true;
|
||||
this->mShaderModule = std::make_shared<vk::ShaderModule>();
|
||||
this->mDevice->createShaderModule(
|
||||
&shaderModuleInfo, nullptr, this->mShaderModule.get());
|
||||
this->mFreeShaderModule = true;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm create shader module success");
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::createPipeline()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm calling create Pipeline");
|
||||
|
||||
vk::PipelineLayoutCreateInfo pipelineLayoutInfo(
|
||||
vk::PipelineLayoutCreateFlags(),
|
||||
1, // Set layout count
|
||||
this->mDescriptorSetLayout.get());
|
||||
|
||||
vk::PushConstantRange pushConstantRange;
|
||||
if (this->mPushConstantsSize) {
|
||||
pushConstantRange.setStageFlags(vk::ShaderStageFlagBits::eCompute);
|
||||
pushConstantRange.setOffset(0);
|
||||
pushConstantRange.setSize(this->mPushConstantsDataTypeMemorySize *
|
||||
this->mPushConstantsSize);
|
||||
|
||||
pipelineLayoutInfo.setPushConstantRangeCount(1);
|
||||
pipelineLayoutInfo.setPPushConstantRanges(&pushConstantRange);
|
||||
}
|
||||
|
||||
this->mPipelineLayout = std::make_shared<vk::PipelineLayout>();
|
||||
this->mDevice->createPipelineLayout(
|
||||
&pipelineLayoutInfo, nullptr, this->mPipelineLayout.get());
|
||||
this->mFreePipelineLayout = true;
|
||||
|
||||
std::vector<vk::SpecializationMapEntry> specializationEntries;
|
||||
|
||||
for (uint32_t i = 0; i < this->mSpecializationConstantsSize; i++) {
|
||||
vk::SpecializationMapEntry specializationEntry(
|
||||
static_cast<uint32_t>(i),
|
||||
static_cast<uint32_t>(
|
||||
this->mSpecializationConstantsDataTypeMemorySize * i),
|
||||
this->mSpecializationConstantsDataTypeMemorySize);
|
||||
|
||||
specializationEntries.push_back(specializationEntry);
|
||||
}
|
||||
|
||||
// This passes ownership of the memory so we remove ownership from
|
||||
// specialization container by using "transferDataOwnership"
|
||||
vk::SpecializationInfo specializationInfo(
|
||||
static_cast<uint32_t>(specializationEntries.size()),
|
||||
specializationEntries.data(),
|
||||
this->mSpecializationConstantsDataTypeMemorySize *
|
||||
this->mSpecializationConstantsSize,
|
||||
this->mSpecializationConstantsData);
|
||||
|
||||
vk::PipelineShaderStageCreateInfo shaderStage(
|
||||
vk::PipelineShaderStageCreateFlags(),
|
||||
vk::ShaderStageFlagBits::eCompute,
|
||||
*this->mShaderModule,
|
||||
"main",
|
||||
&specializationInfo);
|
||||
|
||||
static std::shared_ptr<vk::PipelineCache> globalPipelineCache = std::make_shared<vk::PipelineCache>();
|
||||
if(!*globalPipelineCache) {
|
||||
vk::PipelineCacheCreateInfo pipelineCacheInfo =
|
||||
vk::PipelineCacheCreateInfo();
|
||||
this->mPipelineCache = globalPipelineCache;
|
||||
this->mFreePipelineCache = true;
|
||||
this->mDevice->createPipelineCache(
|
||||
&pipelineCacheInfo, nullptr, globalPipelineCache.get());
|
||||
}
|
||||
|
||||
vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(),
|
||||
shaderStage,
|
||||
*this->mPipelineLayout,
|
||||
vk::Pipeline(),
|
||||
0);
|
||||
|
||||
#ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE
|
||||
vk::ResultValue<vk::Pipeline> pipelineResult =
|
||||
this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo);
|
||||
|
||||
if (pipelineResult.result != vk::Result::eSuccess) {
|
||||
throw std::runtime_error("Failed to create pipeline result: " +
|
||||
vk::to_string(pipelineResult.result));
|
||||
}
|
||||
|
||||
vk::Pipeline& pipeline = pipelineResult.value;
|
||||
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
|
||||
this->mFreePipeline = true;
|
||||
#else
|
||||
vk::Pipeline pipeline =
|
||||
this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo)
|
||||
.value;
|
||||
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
|
||||
this->mFreePipeline = true;
|
||||
#endif
|
||||
|
||||
// TODO: Update to consistent
|
||||
// this->mPipeline = std::make_shared<vk::Pipeline>();
|
||||
// this->mDevice->createComputePipelines(
|
||||
// *this->mPipelineCache, 1, &pipelineInfo, nullptr,
|
||||
// this->mPipeline.get());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm Create Pipeline Success");
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding pipeline");
|
||||
|
||||
commandBuffer.bindPipeline(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipeline);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding descriptor sets");
|
||||
|
||||
commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipelineLayout,
|
||||
0, // First set
|
||||
*this->mDescriptorSet,
|
||||
nullptr // Dispatcher
|
||||
);
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
if (this->mPushConstantsSize) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}",
|
||||
this->mPushConstantsSize *
|
||||
this->mPushConstantsDataTypeMemorySize);
|
||||
|
||||
commandBuffer.pushConstants(*this->mPipelineLayout,
|
||||
vk::ShaderStageFlagBits::eCompute,
|
||||
0,
|
||||
this->mPushConstantsSize *
|
||||
this->mPushConstantsDataTypeMemorySize,
|
||||
this->mPushConstantsData);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm recording dispatch");
|
||||
|
||||
commandBuffer.dispatch(
|
||||
this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]);
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
|
||||
{
|
||||
|
||||
KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size");
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (workgroup[0] > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mWorkgroup = { workgroup[0],
|
||||
workgroup[1] > 0 ? workgroup[1] : 1,
|
||||
workgroup[2] > 0 ? workgroup[2] : 1 };
|
||||
} else {
|
||||
this->mWorkgroup = { minSize, 1, 1 };
|
||||
}
|
||||
|
||||
KP_LOG_INFO("Kompute OpAlgoCreate set dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mWorkgroup[0],
|
||||
this->mWorkgroup[1],
|
||||
this->mWorkgroup[2]);
|
||||
}
|
||||
|
||||
const Workgroup&
|
||||
Algorithm::getWorkgroup()
|
||||
{
|
||||
return this->mWorkgroup;
|
||||
}
|
||||
|
||||
const std::vector<std::shared_ptr<Tensor>>&
|
||||
Algorithm::getTensors()
|
||||
{
|
||||
return this->mTensors;
|
||||
}
|
||||
|
||||
void Algorithm::setTensors(const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
}
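For orientation, a minimal sketch of the fallback rules implemented by Algorithm::setWorkgroup() above. The helper function, the 'algo' handle and the numeric values are illustrative assumptions and are not part of this commit.

#include <memory>

#include "kompute/Algorithm.hpp"

// Hypothetical sketch (not part of this commit): how setWorkgroup() resolves
// partially specified dispatch sizes; 'algo' is supplied by the caller.
void configureDispatch(std::shared_ptr<kp::Algorithm> algo)
{
    algo->setWorkgroup({ 0, 0, 0 }, /*minSize=*/64); // x == 0 falls back to { 64, 1, 1 }
    algo->setWorkgroup({ 32, 8, 0 }, /*minSize=*/1); // unset y/z default to 1 -> { 32, 8, 1 }
}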
kompute/src/CMakeLists.txt | 82 (new file)
@@ -0,0 +1,82 @@
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
if(KOMPUTE_OPT_ANDROID_BUILD)
|
||||
find_library(android android)
|
||||
endif()
|
||||
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
add_library(kompute Algorithm.cpp
|
||||
Manager.cpp
|
||||
OpAlgoDispatch.cpp
|
||||
OpMemoryBarrier.cpp
|
||||
OpTensorCopy.cpp
|
||||
OpTensorSyncDevice.cpp
|
||||
OpTensorSyncLocal.cpp
|
||||
OpBufferSyncDevice.cpp
|
||||
OpBufferSyncLocal.cpp
|
||||
Sequence.cpp
|
||||
Tensor.cpp
|
||||
Core.cpp)
|
||||
|
||||
add_library(kompute::kompute ALIAS kompute)
|
||||
|
||||
# Set version for shared libraries.
|
||||
set_target_properties(kompute
|
||||
PROPERTIES
|
||||
VERSION ${${PROJECT_NAME}_VERSION}
|
||||
SOVERSION ${${PROJECT_NAME}_VERSION_MAJOR})
|
||||
|
||||
# Import GNU common install directory variables
|
||||
include(GNUInstallDirs)
|
||||
|
||||
install(TARGETS kompute
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
|
||||
# Include CMake helpers for package config files
|
||||
# Follow this installation guideline: https://cmake.org/cmake/help/latest/manual/cmake-packages.7.html
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
configure_package_config_file(${PROJECT_SOURCE_DIR}/cmake/komputeConfig.cmake.in
|
||||
"${PROJECT_BINARY_DIR}/kompute/komputeConfig.cmake"
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/kompute)
|
||||
|
||||
install(FILES ${PROJECT_BINARY_DIR}/kompute/komputeConfig.cmake
|
||||
${PROJECT_BINARY_DIR}/kompute/komputeConfigVersion.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/kompute)
|
||||
|
||||
# ####################################################
|
||||
# Linking
|
||||
# ####################################################
|
||||
if(KOMPUTE_OPT_ANDROID_BUILD)
|
||||
target_link_libraries(kompute PUBLIC vulkanAndroid
|
||||
android
|
||||
kp_logger
|
||||
kp_shader
|
||||
fmt::fmt)
|
||||
else()
|
||||
target_link_libraries(kompute PUBLIC Vulkan::Vulkan
|
||||
kp_logger
|
||||
kp_shader
|
||||
fmt::fmt)
|
||||
endif()
|
||||
|
||||
if(KOMPUTE_OPT_BUILD_PYTHON)
|
||||
include_directories(${PYTHON_INCLUDE_DIRS})
|
||||
|
||||
target_link_libraries(kompute PRIVATE pybind11::headers ${PYTHON_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER)
|
||||
target_link_libraries(kompute PUBLIC Vulkan-Headers)
|
||||
endif()
|
||||
|
||||
# ####################################################
|
||||
# Misc
|
||||
# ####################################################
|
||||
add_subdirectory(logger)
|
||||
add_subdirectory(shaders)
|
||||
add_subdirectory(include)
kompute/src/Core.cpp | 27 (new file)
@@ -0,0 +1,27 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#if VK_USE_PLATFORM_ANDROID_KHR
|
||||
#ifndef KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
|
||||
#define KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
|
||||
/**
|
||||
* Ensures support for dynamic loading of Vulkan functions on Android.
|
||||
* Acts as a default store for loaded functions.
|
||||
* More information:
|
||||
* https://github.com/KhronosGroup/Vulkan-Hpp#vulkan_hpp_default_dispatcher
|
||||
**/
|
||||
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
|
||||
#endif // !KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
|
||||
namespace kp {
|
||||
} // namespace kp
kompute/src/Manager.cpp | 493 (new file)
@@ -0,0 +1,493 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include "kompute/Manager.hpp"
|
||||
#include "fmt/format.h"
|
||||
#include "kompute/logger/Logger.hpp"
|
||||
#include <fmt/core.h>
|
||||
#include <iterator>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace kp {
|
||||
|
||||
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
|
||||
static VKAPI_ATTR VkBool32 VKAPI_CALL
|
||||
debugMessageCallback(VkDebugReportFlagsEXT /*flags*/,
|
||||
VkDebugReportObjectTypeEXT /*objectType*/,
|
||||
uint64_t /*object*/,
|
||||
size_t /*location*/,
|
||||
int32_t /*messageCode*/,
|
||||
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_DEBUG
|
||||
const char* pLayerPrefix,
|
||||
const char* pMessage,
|
||||
#else
|
||||
const char* /*pLayerPrefix*/,
|
||||
const char* /*pMessage*/,
|
||||
#endif
|
||||
void* /*pUserData*/)
|
||||
{
|
||||
KP_LOG_DEBUG("[VALIDATION]: {} - {}", pLayerPrefix, pMessage);
|
||||
return VK_FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
Manager::Manager()
|
||||
{
|
||||
this->mManageResources = true;
|
||||
|
||||
// Make sure the logger is setup
|
||||
#if !KOMPUTE_OPT_LOG_LEVEL_DISABLED
|
||||
logger::setupLogger();
|
||||
#endif
|
||||
this->createInstance();
|
||||
}
|
||||
|
||||
void Manager::initializeDevice(uint32_t physicalDeviceIndex,
|
||||
const std::vector<uint32_t>& familyQueueIndices,
|
||||
const std::vector<std::string>& desiredExtensions)
|
||||
{
|
||||
this->createDevice(
|
||||
familyQueueIndices, physicalDeviceIndex, desiredExtensions);
|
||||
}
|
||||
|
||||
Manager::~Manager()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager Destructor started");
|
||||
this->destroy();
|
||||
}
|
||||
|
||||
void
|
||||
Manager::destroy()
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager destroy() started");
|
||||
|
||||
if (this->mDevice == nullptr) {
|
||||
KP_LOG_ERROR(
|
||||
"Kompute Manager destructor reached with null Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mManageResources && this->mManagedSequences.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly running destructor for "
|
||||
"managed sequences");
|
||||
for (const std::weak_ptr<Sequence>& weakSq : this->mManagedSequences) {
|
||||
if (std::shared_ptr<Sequence> sq = weakSq.lock()) {
|
||||
sq->destroy();
|
||||
}
|
||||
}
|
||||
this->mManagedSequences.clear();
|
||||
}
|
||||
|
||||
if (this->mManageResources && this->mManagedAlgorithms.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
|
||||
for (const std::weak_ptr<Algorithm>& weakAlgorithm :
|
||||
this->mManagedAlgorithms) {
|
||||
if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
|
||||
algorithm->destroy();
|
||||
}
|
||||
}
|
||||
this->mManagedAlgorithms.clear();
|
||||
}
|
||||
|
||||
if (this->mManageResources && this->mManagedTensors.size()) {
|
||||
KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors");
|
||||
for (const std::weak_ptr<Tensor>& weakTensor : this->mManagedTensors) {
|
||||
if (std::shared_ptr<Tensor> tensor = weakTensor.lock()) {
|
||||
tensor->destroy();
|
||||
}
|
||||
}
|
||||
this->mManagedTensors.clear();
|
||||
}
|
||||
|
||||
if (this->mFreeDevice) {
|
||||
KP_LOG_INFO("Destroying device");
|
||||
this->mDevice->destroy(
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDevice = nullptr;
|
||||
KP_LOG_DEBUG("Kompute Manager Destroyed Device");
|
||||
}
|
||||
|
||||
if (this->mInstance == nullptr) {
|
||||
KP_LOG_ERROR(
|
||||
"Kompute Manager destructor reached with null Instance pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
|
||||
if (this->mDebugReportCallback) {
|
||||
this->mInstance->destroyDebugReportCallbackEXT(
|
||||
this->mDebugReportCallback, nullptr, this->mDebugDispatcher);
|
||||
KP_LOG_DEBUG("Kompute Manager Destroyed Debug Report Callback");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (this->mFreeInstance) {
|
||||
this->mInstance->destroy(
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mInstance = nullptr;
|
||||
KP_LOG_DEBUG("Kompute Manager Destroyed Instance");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::createInstance()
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager creating instance");
|
||||
|
||||
this->mFreeInstance = true;
|
||||
|
||||
vk::ApplicationInfo applicationInfo;
|
||||
applicationInfo.pApplicationName = "Kompute";
|
||||
applicationInfo.pEngineName = "Kompute";
|
||||
applicationInfo.apiVersion = KOMPUTE_VK_API_VERSION;
|
||||
applicationInfo.engineVersion = KOMPUTE_VK_API_VERSION;
|
||||
applicationInfo.applicationVersion = KOMPUTE_VK_API_VERSION;
|
||||
|
||||
std::vector<const char*> applicationExtensions;
|
||||
|
||||
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
|
||||
applicationExtensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
|
||||
#endif
|
||||
|
||||
vk::InstanceCreateInfo computeInstanceCreateInfo;
|
||||
computeInstanceCreateInfo.pApplicationInfo = &applicationInfo;
|
||||
if (!applicationExtensions.empty()) {
|
||||
computeInstanceCreateInfo.enabledExtensionCount =
|
||||
(uint32_t)applicationExtensions.size();
|
||||
computeInstanceCreateInfo.ppEnabledExtensionNames =
|
||||
applicationExtensions.data();
|
||||
}
|
||||
|
||||
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
|
||||
KP_LOG_DEBUG("Kompute Manager adding debug validation layers");
|
||||
// We'll identify the layers that are supported
|
||||
std::vector<const char*> validLayerNames;
|
||||
std::vector<const char*> desiredLayerNames = {
|
||||
"VK_LAYER_LUNARG_assistant_layer",
|
||||
"VK_LAYER_LUNARG_standard_validation",
|
||||
"VK_LAYER_KHRONOS_validation",
|
||||
};
|
||||
std::vector<std::string> envLayerNames;
|
||||
const char* envLayerNamesVal = std::getenv("KOMPUTE_ENV_DEBUG_LAYERS");
|
||||
if (envLayerNamesVal != nullptr && *envLayerNamesVal != '\0') {
|
||||
KP_LOG_DEBUG("Kompute Manager adding environment layers: {}",
|
||||
envLayerNamesVal);
|
||||
std::istringstream iss(envLayerNamesVal);
|
||||
std::istream_iterator<std::string> beg(iss);
|
||||
std::istream_iterator<std::string> end;
|
||||
envLayerNames = std::vector<std::string>(beg, end);
|
||||
for (const std::string& layerName : envLayerNames) {
|
||||
desiredLayerNames.push_back(layerName.c_str());
|
||||
}
|
||||
KP_LOG_DEBUG("Desired layers: {}", fmt::join(desiredLayerNames, ", "));
|
||||
}
|
||||
|
||||
// Identify the valid layer names based on the desiredLayerNames
|
||||
{
|
||||
std::set<std::string> uniqueLayerNames;
|
||||
std::vector<vk::LayerProperties> availableLayerProperties =
|
||||
vk::enumerateInstanceLayerProperties();
|
||||
for (vk::LayerProperties layerProperties : availableLayerProperties) {
|
||||
std::string layerName(layerProperties.layerName.data());
|
||||
uniqueLayerNames.insert(layerName);
|
||||
}
|
||||
KP_LOG_DEBUG("Available layers: {}", fmt::join(uniqueLayerNames, ", "));
|
||||
for (const char* desiredLayerName : desiredLayerNames) {
|
||||
if (uniqueLayerNames.count(desiredLayerName) != 0) {
|
||||
validLayerNames.push_back(desiredLayerName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!validLayerNames.empty()) {
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Manager Initializing instance with valid layers: {}",
|
||||
fmt::join(validLayerNames, ", "));
|
||||
computeInstanceCreateInfo.enabledLayerCount =
|
||||
static_cast<uint32_t>(validLayerNames.size());
|
||||
computeInstanceCreateInfo.ppEnabledLayerNames = validLayerNames.data();
|
||||
} else {
|
||||
KP_LOG_WARN("Kompute Manager no valid layer names found from desired "
|
||||
"layer names");
|
||||
}
|
||||
#endif
|
||||
|
||||
#if VK_USE_PLATFORM_ANDROID_KHR
|
||||
vk::DynamicLoader dl;
|
||||
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr =
|
||||
dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
|
||||
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
|
||||
this->mInstance = std::make_shared<vk::Instance>();
|
||||
vk::createInstance(
|
||||
&computeInstanceCreateInfo, nullptr, this->mInstance.get());
|
||||
|
||||
#if VK_USE_PLATFORM_ANDROID_KHR
|
||||
VULKAN_HPP_DEFAULT_DISPATCHER.init(*this->mInstance);
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager Instance Created");
|
||||
|
||||
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
|
||||
KP_LOG_DEBUG("Kompute Manager adding debug callbacks");
|
||||
if (validLayerNames.size() > 0) {
|
||||
vk::DebugReportFlagsEXT debugFlags =
|
||||
vk::DebugReportFlagBitsEXT::eError |
|
||||
vk::DebugReportFlagBitsEXT::eWarning;
|
||||
vk::DebugReportCallbackCreateInfoEXT debugCreateInfo = {};
|
||||
debugCreateInfo.pfnCallback =
|
||||
(PFN_vkDebugReportCallbackEXT)debugMessageCallback;
|
||||
debugCreateInfo.flags = debugFlags;
|
||||
|
||||
this->mDebugDispatcher.init(*this->mInstance, &vkGetInstanceProcAddr);
|
||||
this->mDebugReportCallback =
|
||||
this->mInstance->createDebugReportCallbackEXT(
|
||||
debugCreateInfo, nullptr, this->mDebugDispatcher);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
Manager::clear()
|
||||
{
|
||||
if (this->mManageResources) {
|
||||
this->mManagedTensors.erase(
|
||||
std::remove_if(begin(this->mManagedTensors),
|
||||
end(this->mManagedTensors),
|
||||
[](std::weak_ptr<Tensor> t) { return t.expired(); }),
|
||||
end(this->mManagedTensors));
|
||||
this->mManagedAlgorithms.erase(
|
||||
std::remove_if(
|
||||
begin(this->mManagedAlgorithms),
|
||||
end(this->mManagedAlgorithms),
|
||||
[](std::weak_ptr<Algorithm> t) { return t.expired(); }),
|
||||
end(this->mManagedAlgorithms));
|
||||
this->mManagedSequences.erase(
|
||||
std::remove_if(begin(this->mManagedSequences),
|
||||
end(this->mManagedSequences),
|
||||
[](std::weak_ptr<Sequence> t) { return t.expired(); }),
|
||||
end(this->mManagedSequences));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
|
||||
uint32_t physicalDeviceIndex,
|
||||
const std::vector<std::string>& desiredExtensions)
|
||||
{
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager creating Device");
|
||||
|
||||
if (this->mInstance == nullptr) {
|
||||
throw std::runtime_error("Kompute Manager instance is null");
|
||||
}
|
||||
|
||||
this->mFreeDevice = true;
|
||||
|
||||
// Getting an integer that says how many Vulkan devices we have
|
||||
std::vector<vk::PhysicalDevice> physicalDevices =
|
||||
this->mInstance->enumeratePhysicalDevices();
|
||||
uint32_t deviceCount = physicalDevices.size();
|
||||
|
||||
// This means there are no devices at all
|
||||
if (deviceCount == 0) {
|
||||
throw std::runtime_error("Failed to find GPUs with Vulkan support! "
|
||||
"Maybe you haven't installed vulkan drivers?");
|
||||
}
|
||||
|
||||
// This means that we're exceeding our device limit, for
|
||||
// example if we have 2 devices, just physicalDeviceIndex
|
||||
// 0 and 1 are acceptable. Hence, physicalDeviceIndex should
|
||||
// always be less than deviceCount, else we raise an error
|
||||
if (!(deviceCount > physicalDeviceIndex)) {
|
||||
throw std::runtime_error("There is no such physical index or device, "
|
||||
"please use your existing device");
|
||||
}
|
||||
|
||||
vk::PhysicalDevice physicalDevice = physicalDevices[physicalDeviceIndex];
|
||||
|
||||
this->mPhysicalDevice =
|
||||
std::make_shared<vk::PhysicalDevice>(physicalDevice);
|
||||
|
||||
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_INFO
|
||||
vk::PhysicalDeviceProperties physicalDeviceProperties =
|
||||
physicalDevice.getProperties();
|
||||
#endif
|
||||
|
||||
KP_LOG_INFO("Using physical device index {} found {}",
|
||||
physicalDeviceIndex,
|
||||
physicalDeviceProperties.deviceName);
|
||||
|
||||
if (familyQueueIndices.empty()) {
|
||||
// Find compute queue
|
||||
std::vector<vk::QueueFamilyProperties> allQueueFamilyProperties =
|
||||
physicalDevice.getQueueFamilyProperties();
|
||||
|
||||
uint32_t computeQueueFamilyIndex = 0;
|
||||
bool computeQueueSupported = false;
|
||||
for (uint32_t i = 0; i < allQueueFamilyProperties.size(); i++) {
|
||||
vk::QueueFamilyProperties queueFamilyProperties =
|
||||
allQueueFamilyProperties[i];
|
||||
|
||||
if (queueFamilyProperties.queueFlags &
|
||||
vk::QueueFlagBits::eCompute) {
|
||||
computeQueueFamilyIndex = i;
|
||||
computeQueueSupported = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!computeQueueSupported) {
|
||||
throw std::runtime_error("Compute queue is not supported");
|
||||
}
|
||||
|
||||
this->mComputeQueueFamilyIndices.push_back(computeQueueFamilyIndex);
|
||||
} else {
|
||||
this->mComputeQueueFamilyIndices = familyQueueIndices;
|
||||
}
|
||||
|
||||
std::unordered_map<uint32_t, uint32_t> familyQueueCounts;
|
||||
std::unordered_map<uint32_t, std::vector<float>> familyQueuePriorities;
|
||||
for (const auto& value : this->mComputeQueueFamilyIndices) {
|
||||
familyQueueCounts[value]++;
|
||||
familyQueuePriorities[value].push_back(1.0f);
|
||||
}
|
||||
|
||||
std::unordered_map<uint32_t, uint32_t> familyQueueIndexCount;
|
||||
std::vector<vk::DeviceQueueCreateInfo> deviceQueueCreateInfos;
|
||||
for (const auto& familyQueueInfo : familyQueueCounts) {
|
||||
// Start the queue index counter for this family at 0
|
||||
familyQueueIndexCount[familyQueueInfo.first] = 0;
|
||||
|
||||
// Creating the respective device queue
|
||||
vk::DeviceQueueCreateInfo deviceQueueCreateInfo(
|
||||
vk::DeviceQueueCreateFlags(),
|
||||
familyQueueInfo.first,
|
||||
familyQueueInfo.second,
|
||||
familyQueuePriorities[familyQueueInfo.first].data());
|
||||
deviceQueueCreateInfos.push_back(deviceQueueCreateInfo);
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager desired extension layers {}",
|
||||
fmt::join(desiredExtensions, ", "));
|
||||
|
||||
std::vector<vk::ExtensionProperties> deviceExtensions =
|
||||
this->mPhysicalDevice->enumerateDeviceExtensionProperties();
|
||||
|
||||
std::set<std::string> uniqueExtensionNames;
|
||||
for (const vk::ExtensionProperties& ext : deviceExtensions) {
|
||||
uniqueExtensionNames.insert(ext.extensionName);
|
||||
}
|
||||
KP_LOG_DEBUG("Kompute Manager available extensions {}",
|
||||
fmt::join(uniqueExtensionNames, ", "));
|
||||
std::vector<const char*> validExtensions;
|
||||
for (const std::string& ext : desiredExtensions) {
|
||||
if (uniqueExtensionNames.count(ext) != 0) {
|
||||
validExtensions.push_back(ext.c_str());
|
||||
}
|
||||
}
|
||||
if (desiredExtensions.size() != validExtensions.size()) {
|
||||
KP_LOG_ERROR("Kompute Manager not all extensions were added: {}",
|
||||
fmt::join(validExtensions, ", "));
|
||||
}
|
||||
|
||||
vk::PhysicalDeviceFeatures features;
|
||||
features.shaderInt16 = true;
|
||||
|
||||
vk::PhysicalDeviceVulkan11Features features11;
|
||||
features11.uniformAndStorageBuffer16BitAccess = true;
|
||||
features11.storageBuffer16BitAccess = true;
|
||||
features11.pNext = nullptr;
|
||||
|
||||
vk::PhysicalDeviceVulkan12Features features12;
|
||||
features12.storageBuffer8BitAccess = true;
|
||||
features12.uniformAndStorageBuffer8BitAccess = true;
|
||||
features12.shaderFloat16 = true;
|
||||
features12.shaderInt8 = true;
|
||||
features12.pNext = &features11;
|
||||
|
||||
vk::DeviceCreateInfo deviceCreateInfo(vk::DeviceCreateFlags(),
|
||||
deviceQueueCreateInfos.size(),
|
||||
deviceQueueCreateInfos.data(),
|
||||
{},
|
||||
{},
|
||||
validExtensions.size(),
|
||||
validExtensions.data(),
|
||||
&features);
|
||||
|
||||
deviceCreateInfo.pNext = &features12;
|
||||
|
||||
this->mDevice = std::make_shared<vk::Device>();
|
||||
vk::Result r = physicalDevice.createDevice(
|
||||
&deviceCreateInfo, nullptr, this->mDevice.get());
|
||||
if (r != vk::Result::eSuccess) {
|
||||
KP_LOG_ERROR("Kompute Manager could not create device");
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager device created");
|
||||
|
||||
for (const uint32_t& familyQueueIndex : this->mComputeQueueFamilyIndices) {
|
||||
std::shared_ptr<vk::Queue> currQueue = std::make_shared<vk::Queue>();
|
||||
|
||||
this->mDevice->getQueue(familyQueueIndex,
|
||||
familyQueueIndexCount[familyQueueIndex],
|
||||
currQueue.get());
|
||||
|
||||
familyQueueIndexCount[familyQueueIndex]++;
|
||||
|
||||
this->mComputeQueues.push_back(currQueue);
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Manager compute queue obtained");
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex);
|
||||
|
||||
std::shared_ptr<Sequence> sq{ new kp::Sequence(
|
||||
this->mPhysicalDevice,
|
||||
this->mDevice,
|
||||
this->mComputeQueues[queueIndex],
|
||||
this->mComputeQueueFamilyIndices[queueIndex],
|
||||
totalTimestamps) };
|
||||
|
||||
if (this->mManageResources) {
|
||||
this->mManagedSequences.push_back(sq);
|
||||
}
|
||||
|
||||
return sq;
|
||||
}
|
||||
|
||||
vk::PhysicalDeviceProperties
|
||||
Manager::getDeviceProperties() const
|
||||
{
|
||||
return this->mPhysicalDevice->getProperties();
|
||||
}
|
||||
|
||||
std::vector<vk::PhysicalDevice>
|
||||
Manager::listDevices() const
|
||||
{
|
||||
return this->mInstance->enumeratePhysicalDevices();
|
||||
}
|
||||
|
||||
std::shared_ptr<vk::Instance>
|
||||
Manager::getVkInstance() const
|
||||
{
|
||||
return this->mInstance;
|
||||
}
|
||||
|
||||
}
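For orientation, a rough usage sketch of the Manager added above. Only Manager(), initializeDevice() and sequence() are taken from this diff; the rest (including the main() wrapper and the chosen indices) is an assumption.

// Hypothetical usage sketch (not part of this commit).
#include <memory>

#include "kompute/Manager.hpp"

int main()
{
    kp::Manager mgr;                 // creates the Vulkan instance
    mgr.initializeDevice(0, {}, {}); // physical device 0, default queues and extensions

    // A Sequence wraps a command buffer on the selected compute queue.
    std::shared_ptr<kp::Sequence> seq = mgr.sequence(0, 0);

    // Operations (OpTensorSyncDevice, OpAlgoDispatch, ...) would be recorded
    // on 'seq' and submitted with seq->eval().
    return 0;
}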
kompute/src/OpAlgoDispatch.cpp | 65 (new file)
@@ -0,0 +1,65 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpAlgoDispatch::~OpAlgoDispatch()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");
|
||||
|
||||
if (this->mPushConstantsData) {
|
||||
KP_LOG_DEBUG("Kompute freeing push constants data");
|
||||
free(this->mPushConstantsData);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
for (const std::shared_ptr<Tensor>& tensor :
|
||||
this->mAlgorithm->getTensors()) {
|
||||
tensor->recordPrimaryBufferMemoryBarrier(
|
||||
commandBuffer,
|
||||
vk::AccessFlagBits::eTransferWrite,
|
||||
vk::AccessFlagBits::eShaderRead,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eComputeShader);
|
||||
}
|
||||
|
||||
if (this->mPushConstantsSize) {
|
||||
this->mAlgorithm->setPushConstants(
|
||||
this->mPushConstantsData,
|
||||
this->mPushConstantsSize,
|
||||
this->mPushConstantsDataTypeMemorySize);
|
||||
}
|
||||
|
||||
this->mAlgorithm->recordBindCore(commandBuffer);
|
||||
this->mAlgorithm->recordBindPush(commandBuffer);
|
||||
this->mAlgorithm->recordDispatch(commandBuffer);
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::preEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpAlgoDispatch::postEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch postSubmit called");
|
||||
}
|
||||
|
||||
}
kompute/src/OpBufferSyncDevice.cpp | 51 (new file)
@@ -0,0 +1,51 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include "kompute/operations/OpBufferSyncDevice.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpBufferSyncDevice::OpBufferSyncDevice(
|
||||
vk::Buffer *primaryBuffer,
|
||||
vk::Buffer *stagingBuffer,
|
||||
vk::DeviceSize size)
|
||||
: mPrimaryBuffer(primaryBuffer)
|
||||
, mStagingBuffer(stagingBuffer)
|
||||
, mSize(size)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBufferSyncDevice constructor with params");
|
||||
}
|
||||
|
||||
OpBufferSyncDevice::~OpBufferSyncDevice()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBufferSyncDevice destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpBufferSyncDevice::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBufferSyncDevice record called");
|
||||
vk::BufferCopy copyRegion(0, 0, mSize);
|
||||
commandBuffer.copyBuffer(*mStagingBuffer, *mPrimaryBuffer, copyRegion);
|
||||
}
|
||||
|
||||
void
|
||||
OpBufferSyncDevice::preEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBufferSyncDevice preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpBufferSyncDevice::postEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBufferSyncDevice postEval called");
|
||||
}
|
||||
|
||||
}
kompute/src/OpBufferSyncLocal.cpp | 51 (new file)
@@ -0,0 +1,51 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include "kompute/operations/OpBufferSyncLocal.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpBufferSyncLocal::OpBufferSyncLocal(
|
||||
vk::Buffer *primaryBuffer,
|
||||
vk::Buffer *stagingBuffer,
|
||||
vk::DeviceSize size)
|
||||
: mPrimaryBuffer(primaryBuffer)
|
||||
, mStagingBuffer(stagingBuffer)
|
||||
, mSize(size)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBufferSyncLocal constructor with params");
|
||||
}
|
||||
|
||||
OpBufferSyncLocal::~OpBufferSyncLocal()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBufferSyncLocal destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpBufferSyncLocal::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBufferSyncLocal record called");
|
||||
vk::BufferCopy copyRegion(0, 0, mSize);
|
||||
commandBuffer.copyBuffer(*mPrimaryBuffer, *mStagingBuffer, copyRegion);
|
||||
}
|
||||
|
||||
void
|
||||
OpBufferSyncLocal::preEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBufferSyncLocal preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpBufferSyncLocal::postEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpBufferSyncLocal postEval called");
|
||||
}
|
||||
|
||||
}
kompute/src/OpMemoryBarrier.cpp | 74 (new file)
@@ -0,0 +1,74 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include "kompute/operations/OpMemoryBarrier.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpMemoryBarrier::OpMemoryBarrier(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const vk::AccessFlagBits& srcAccessMask,
|
||||
const vk::AccessFlagBits& dstAccessMask,
|
||||
const vk::PipelineStageFlagBits& srcStageMask,
|
||||
const vk::PipelineStageFlagBits& dstStageMask,
|
||||
bool barrierOnPrimary)
|
||||
: mSrcAccessMask(srcAccessMask)
|
||||
, mDstAccessMask(dstAccessMask)
|
||||
, mSrcStageMask(srcStageMask)
|
||||
, mDstStageMask(dstStageMask)
|
||||
, mBarrierOnPrimary(barrierOnPrimary)
|
||||
, mTensors(tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMemoryBarrier constructor");
|
||||
}
|
||||
|
||||
OpMemoryBarrier::~OpMemoryBarrier()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMemoryBarrier destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpMemoryBarrier::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMemoryBarrier record called");
|
||||
|
||||
// Barrier to ensure the data is finished writing to buffer memory
|
||||
if (this->mBarrierOnPrimary) {
|
||||
for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
|
||||
tensor->recordPrimaryBufferMemoryBarrier(commandBuffer,
|
||||
this->mSrcAccessMask,
|
||||
this->mDstAccessMask,
|
||||
this->mSrcStageMask,
|
||||
this->mDstStageMask);
|
||||
}
|
||||
} else {
|
||||
for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
|
||||
tensor->recordStagingBufferMemoryBarrier(commandBuffer,
|
||||
this->mSrcAccessMask,
|
||||
this->mDstAccessMask,
|
||||
this->mSrcStageMask,
|
||||
this->mDstStageMask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpMemoryBarrier::preEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMemoryBarrier preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpMemoryBarrier::postEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMemoryBarrier postSubmit called");
|
||||
}
|
||||
|
||||
}
kompute/src/OpTensorCopy.cpp | 90 (new file)
@@ -0,0 +1,90 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include "kompute/operations/OpTensorCopy.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy constructor with params");
|
||||
|
||||
this->mTensors = tensors;
|
||||
|
||||
if (this->mTensors.size() < 2) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorCopy called with less than 2 tensor");
|
||||
}
|
||||
|
||||
kp::Tensor::TensorDataTypes dataType = this->mTensors[0]->dataType();
|
||||
uint32_t size = this->mTensors[0]->size();
|
||||
for (const std::shared_ptr<Tensor>& tensor : tensors) {
|
||||
if (tensor->dataType() != dataType) {
|
||||
throw std::runtime_error(fmt::format(
|
||||
"Attempting to copy tensors of different types from {} to {}",
|
||||
Tensor::toString(dataType),
|
||||
Tensor::toString(tensor->dataType())));
|
||||
}
|
||||
if (tensor->size() != size) {
|
||||
throw std::runtime_error(fmt::format(
|
||||
"Attempting to copy tensors of different sizes from {} to {}",
|
||||
size,
|
||||
tensor->size()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OpTensorCopy::~OpTensorCopy()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCopy::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy record called");
|
||||
|
||||
// We iterate from the second tensor onwards and record a copy to all
|
||||
for (size_t i = 1; i < this->mTensors.size(); i++) {
|
||||
this->mTensors[i]->recordCopyFrom(commandBuffer, this->mTensors[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCopy::preEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorCopy::postEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");
|
||||
|
||||
// Do not copy on CPU side if source is storage tensor
|
||||
if (this->mTensors[0]->tensorType() == kp::Tensor::TensorTypes::eStorage)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy not copying tensor source given it's of eStorage type");
|
||||
return;
|
||||
}
|
||||
void* data = this->mTensors[0]->rawData();
|
||||
|
||||
// Copy the data from the first tensor into all the tensors
|
||||
for (size_t i = 1; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == kp::Tensor::TensorTypes::eStorage) {
|
||||
KP_LOG_DEBUG("Kompute OpTensorCopy not copying to tensor dest given it's of eStorage type");
|
||||
continue;
|
||||
}
|
||||
this->mTensors[i]->setRawData(data);
|
||||
}
|
||||
}
|
||||
|
||||
}
kompute/src/OpTensorSyncDevice.cpp | 61 (new file)
@@ -0,0 +1,61 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include "kompute/operations/OpTensorSyncDevice.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpTensorSyncDevice::OpTensorSyncDevice(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
: mPrimaryBuffer(nullptr)
|
||||
, mStagingBuffer(nullptr)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params");
|
||||
|
||||
if (tensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncDevice called with less than 1 tensor");
|
||||
}
|
||||
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
OpTensorSyncDevice::~OpTensorSyncDevice()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice destructor started");
|
||||
|
||||
this->mTensors.clear();
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice record called");
|
||||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::preEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncDevice::postEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncDevice postEval called");
|
||||
}
|
||||
|
||||
}
kompute/src/OpTensorSyncLocal.cpp | 76 (new file)
@@ -0,0 +1,76 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpTensorSyncLocal.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
OpTensorSyncLocal::OpTensorSyncLocal(
|
||||
const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");
|
||||
|
||||
if (tensors.size() < 1) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpTensorSyncLocal called with less than 1 tensor");
|
||||
}
|
||||
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
OpTensorSyncLocal::~OpTensorSyncLocal()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal destructor started");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal record called");
|
||||
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
|
||||
|
||||
this->mTensors[i]->recordPrimaryBufferMemoryBarrier(
|
||||
commandBuffer,
|
||||
vk::AccessFlagBits::eShaderWrite,
|
||||
vk::AccessFlagBits::eTransferRead,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer);
|
||||
|
||||
this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer);
|
||||
|
||||
this->mTensors[i]->recordPrimaryBufferMemoryBarrier(
|
||||
commandBuffer,
|
||||
vk::AccessFlagBits::eTransferWrite,
|
||||
vk::AccessFlagBits::eHostRead,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eHost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::preEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal preEval called");
|
||||
}
|
||||
|
||||
void
|
||||
OpTensorSyncLocal::postEval(const vk::CommandBuffer& /*commandBuffer*/)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal postEval called");
|
||||
|
||||
KP_LOG_DEBUG("Kompute OpTensorSyncLocal mapping data into tensor local");
|
||||
}
|
||||
|
||||
}
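A hedged sketch of the usual sync-dispatch-sync pattern these operations support. The op constructors taking a tensor vector match this diff; OpAlgoDispatch taking an Algorithm handle is assumed from the upstream Kompute API, and the sequence, algorithm and tensors are supplied by the caller.

// Hypothetical helper (not part of this commit): one dispatch with the
// usual staging copies around it.
#include <memory>
#include <vector>

#include "kompute/Sequence.hpp"
#include "kompute/operations/OpAlgoDispatch.hpp"
#include "kompute/operations/OpTensorSyncDevice.hpp"
#include "kompute/operations/OpTensorSyncLocal.hpp"

void runOnce(std::shared_ptr<kp::Sequence> seq,
             std::shared_ptr<kp::Algorithm> algo,
             const std::vector<std::shared_ptr<kp::Tensor>>& tensors)
{
    seq->record(std::make_shared<kp::OpTensorSyncDevice>(tensors)) // host staging -> device
       ->record(std::make_shared<kp::OpAlgoDispatch>(algo))        // barrier, bind, dispatch
       ->record(std::make_shared<kp::OpTensorSyncLocal>(tensors))  // device -> host staging
       ->eval();                                                   // submit and wait on the fence
}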
kompute/src/Sequence.cpp | 396 (new file)
@@ -0,0 +1,396 @@
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include "kompute/Sequence.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::Queue> computeQueue,
|
||||
uint32_t queueIndex,
|
||||
uint32_t totalTimestamps)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence Constructor with existing device & queue");
|
||||
|
||||
this->mPhysicalDevice = physicalDevice;
|
||||
this->mDevice = device;
|
||||
this->mComputeQueue = computeQueue;
|
||||
this->mQueueIndex = queueIndex;
|
||||
|
||||
this->createCommandPool();
|
||||
this->createCommandBuffer();
|
||||
if (totalTimestamps > 0)
|
||||
this->createTimestampQueryPool(totalTimestamps +
|
||||
1); //+1 for the first one
|
||||
}
|
||||
|
||||
Sequence::~Sequence()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence Destructor started");
|
||||
|
||||
if (this->mDevice) {
|
||||
this->destroy();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::begin()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute sequence called BEGIN");
|
||||
|
||||
if (this->isRecording()) {
|
||||
KP_LOG_DEBUG("Kompute Sequence begin called when already recording");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->isRunning()) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Sequence begin called when sequence still running");
|
||||
}
|
||||
|
||||
KP_LOG_INFO("Kompute Sequence command now started recording");
|
||||
this->mCommandBuffer->begin(vk::CommandBufferBeginInfo());
|
||||
this->mRecording = true;
|
||||
|
||||
// latch the first timestamp before any commands are submitted
|
||||
if (this->timestampQueryPool)
|
||||
this->mCommandBuffer->writeTimestamp(
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
*this->timestampQueryPool,
|
||||
0);
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::end()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence calling END");
|
||||
|
||||
if (this->isRunning()) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Sequence begin called when sequence still running");
|
||||
}
|
||||
|
||||
if (!this->isRecording()) {
|
||||
KP_LOG_WARN("Kompute Sequence end called when not recording");
|
||||
return;
|
||||
} else {
|
||||
KP_LOG_INFO("Kompute Sequence command recording END");
|
||||
this->mCommandBuffer->end();
|
||||
this->mRecording = false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::clear()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence calling clear");
|
||||
if (this->isRecording()) {
|
||||
this->end();
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::eval()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute sequence EVAL BEGIN");
|
||||
|
||||
return this->evalAsync()->evalAwait();
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::eval(std::shared_ptr<OpBase> op)
|
||||
{
|
||||
this->clear();
|
||||
return this->record(op)->eval();
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::evalAsync()
|
||||
{
|
||||
if (this->isRecording()) {
|
||||
this->end();
|
||||
}
|
||||
|
||||
if (this->mIsRunning) {
|
||||
throw std::runtime_error(
|
||||
"Kompute Sequence evalAsync called when an eval async was "
|
||||
"called without successful wait");
|
||||
}
|
||||
|
||||
this->mIsRunning = true;
|
||||
|
||||
for (size_t i = 0; i < this->mOperations.size(); i++) {
|
||||
this->mOperations[i]->preEval(*this->mCommandBuffer);
|
||||
}
|
||||
|
||||
vk::SubmitInfo submitInfo(
|
||||
0, nullptr, nullptr, 1, this->mCommandBuffer.get());
|
||||
|
||||
this->mFence = this->mDevice->createFence(vk::FenceCreateInfo());
|
||||
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute sequence submitting command buffer into compute queue");
|
||||
|
||||
this->mComputeQueue->submit(1, &submitInfo, this->mFence);
|
||||
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::evalAsync(std::shared_ptr<OpBase> op)
|
||||
{
|
||||
this->clear();
|
||||
this->record(op);
|
||||
this->evalAsync();
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::evalAwait(uint64_t waitFor)
|
||||
{
|
||||
if (!this->mIsRunning) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAwait called without existing eval");
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
vk::Result result =
|
||||
this->mDevice->waitForFences(1, &this->mFence, VK_TRUE, waitFor);
|
||||
this->mDevice->destroy(
|
||||
this->mFence, (vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
|
||||
this->mIsRunning = false;
|
||||
|
||||
if (result == vk::Result::eTimeout) {
|
||||
KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}",
|
||||
waitFor);
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < this->mOperations.size(); i++) {
|
||||
this->mOperations[i]->postEval(*this->mCommandBuffer);
|
||||
}
|
||||
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
bool
|
||||
Sequence::isRunning() const
|
||||
{
|
||||
return this->mIsRunning;
|
||||
}
|
||||
|
||||
bool
|
||||
Sequence::isRecording() const
|
||||
{
|
||||
return this->mRecording;
|
||||
}
|
||||
|
||||
bool
|
||||
Sequence::isInit() const
|
||||
{
|
||||
return this->mDevice && this->mCommandPool && this->mCommandBuffer &&
|
||||
this->mComputeQueue;
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::rerecord()
|
||||
{
|
||||
this->end();
|
||||
std::vector<std::shared_ptr<OpBase>> ops = this->mOperations;
|
||||
this->mOperations.clear();
|
||||
for (const std::shared_ptr<kp::OpBase>& op : ops) {
|
||||
this->record(op);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::destroy()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence destroy called");
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_WARN("Kompute Sequence destroy called "
|
||||
"with null Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreeCommandBuffer) {
|
||||
KP_LOG_INFO("Freeing CommandBuffer");
|
||||
if (!this->mCommandBuffer) {
|
||||
KP_LOG_WARN("Kompute Sequence destroy called with null "
|
||||
"CommandPool pointer");
|
||||
return;
|
||||
}
|
||||
this->mDevice->freeCommandBuffers(
|
||||
*this->mCommandPool, 1, this->mCommandBuffer.get());
|
||||
|
||||
this->mCommandBuffer = nullptr;
|
||||
this->mFreeCommandBuffer = false;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence Freed CommandBuffer");
|
||||
}
|
||||
|
||||
if (this->mFreeCommandPool) {
|
||||
KP_LOG_INFO("Destroying CommandPool");
|
||||
if (this->mCommandPool == nullptr) {
|
||||
KP_LOG_WARN("Kompute Sequence destroy called with null "
|
||||
"CommandPool pointer");
|
||||
return;
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mCommandPool,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
|
||||
this->mCommandPool = nullptr;
|
||||
this->mFreeCommandPool = false;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Sequence Destroyed CommandPool");
|
||||
}
|
||||
|
||||
if (this->mOperations.size()) {
|
||||
KP_LOG_INFO("Kompute Sequence clearing operations buffer");
|
||||
this->mOperations.clear();
|
||||
}
|
||||
|
||||
if (this->timestampQueryPool) {
|
||||
KP_LOG_INFO("Destroying QueryPool");
|
||||
this->mDevice->destroy(
|
||||
*this->timestampQueryPool,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
|
||||
this->timestampQueryPool = nullptr;
|
||||
KP_LOG_DEBUG("Kompute Sequence Destroyed QueryPool");
|
||||
}
|
||||
|
||||
if (this->mDevice) {
|
||||
this->mDevice = nullptr;
|
||||
}
|
||||
if (this->mPhysicalDevice) {
|
||||
this->mPhysicalDevice = nullptr;
|
||||
}
|
||||
if (this->mComputeQueue) {
|
||||
this->mComputeQueue = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Sequence>
|
||||
Sequence::record(std::shared_ptr<OpBase> op)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence record function started");
|
||||
|
||||
this->begin();
|
||||
|
||||
KP_LOG_DEBUG(
|
||||
"Kompute Sequence running record on OpBase derived class instance");
|
||||
|
||||
op->record(*this->mCommandBuffer);
|
||||
|
||||
this->mOperations.push_back(op);
|
||||
|
||||
if (this->timestampQueryPool)
|
||||
this->mCommandBuffer->writeTimestamp(
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
*this->timestampQueryPool,
|
||||
this->mOperations.size());
|
||||
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::createCommandPool()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence creating command pool");
|
||||
|
||||
if (!this->mDevice) {
|
||||
throw std::runtime_error("Kompute Sequence device is null");
|
||||
}
|
||||
|
||||
this->mFreeCommandPool = true;
|
||||
|
||||
vk::CommandPoolCreateInfo commandPoolInfo(vk::CommandPoolCreateFlags(),
|
||||
this->mQueueIndex);
|
||||
this->mCommandPool = std::make_shared<vk::CommandPool>();
|
||||
this->mDevice->createCommandPool(
|
||||
&commandPoolInfo, nullptr, this->mCommandPool.get());
|
||||
KP_LOG_DEBUG("Kompute Sequence Command Pool Created");
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::createCommandBuffer()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence creating command buffer");
|
||||
if (!this->mDevice) {
|
||||
throw std::runtime_error("Kompute Sequence device is null");
|
||||
}
|
||||
if (!this->mCommandPool) {
|
||||
throw std::runtime_error("Kompute Sequence command pool is null");
|
||||
}
|
||||
|
||||
this->mFreeCommandBuffer = true;
|
||||
|
||||
vk::CommandBufferAllocateInfo commandBufferAllocateInfo(
|
||||
*this->mCommandPool, vk::CommandBufferLevel::ePrimary, 1);
|
||||
|
||||
this->mCommandBuffer = std::make_shared<vk::CommandBuffer>();
|
||||
this->mDevice->allocateCommandBuffers(&commandBufferAllocateInfo,
|
||||
this->mCommandBuffer.get());
|
||||
KP_LOG_DEBUG("Kompute Sequence Command Buffer Created");
|
||||
}
|
||||
|
||||
void
|
||||
Sequence::createTimestampQueryPool(uint32_t totalTimestamps)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Sequence creating query pool");
|
||||
if (!this->isInit()) {
|
||||
throw std::runtime_error(
|
||||
"createTimestampQueryPool() called on uninitialized Sequence");
|
||||
}
|
||||
if (!this->mPhysicalDevice) {
|
||||
throw std::runtime_error("Kompute Sequence physical device is null");
|
||||
}
|
||||
|
||||
vk::PhysicalDeviceProperties physicalDeviceProperties =
|
||||
this->mPhysicalDevice->getProperties();
|
||||
|
||||
if (physicalDeviceProperties.limits.timestampComputeAndGraphics) {
|
||||
vk::QueryPoolCreateInfo queryPoolInfo;
|
||||
queryPoolInfo.setQueryCount(totalTimestamps);
|
||||
queryPoolInfo.setQueryType(vk::QueryType::eTimestamp);
|
||||
this->timestampQueryPool = std::make_shared<vk::QueryPool>(
|
||||
this->mDevice->createQueryPool(queryPoolInfo));
|
||||
|
||||
KP_LOG_DEBUG("Query pool for timestamps created");
|
||||
} else {
|
||||
throw std::runtime_error("Device does not support timestamps");
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::uint64_t>
|
||||
Sequence::getTimestamps()
|
||||
{
|
||||
if (!this->timestampQueryPool)
|
||||
throw std::runtime_error("Timestamp latching not enabled");
|
||||
|
||||
const auto n = this->mOperations.size() + 1;
|
||||
std::vector<std::uint64_t> timestamps(n, 0);
|
||||
this->mDevice->getQueryPoolResults(
|
||||
*this->timestampQueryPool,
|
||||
0,
|
||||
n,
|
||||
timestamps.size() * sizeof(std::uint64_t),
|
||||
timestamps.data(),
|
||||
sizeof(uint64_t),
|
||||
vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait);
|
||||
|
||||
return timestamps;
|
||||
}
|
||||
|
||||
}
|
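A short sketch of the submission flow implemented above, assuming seq is a std::shared_ptr<kp::Sequence> with operations already recorded; eval() is simply evalAsync() followed by evalAwait(), so the asynchronous form only matters when CPU work should overlap the GPU.

// Hedged sketch: asynchronous submit with an explicit wait. The waitFor value
// is forwarded to vk::Device::waitForFences, so it is a timeout in nanoseconds.
seq->evalAsync();                // submits the recorded command buffer
// ... CPU-side work can overlap with GPU execution here ...
seq->evalAwait(1000000000ULL);   // wait up to ~1 second, then run postEval ops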
451
kompute/src/Tensor.cpp
Normal file
@@ -0,0 +1,451 @@
// SPDX-License-Identifier: Apache-2.0

/**
 * Copyright (c) 2023 Nomic, Inc. All rights reserved.
 *
 * This software is licensed under the terms of the Software for Open Models License (SOM),
 * version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
 * this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
 */

#include "kompute/Tensor.hpp"

namespace kp {

std::string
Tensor::toString(Tensor::TensorDataTypes dt)
{
    switch (dt) {
        case TensorDataTypes::eBool:
            return "eBool";
        case TensorDataTypes::eInt:
            return "eInt";
        case TensorDataTypes::eUnsignedInt:
            return "eUnsignedInt";
        case TensorDataTypes::eFloat:
            return "eFloat";
        case TensorDataTypes::eDouble:
            return "eDouble";
        default:
            return "unknown";
    }
}

std::string
Tensor::toString(Tensor::TensorTypes dt)
{
    switch (dt) {
        case TensorTypes::eDevice:
            return "eDevice";
        case TensorTypes::eHost:
            return "eHost";
        case TensorTypes::eStorage:
            return "eStorage";
        default:
            return "unknown";
    }
}

Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
               std::shared_ptr<vk::Device> device,
               void* data,
               uint32_t elementTotalCount,
               uint32_t elementMemorySize,
               const TensorDataTypes& dataType,
               vk::DeviceMemory *primaryMemory,
               vk::Buffer *primaryBuffer,
               vk::DeviceMemory *stagingMemory,
               vk::Buffer *stagingBuffer,
               vk::DeviceSize offset,
               const TensorTypes& tensorType)
{
    KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}",
                 elementTotalCount,
                 Tensor::toString(tensorType));

    this->mPhysicalDevice = physicalDevice;
    this->mDevice = device;
    this->mDataType = dataType;
    this->mTensorType = tensorType;

    this->rebuild(data, elementTotalCount, elementMemorySize, primaryMemory, primaryBuffer, stagingMemory, stagingBuffer, offset);
}

Tensor::~Tensor()
{
    KP_LOG_DEBUG("Kompute Tensor destructor started. Type: {}",
                 Tensor::toString(this->tensorType()));

    if (this->mDevice) {
        this->destroy();
    }

    KP_LOG_DEBUG("Kompute Tensor destructor success");
}

void
Tensor::rebuild(void* /*data*/,
                uint32_t elementTotalCount,
                uint64_t memorySize,
                vk::DeviceMemory *primaryMemory,
                vk::Buffer *primaryBuffer,
                vk::DeviceMemory *stagingMemory,
                vk::Buffer *stagingBuffer,
                vk::DeviceSize offset)
{
    KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount);

    this->mSize = elementTotalCount;
    this->mMemorySize = memorySize;
    this->mOffset = offset;

    if (this->mPrimaryBuffer || this->mPrimaryMemory) {
        KP_LOG_DEBUG(
          "Kompute Tensor destroying existing resources before rebuild");
        this->destroy();
    }

    this->setGPUResources(primaryMemory, primaryBuffer, stagingMemory, stagingBuffer, offset);
}

Tensor::TensorTypes
Tensor::tensorType()
{
    return this->mTensorType;
}

bool
Tensor::isInit()
{
    return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory &&
           this->mRawData;
}

uint32_t
Tensor::size()
{
    return this->mSize;
}

uint64_t
Tensor::memorySize()
{
    return this->mMemorySize;
}

kp::Tensor::TensorDataTypes
Tensor::dataType()
{
    return this->mDataType;
}

void*
Tensor::rawData()
{
    return this->mRawData;
}

void
Tensor::setRawData(const void* data)
{
    memcpy(this->mRawData, data, this->memorySize());
}

void
Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
                       std::shared_ptr<Tensor> copyFromTensor)
{

    vk::DeviceSize bufferSize(this->memorySize());
    vk::BufferCopy copyRegion(mOffset, mOffset, bufferSize);

    KP_LOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bufferSize);

    this->recordCopyBuffer(commandBuffer,
                           copyFromTensor->mPrimaryBuffer,
                           this->mPrimaryBuffer,
                           bufferSize,
                           copyRegion);
}

void
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer)
{
    if (!this->mStagingBuffer)
        return;

    vk::DeviceSize bufferSize(this->memorySize());
    vk::BufferCopy copyRegion(mOffset, mOffset, bufferSize);

    KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);

    this->recordCopyBuffer(commandBuffer,
                           this->mStagingBuffer,
                           this->mPrimaryBuffer,
                           bufferSize,
                           copyRegion);
}

void
Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer)
{
    if (!this->mStagingBuffer)
        return;

    vk::DeviceSize bufferSize(this->memorySize());
    vk::BufferCopy copyRegion(mOffset, mOffset, bufferSize);

    KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);

    this->recordCopyBuffer(commandBuffer,
                           this->mPrimaryBuffer,
                           this->mStagingBuffer,
                           bufferSize,
                           copyRegion);
}

void
Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
                         vk::Buffer *bufferFrom,
                         vk::Buffer *bufferTo,
                         vk::DeviceSize /*bufferSize*/,
                         vk::BufferCopy copyRegion)
{

    commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion);
}

void
Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
                                         vk::AccessFlagBits srcAccessMask,
                                         vk::AccessFlagBits dstAccessMask,
                                         vk::PipelineStageFlagBits srcStageMask,
                                         vk::PipelineStageFlagBits dstStageMask)
{
    KP_LOG_DEBUG("Kompute Tensor recording PRIMARY buffer memory barrier");

    this->recordBufferMemoryBarrier(commandBuffer,
                                    *this->mPrimaryBuffer,
                                    srcAccessMask,
                                    dstAccessMask,
                                    srcStageMask,
                                    dstStageMask);
}

void
Tensor::recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
                                         vk::AccessFlagBits srcAccessMask,
                                         vk::AccessFlagBits dstAccessMask,
                                         vk::PipelineStageFlagBits srcStageMask,
                                         vk::PipelineStageFlagBits dstStageMask)
{
    if (!this->mStagingBuffer)
        return;

    KP_LOG_DEBUG("Kompute Tensor recording STAGING buffer memory barrier");

    this->recordBufferMemoryBarrier(commandBuffer,
                                    *this->mStagingBuffer,
                                    srcAccessMask,
                                    dstAccessMask,
                                    srcStageMask,
                                    dstStageMask);
}

void
Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
                                  const vk::Buffer& buffer,
                                  vk::AccessFlagBits srcAccessMask,
                                  vk::AccessFlagBits dstAccessMask,
                                  vk::PipelineStageFlagBits srcStageMask,
                                  vk::PipelineStageFlagBits dstStageMask)
{
    KP_LOG_DEBUG("Kompute Tensor recording buffer memory barrier");

    vk::DeviceSize bufferSize = this->memorySize();

    vk::BufferMemoryBarrier bufferMemoryBarrier;
    bufferMemoryBarrier.buffer = buffer;
    bufferMemoryBarrier.size = bufferSize;
    bufferMemoryBarrier.srcAccessMask = srcAccessMask;
    bufferMemoryBarrier.dstAccessMask = dstAccessMask;
    bufferMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    bufferMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

    commandBuffer.pipelineBarrier(srcStageMask,
                                  dstStageMask,
                                  vk::DependencyFlags(),
                                  nullptr,
                                  bufferMemoryBarrier,
                                  nullptr);
}

vk::DescriptorBufferInfo
Tensor::constructDescriptorBufferInfo()
{
    KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}",
                 this->memorySize());
    vk::DeviceSize bufferSize = this->memorySize();
    return vk::DescriptorBufferInfo(*this->mPrimaryBuffer,
                                    mOffset, // offset
                                    bufferSize);
}

vk::BufferUsageFlags
Tensor::getPrimaryBufferUsageFlags()
{
    switch (this->mTensorType) {
        case TensorTypes::eDevice:
            return vk::BufferUsageFlagBits::eStorageBuffer |
                   vk::BufferUsageFlagBits::eTransferSrc |
                   vk::BufferUsageFlagBits::eTransferDst;
            break;
        case TensorTypes::eHost:
            return vk::BufferUsageFlagBits::eStorageBuffer |
                   vk::BufferUsageFlagBits::eTransferSrc |
                   vk::BufferUsageFlagBits::eTransferDst;
            break;
        case TensorTypes::eStorage:
            return vk::BufferUsageFlagBits::eStorageBuffer;
            break;
        default:
            throw std::runtime_error("Kompute Tensor invalid tensor type");
    }
}

vk::MemoryPropertyFlags
Tensor::getPrimaryMemoryPropertyFlags()
{
    switch (this->mTensorType) {
        case TensorTypes::eDevice:
            return vk::MemoryPropertyFlagBits::eDeviceLocal;
            break;
        case TensorTypes::eHost:
            return vk::MemoryPropertyFlagBits::eHostVisible |
                   vk::MemoryPropertyFlagBits::eHostCoherent;
            break;
        case TensorTypes::eStorage:
            return vk::MemoryPropertyFlagBits::eDeviceLocal;
            break;
        default:
            throw std::runtime_error("Kompute Tensor invalid tensor type");
    }
}

vk::BufferUsageFlags
Tensor::getStagingBufferUsageFlags()
{
    switch (this->mTensorType) {
        case TensorTypes::eDevice:
            return vk::BufferUsageFlagBits::eTransferSrc |
                   vk::BufferUsageFlagBits::eTransferDst;
            break;
        default:
            throw std::runtime_error("Kompute Tensor invalid tensor type");
    }
}

vk::MemoryPropertyFlags
Tensor::getStagingMemoryPropertyFlags()
{
    switch (this->mTensorType) {
        case TensorTypes::eDevice:
            return vk::MemoryPropertyFlagBits::eHostVisible |
                   vk::MemoryPropertyFlagBits::eHostCoherent;
            break;
        default:
            throw std::runtime_error("Kompute Tensor invalid tensor type");
    }
}

void
Tensor::setGPUResources(vk::DeviceMemory *primaryMemory,
                        vk::Buffer *primaryBuffer,
                        vk::DeviceMemory *stagingMemory,
                        vk::Buffer *stagingBuffer,
                        vk::DeviceSize /*offset*/)
{
    KP_LOG_DEBUG("Kompute Tensor creating buffer");

    if (!this->mPhysicalDevice) {
        throw std::runtime_error("Kompute Tensor physical device is null");
    }
    if (!this->mDevice) {
        throw std::runtime_error("Kompute Tensor device is null");
    }

    KP_LOG_DEBUG("Kompute Tensor creating primary buffer and memory");

    this->mPrimaryBuffer = primaryBuffer;
    this->mPrimaryMemory = primaryMemory;

    if (this->mTensorType == TensorTypes::eDevice) {
        KP_LOG_DEBUG("Kompute Tensor creating staging buffer and memory");

        this->mStagingBuffer = stagingBuffer;
        this->mStagingMemory = stagingMemory;
    }

    KP_LOG_DEBUG("Kompute Tensor buffer & memory creation successful");
}

void
Tensor::destroy()
{
    KP_LOG_DEBUG("Kompute Tensor started destroy()");

    // Setting raw data to null regardless of whether device is available to
    // invalidate Tensor
    this->mRawData = nullptr;
    this->mSize = 0;
    this->mMemorySize = 0;

    if (!this->mDevice) {
        KP_LOG_WARN(
          "Kompute Tensor destroy() reached with null Device pointer");
        return;
    }

    if (this->mDevice) {
        this->mDevice = nullptr;
    }

    KP_LOG_DEBUG("Kompute Tensor successful destroy()");
}

template<>
Tensor::TensorDataTypes
TensorT<bool>::dataType()
{
    return Tensor::TensorDataTypes::eBool;
}

template<>
Tensor::TensorDataTypes
TensorT<int32_t>::dataType()
{
    return Tensor::TensorDataTypes::eInt;
}

template<>
Tensor::TensorDataTypes
TensorT<uint32_t>::dataType()
{
    return Tensor::TensorDataTypes::eUnsignedInt;
}

template<>
Tensor::TensorDataTypes
TensorT<float>::dataType()
{
    return Tensor::TensorDataTypes::eFloat;
}

template<>
Tensor::TensorDataTypes
TensorT<double>::dataType()
{
    return Tensor::TensorDataTypes::eDouble;
}

}
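A small sketch of how the TensorT<T> specializations above tie element types to TensorDataTypes, assuming mgr is a kp::Manager and the memory/buffer handles have already been allocated by the caller, as the Manager.hpp declarations later in this commit require (the TensorT constructor itself is not shown in this diff, so the exact behaviour is an assumption):

// Hedged sketch: tensorT<float> yields a TensorT<float> whose dataType() is
// eFloat; primaryMemory/primaryBuffer/stagingMemory/stagingBuffer are
// caller-provided Vulkan handles in this fork of Kompute.
std::vector<float> data = { 0.0f, 1.0f, 2.0f };
auto t = mgr.tensorT<float>(data, primaryMemory, primaryBuffer,
                            stagingMemory, stagingBuffer);
// t->dataType() == kp::Tensor::TensorDataTypes::eFloat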
46
kompute/src/include/CMakeLists.txt
Normal file
@@ -0,0 +1,46 @@
cmake_minimum_required(VERSION 3.20)

# ####################################################
# Kompute
# ####################################################
target_include_directories(kompute PUBLIC $<INSTALL_INTERFACE:include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)

target_sources(kompute PRIVATE

    # Header files (useful in IDEs)
    kompute/Algorithm.hpp
    kompute/Core.hpp
    kompute/Kompute.hpp
    kompute/Manager.hpp
    kompute/Sequence.hpp
    kompute/Tensor.hpp

    kompute/operations/OpAlgoDispatch.hpp
    kompute/operations/OpBase.hpp
    kompute/operations/OpMemoryBarrier.hpp
    kompute/operations/OpMult.hpp
    kompute/operations/OpTensorCopy.hpp
    kompute/operations/OpTensorSyncDevice.hpp
    kompute/operations/OpTensorSyncLocal.hpp
    kompute/operations/OpBufferSyncDevice.hpp
    kompute/operations/OpBufferSyncLocal.hpp

    kompute/logger/Logger.hpp
)

install(DIRECTORY kompute DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# ####################################################
# Logger
# ####################################################
target_include_directories(kp_logger PUBLIC $<INSTALL_INTERFACE:include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)

target_sources(kp_logger PRIVATE

    # Header files (useful in IDEs)
    kompute/logger/Logger.hpp
)

install(DIRECTORY logger DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
338
kompute/src/include/kompute/Algorithm.hpp
Normal file
@@ -0,0 +1,338 @@
// SPDX-License-Identifier: Apache-2.0

/**
 * Copyright (c) 2023 Nomic, Inc. All rights reserved.
 *
 * This software is licensed under the terms of the Software for Open Models License (SOM),
 * version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
 * this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
 */

#pragma once

#include "kompute/Core.hpp"

#include "fmt/format.h"
#include "kompute/Tensor.hpp"
#include "logger/Logger.hpp"

namespace kp {

/**
    Abstraction for compute shaders that are run on top of tensors grouped via
    ParameterGroups (which group descriptorsets)
*/
class Algorithm
{
  public:
    /**
     * Main constructor for algorithm with configuration parameters to create
     * the underlying resources.
     *
     * @param device The Vulkan device to use for creating resources
     * @param tensors (optional) The tensors to use to create the descriptor
     * resources
     * @param spirv (optional) The spirv code to use to create the algorithm
     * @param workgroup (optional) The kp::Workgroup to use for the dispatch
     * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
     * @param specializationConstants (optional) The templatable param is to be
     * used to initialize the specialization constants which cannot be changed
     * once set.
     * @param pushConstants (optional) This templatable param is to be used
     * when initializing the pipeline, which set the size of the push constants
     * - these can be modified but all new values must have the same data type
     * and length as otherwise it will result in errors.
     */
    template<typename S = float, typename P = float>
    Algorithm(std::shared_ptr<vk::Device> device,
              vk::DescriptorPool *pool,
              const std::vector<std::shared_ptr<Tensor>>& tensors = {},
              const std::vector<uint32_t>& spirv = {},
              const Workgroup& workgroup = {},
              const std::vector<S>& specializationConstants = {},
              const std::vector<P>& pushConstants = {})
    {
        KP_LOG_DEBUG("Kompute Algorithm Constructor with device");

        this->mDevice = device;
        this->mDescriptorPool = pool;

        if (tensors.size() && spirv.size()) {
            KP_LOG_INFO(
              "Kompute Algorithm initialising with tensor size: {} and "
              "spirv size: {}",
              tensors.size(),
              spirv.size());
            this->rebuild(tensors,
                          spirv,
                          workgroup,
                          specializationConstants,
                          pushConstants);
        } else {
            KP_LOG_INFO(
              "Kompute Algorithm constructor with empty tensors and or "
              "spirv so not rebuilding vulkan components");
        }
    }

    /**
     * Rebuild function to reconstruct algorithm with configuration parameters
     * to create the underlying resources.
     *
     * @param tensors The tensors to use to create the descriptor resources
     * @param spirv The spirv code to use to create the algorithm
     * @param workgroup (optional) The kp::Workgroup to use for the dispatch
     * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
     * @param specializationConstants (optional) The std::vector<float> to use
     * to initialize the specialization constants which cannot be changed once
     * set.
     * @param pushConstants (optional) The std::vector<float> to use when
     * initializing the pipeline, which set the size of the push constants -
     * these can be modified but all new values must have the same vector size
     * as this initial value.
     */
    template<typename S = float, typename P = float>
    void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
                 const std::vector<uint32_t>& spirv,
                 const Workgroup& workgroup = {},
                 const std::vector<S>& specializationConstants = {},
                 const std::vector<P>& pushConstants = {})
    {
        KP_LOG_DEBUG("Kompute Algorithm rebuild started");

        this->mTensors = tensors;
        this->mSpirv = spirv;

        if (specializationConstants.size()) {
            if (this->mSpecializationConstantsData) {
                free(this->mSpecializationConstantsData);
            }
            uint32_t memorySize =
              sizeof(decltype(specializationConstants.back()));
            uint32_t size = specializationConstants.size();
            uint32_t totalSize = size * memorySize;
            this->mSpecializationConstantsData = malloc(totalSize);
            memcpy(this->mSpecializationConstantsData,
                   specializationConstants.data(),
                   totalSize);
            this->mSpecializationConstantsDataTypeMemorySize = memorySize;
            this->mSpecializationConstantsSize = size;
        }

        if (pushConstants.size()) {
            if (this->mPushConstantsData) {
                free(this->mPushConstantsData);
            }
            uint32_t memorySize = sizeof(decltype(pushConstants.back()));
            uint32_t size = pushConstants.size();
            uint32_t totalSize = size * memorySize;
            this->mPushConstantsData = malloc(totalSize);
            memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
            this->mPushConstantsDataTypeMemorySize = memorySize;
            this->mPushConstantsSize = size;
        }

        this->setWorkgroup(
          workgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1);

        // Descriptor pool is created first so if available then destroy all
        // before rebuild
        if (this->isInit()) {
            this->destroy();
        }

        this->createParameters();
        this->createShaderModule();
        this->createPipeline();
    }

    /**
     * Destructor for Algorithm which is responsible for freeing and destroying
     * respective pipelines and owned parameter groups.
     */
    ~Algorithm();

    /**
     * Records the dispatch function with the provided template parameters or
     * alternatively using the size of the tensor by default.
     *
     * @param commandBuffer Command buffer to record the algorithm resources to
     */
    void recordDispatch(const vk::CommandBuffer& commandBuffer);

    /**
     * Records command that binds the "core" algorithm components which consist
     * of binding the pipeline and binding the descriptorsets.
     *
     * @param commandBuffer Command buffer to record the algorithm resources to
     */
    void recordBindCore(const vk::CommandBuffer& commandBuffer);

    /**
     * Records command that binds the push constants to the command buffer
     * provided
     * - it is required that the pushConstants provided are of the same size as
     * the ones provided during initialization.
     *
     * @param commandBuffer Command buffer to record the algorithm resources to
     */
    void recordBindPush(const vk::CommandBuffer& commandBuffer);

    /**
     * Function that checks all the gpu resource components to verify if these
     * have been created and returns true if all are valid.
     *
     * @returns returns true if the algorithm is currently initialized.
     */
    bool isInit();

    /**
     * Sets the work group to use in the recordDispatch
     *
     * @param workgroup The kp::Workgroup value to use to update the algorithm.
     * It must have a value greater than 1 on the x value (index 0) otherwise it
     * will be initialized on the size of the first tensor (ie.
     * this->mTensor[0]->size())
     */
    void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);
    /**
     * Sets the push constants to the new value provided to use in the next
     * bindPush()
     *
     * @param pushConstants The templatable vector is to be used to set the push
     * constants to use in the next bindPush(...) calls. The constants provided
     * must be of the same size as the ones created during initialization.
     */
    template<typename T>
    void setPushConstants(const std::vector<T>& pushConstants)
    {
        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
        uint32_t size = pushConstants.size();
        this->setPushConstants(pushConstants.data(), size, memorySize);
    }

    void updateDescriptors(vk::DescriptorPool *pool)
    {
        this->mDescriptorPool = pool;
        this->setWorkgroup(
          this->mWorkgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1);

        this->updateParameters(); // TODO: See if we can reduce this
    }

    /**
     * Sets the push constants to the new value provided to use in the next
     * bindPush() with the raw memory block location and memory size to be used.
     *
     * @param data The raw data point to copy the data from, without modifying
     * the pointer.
     * @param size The number of data elements provided in the data
     * @param memorySize The memory size of each of the data elements in bytes.
     */
    void setPushConstants(const void* data, uint32_t size, uint32_t memorySize)
    {

        uint32_t totalSize = memorySize * size;
        uint32_t previousTotalSize =
          this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize;

        if (totalSize != previousTotalSize) {
            throw std::runtime_error(fmt::format(
              "Kompute Algorithm push "
              "constant total memory size provided is {} but expected {} bytes",
              totalSize,
              previousTotalSize));
        }
        if (this->mPushConstantsData) {
            free(this->mPushConstantsData);
        }

        this->mPushConstantsData = malloc(totalSize);
        memcpy(this->mPushConstantsData, data, totalSize);
        this->mPushConstantsDataTypeMemorySize = memorySize;
        this->mPushConstantsSize = size;
    }

    /**
     * Gets the current workgroup from the algorithm.
     *
     * @returns The kp::Workgroup currently set on the algorithm, which is used
     * in the next recordDispatch(...) calls.
     */
    const Workgroup& getWorkgroup();
    /**
     * Gets the specialization constants of the current algorithm.
     *
     * @returns The std::vector<float> currently set for specialization
     * constants
     */
    template<typename T>
    const std::vector<T> getSpecializationConstants()
    {
        return { (T*)this->mSpecializationConstantsData,
                 ((T*)this->mSpecializationConstantsData) +
                   this->mSpecializationConstantsSize };
    }
    /**
     * Gets the push constants of the current algorithm.
     *
     * @returns The std::vector<float> currently set for push constants
     */
    template<typename T>
    const std::vector<T> getPushConstants()
    {
        return { (T*)this->mPushConstantsData,
                 ((T*)this->mPushConstantsData) + this->mPushConstantsSize };
    }
    /**
     * Gets the current tensors that are used in the algorithm.
     *
     * @returns The list of tensors used in the algorithm.
     */
    const std::vector<std::shared_ptr<Tensor>>& getTensors();
    void setTensors(const std::vector<std::shared_ptr<Tensor>>& tensors);

    void destroy();

  private:
    // -------------- NEVER OWNED RESOURCES
    std::shared_ptr<vk::Device> mDevice;
    std::vector<std::shared_ptr<Tensor>> mTensors;

    // -------------- OPTIONALLY OWNED RESOURCES
    std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
    bool mFreeDescriptorSetLayout = false;
    vk::DescriptorPool *mDescriptorPool = nullptr;
    std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
    bool mFreeDescriptorSet = false;
    std::shared_ptr<vk::ShaderModule> mShaderModule;
    bool mFreeShaderModule = false;
    std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
    bool mFreePipelineLayout = false;
    std::shared_ptr<vk::PipelineCache> mPipelineCache;
    bool mFreePipelineCache = false;
    std::shared_ptr<vk::Pipeline> mPipeline;
    bool mFreePipeline = false;

    // -------------- ALWAYS OWNED RESOURCES
    std::vector<uint32_t> mSpirv;
    void* mSpecializationConstantsData = nullptr;
    uint32_t mSpecializationConstantsDataTypeMemorySize = 0;
    uint32_t mSpecializationConstantsSize = 0;
    void* mPushConstantsData = nullptr;
    uint32_t mPushConstantsDataTypeMemorySize = 0;
    uint32_t mPushConstantsSize = 0;
    Workgroup mWorkgroup;

    // Create util functions
    void createShaderModule();
    void createPipeline();

    // Parameters
    void freeParameters();
    void createParameters();
    void updateParameters();
};

} // End namespace kp
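A brief sketch of the push-constant contract documented above: values set after construction must keep the element size and count used at initialization, otherwise setPushConstants throws. The PushConsts struct and its fields below are hypothetical and would have to match the shader's push-constant block; pool, tensors and spirv are assumed to exist already.

// Hedged sketch of the push constant size contract.
struct PushConsts { uint32_t n; float scale; };            // hypothetical layout
auto algo = mgr.algorithm<float, PushConsts>(
    pool, tensors, spirv, kp::Workgroup{ 64, 1, 1 },
    {}, { PushConsts{ 0, 1.0f } });
algo->setPushConstants<PushConsts>({ { 1024, 0.5f } });     // same size/type: ok
// algo->setPushConstants<float>({ 1.0f, 2.0f, 3.0f });     // different total size: throws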
39
kompute/src/include/kompute/Core.hpp
Normal file
@@ -0,0 +1,39 @@
// SPDX-License-Identifier: Apache-2.0

/**
 * Copyright (c) 2023 Nomic, Inc. All rights reserved.
 *
 * This software is licensed under the terms of the Software for Open Models License (SOM),
 * version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
 * this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
 */

#pragma once

#include <vulkan/vulkan.hpp>

// Typedefs to simplify interaction with core types
namespace kp {
typedef std::array<uint32_t, 3> Workgroup;
typedef std::vector<float> Constants;
}

// Must be after vulkan is included
#ifndef KOMPUTE_VK_API_VERSION
#ifndef KOMPUTE_VK_API_MAJOR_VERSION
#define KOMPUTE_VK_API_MAJOR_VERSION 1
#endif // KOMPUTE_VK_API_MAJOR_VERSION
#ifndef KOMPUTE_VK_API_MINOR_VERSION
#define KOMPUTE_VK_API_MINOR_VERSION 2
#endif // KOMPUTE_VK_API_MINOR_VERSION
#define KOMPUTE_VK_API_VERSION                                                 \
    VK_MAKE_VERSION(                                                           \
      KOMPUTE_VK_API_MAJOR_VERSION, KOMPUTE_VK_API_MINOR_VERSION, 0)
#endif // KOMPUTE_VK_API_VERSION

#if defined(KOMPUTE_BUILD_PYTHON)
#include <pybind11/pybind11.h>
namespace py = pybind11;
// from python/src/main.cpp
extern py::object kp_trace, kp_debug, kp_info, kp_warning, kp_error;
#endif
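A one-line sketch of the Workgroup typedef above: it is just the x/y/z dispatch counts, so a 1-D dispatch over n elements with a shader local size of 32 could be expressed as follows (n is a hypothetical element count).

// Hedged sketch: ceil-divide the element count by the shader's local_size_x.
uint32_t n = 4096;                          // hypothetical element count
kp::Workgroup wg{ (n + 31) / 32, 1, 1 };    // 128 x 1 x 1 workgroups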
21
kompute/src/include/kompute/Kompute.hpp
Normal file
@@ -0,0 +1,21 @@
#pragma once

#include "Algorithm.hpp"
#include "Core.hpp"
#include "Manager.hpp"
#include "Sequence.hpp"
#include "Tensor.hpp"

#include "operations/OpAlgoDispatch.hpp"
#include "operations/OpBase.hpp"
#include "operations/OpMemoryBarrier.hpp"
#include "operations/OpMult.hpp"
#include "operations/OpTensorCopy.hpp"
#include "operations/OpTensorSyncDevice.hpp"
#include "operations/OpTensorSyncLocal.hpp"
#include "operations/OpBufferSyncDevice.hpp"
#include "operations/OpBufferSyncLocal.hpp"

// Will be built by CMake and placed inside the build directory
#include "ShaderLogisticRegression.hpp"
#include "ShaderOpMult.hpp"
267
kompute/src/include/kompute/Manager.hpp
Normal file
@@ -0,0 +1,267 @@
// SPDX-License-Identifier: Apache-2.0

/**
 * Copyright (c) 2023 Nomic, Inc. All rights reserved.
 *
 * This software is licensed under the terms of the Software for Open Models License (SOM),
 * version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
 * this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
 */

#pragma once

#include <set>
#include <unordered_map>

#include "kompute/Core.hpp"

#include "kompute/Sequence.hpp"
#include "logger/Logger.hpp"

#define KP_DEFAULT_SESSION "DEFAULT"

namespace kp {

/**
    Base orchestrator which creates and manages device and child components
*/
class Manager
{
  public:
    /**
        Base constructor.
    */
    Manager();

    /**
     * Manager destructor which would ensure all owned resources are destroyed
     * unless explicitly stated that resources should not be destroyed or freed.
     */
    ~Manager();

    bool hasDevice() const {
        return this->mDevice.get();
    }

    /**
     * Initialize a device.
     *
     * @param physicalDeviceIndex The index of the physical device to use
     * @param familyQueueIndices (Optional) List of queue indices to add for
     * explicit allocation
     * @param desiredExtensions The desired extensions to load from
     * physicalDevice
     */
    void initializeDevice(uint32_t physicalDeviceIndex,
                          const std::vector<uint32_t>& familyQueueIndices = {},
                          const std::vector<std::string>& desiredExtensions = {});

    /**
     * Create a managed sequence that will be destroyed by this manager
     * if it hasn't been destroyed by its reference count going to zero.
     *
     * @param queueIndex The queue to use from the available queues
     * @param totalTimestamps The maximum number of timestamps to allocate.
     * If zero (default), disables latching of timestamps.
     * @returns Shared pointer with initialised sequence
     */
    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0,
                                       uint32_t totalTimestamps = 0);

    /**
     * Create a managed tensor that will be destroyed by this manager
     * if it hasn't been destroyed by its reference count going to zero.
     *
     * @param data The data to initialize the tensor with
     * @param tensorType The type of tensor to initialize
     * @returns Shared pointer with initialised tensor
     */
    template<typename T>
    std::shared_ptr<TensorT<T>> tensorT(
      const std::vector<T>& data,
      vk::DeviceMemory *primaryMemory,
      vk::Buffer *primaryBuffer,
      vk::DeviceMemory *stagingMemory,
      vk::Buffer *stagingBuffer,
      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
    {
        KP_LOG_DEBUG("Kompute Manager tensor creation triggered");

        std::shared_ptr<TensorT<T>> tensor{ new kp::TensorT<T>(
          this->mPhysicalDevice, this->mDevice, data, primaryMemory, primaryBuffer, stagingMemory, stagingBuffer, tensorType) };

        if (this->mManageResources) {
            this->mManagedTensors.push_back(tensor);
        }

        return tensor;
    }

    std::shared_ptr<Tensor> tensor(
      void* data,
      uint32_t elementTotalCount,
      uint64_t memorySize,
      const Tensor::TensorDataTypes& dataType,
      vk::DeviceMemory *primaryMemory,
      vk::Buffer *primaryBuffer,
      vk::DeviceMemory *stagingMemory,
      vk::Buffer *stagingBuffer,
      vk::DeviceSize offset,
      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
    {
        std::shared_ptr<Tensor> tensor{ new kp::Tensor(this->mPhysicalDevice,
                                                       this->mDevice,
                                                       data,
                                                       elementTotalCount,
                                                       memorySize,
                                                       dataType,
                                                       primaryMemory,
                                                       primaryBuffer,
                                                       stagingMemory,
                                                       stagingBuffer,
                                                       offset,
                                                       tensorType) };

        if (this->mManageResources) {
            this->mManagedTensors.push_back(tensor);
        }

        return tensor;
    }

    /**
     * Default non-template function that can be used to create algorithm
     * objects which provides default types to the push and spec constants as
     * floats.
     *
     * @param tensors (optional) The tensors to initialise the algorithm with
     * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
     * @param workgroup (optional) kp::Workgroup for algorithm to use, and
     * defaults to (tensor[0].size(), 1, 1)
     * @param specializationConstants (optional) float vector to use for
     * specialization constants, and defaults to an empty constant
     * @param pushConstants (optional) float vector to use for push constants,
     * and defaults to an empty constant
     * @returns Shared pointer with initialised algorithm
     */
    std::shared_ptr<Algorithm> algorithm(
      vk::DescriptorPool *pool,
      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
      const std::vector<uint32_t>& spirv = {},
      const Workgroup& workgroup = {},
      const std::vector<float>& specializationConstants = {},
      const std::vector<float>& pushConstants = {})
    {
        return this->algorithm<>(
          pool, tensors, spirv, workgroup, specializationConstants, pushConstants);
    }

    /**
     * Create a managed algorithm that will be destroyed by this manager
     * if it hasn't been destroyed by its reference count going to zero.
     *
     * @param tensors (optional) The tensors to initialise the algorithm with
     * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
     * @param workgroup (optional) kp::Workgroup for algorithm to use, and
     * defaults to (tensor[0].size(), 1, 1)
     * @param specializationConstants (optional) templatable vector parameter to
     * use for specialization constants, and defaults to an empty constant
     * @param pushConstants (optional) templatable vector parameter to use for
     * push constants, and defaults to an empty constant
     * @returns Shared pointer with initialised algorithm
     */
    template<typename S = float, typename P = float>
    std::shared_ptr<Algorithm> algorithm(
      vk::DescriptorPool *pool,
      const std::vector<std::shared_ptr<Tensor>>& tensors,
      const std::vector<uint32_t>& spirv,
      const Workgroup& workgroup,
      const std::vector<S>& specializationConstants,
      const std::vector<P>& pushConstants)
    {

        KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");

        std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
          this->mDevice,
          pool,
          tensors,
          spirv,
          workgroup,
          specializationConstants,
          pushConstants) };

        if (this->mManageResources) {
            this->mManagedAlgorithms.push_back(algorithm);
        }

        return algorithm;
    }

    /**
     * Destroy the GPU resources and all managed resources by manager.
     **/
    void destroy();
    /**
     * Run a pseudo-garbage collection to release all the managed resources
     * that have been already freed due to these reaching to zero ref count.
     **/
    void clear();

    /**
     * Information about the current device.
     *
     * @return vk::PhysicalDeviceProperties containing information about the
     * device
     **/
    vk::PhysicalDeviceProperties getDeviceProperties() const;

    /**
     * List the devices available in the current vulkan instance.
     *
     * @return vector of physical devices containing their respective properties
     **/
    std::vector<vk::PhysicalDevice> listDevices() const;

    /**
     * The current Vulkan instance.
     *
     * @return a shared pointer to the current Vulkan instance held by this
     * object
     **/
    std::shared_ptr<vk::Instance> getVkInstance() const;

    std::shared_ptr<vk::Device> device() const { return mDevice; }
    std::shared_ptr<vk::PhysicalDevice> physicalDevice() const { return mPhysicalDevice; }

  private:
    // -------------- OPTIONALLY OWNED RESOURCES
    std::shared_ptr<vk::Instance> mInstance = nullptr;
    bool mFreeInstance = false;
    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
    std::shared_ptr<vk::Device> mDevice = nullptr;
    bool mFreeDevice = false;

    // -------------- ALWAYS OWNED RESOURCES
    std::vector<std::weak_ptr<Tensor>> mManagedTensors;
    std::vector<std::weak_ptr<Sequence>> mManagedSequences;
    std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;

    std::vector<uint32_t> mComputeQueueFamilyIndices;
    std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;

    bool mManageResources = false;

#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
    vk::DebugReportCallbackEXT mDebugReportCallback;
    vk::DispatchLoaderDynamic mDebugDispatcher;
#endif

    // Create functions
    void createInstance();
    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
                      uint32_t physicalDeviceIndex = 0,
                      const std::vector<std::string>& desiredExtensions = {});
};

} // End namespace kp
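A minimal lifecycle sketch implied by this header, assuming the default constructor followed by initializeDevice is the intended entry point in this fork:

// Hedged sketch: create a manager, pick physical device 0, then derive a
// sequence that latches up to 8 timestamps for profiling.
kp::Manager mgr;
mgr.initializeDevice(0);
auto seq = mgr.sequence(0 /* queueIndex */, 8 /* totalTimestamps */);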
313
kompute/src/include/kompute/Sequence.hpp
Normal file
313
kompute/src/include/kompute/Sequence.hpp
Normal file
|
@ -0,0 +1,313 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Container of operations that can be sent to GPU as batch
|
||||
*/
|
||||
class Sequence : public std::enable_shared_from_this<Sequence>
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Main constructor for sequence which requires core vulkan components to
|
||||
* generate all dependent resources.
|
||||
*
|
||||
* @param physicalDevice Vulkan physical device
|
||||
* @param device Vulkan logical device
|
||||
* @param computeQueue Vulkan compute queue
|
||||
* @param queueIndex Vulkan compute queue index in device
|
||||
* @param totalTimestamps Maximum number of timestamps to allocate
|
||||
*/
|
||||
Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
std::shared_ptr<vk::Queue> computeQueue,
|
||||
uint32_t queueIndex,
|
||||
uint32_t totalTimestamps = 0);
|
||||
/**
|
||||
* Destructor for sequence which is responsible for cleaning all subsequent
|
||||
* owned operations.
|
||||
*/
|
||||
~Sequence();
|
||||
|
||||
/**
|
||||
* Record function for operation to be added to the GPU queue in batch. This
|
||||
* template requires classes to be derived from the OpBase class. This
|
||||
* function also requires the Sequence to be recording, otherwise it will
|
||||
* not be able to add the operation.
|
||||
*
|
||||
* @param op Object derived from kp::BaseOp that will be recoreded by the
|
||||
* sequence which will be used when the operation is evaluated.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Record function for operation to be added to the GPU queue in batch. This
|
||||
* template requires classes to be derived from the OpBase class. This
|
||||
* function also requires the Sequence to be recording, otherwise it will
|
||||
* not be able to add the operation.
|
||||
*
|
||||
* @param tensors Vector of tensors to use for the operation
|
||||
* @param TArgs Template parameters that are used to initialise operation
|
||||
* which allows for extensible configurations on initialisation.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> record(
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
|
||||
return this->record(op);
|
||||
}
|
||||
/**
|
||||
* Record function for operation to be added to the GPU queue in batch. This
|
||||
* template requires classes to be derived from the OpBase class. This
|
||||
* function also requires the Sequence to be recording, otherwise it will
|
||||
* not be able to add the operation.
|
||||
*
|
||||
* @param algorithm Algorithm to use for the record often used for OpAlgo
|
||||
* operations
|
||||
* @param TArgs Template parameters that are used to initialise operation
|
||||
* which allows for extensible configurations on initialisation.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
|
||||
TArgs&&... params)
|
||||
{
|
||||
std::shared_ptr<T> op{ new T(algorithm,
|
||||
std::forward<TArgs>(params)...) };
|
||||
return this->record(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job synchronously (with a barrier).
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> eval();
|
||||
|
||||
/**
|
||||
* Resets all the recorded and stored operations, records the operation
|
||||
* provided and submits into the gpu as a submit job synchronously (with a
|
||||
* barrier).
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @param tensors Vector of tensors to use for the operation
|
||||
* @param TArgs Template parameters that are used to initialise operation
|
||||
* which allows for extensible configurations on initialisation.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
|
||||
return this->eval(op);
|
||||
}
|
||||
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> eval(vk::Buffer *primaryBuffer,
|
||||
vk::Buffer *stagingBuffer,
|
||||
vk::DeviceSize size,
|
||||
TArgs&&... params)
|
||||
{
|
||||
std::shared_ptr<T> op{ new T(primaryBuffer, stagingBuffer, size, std::forward<TArgs>(params)...) };
|
||||
return this->eval(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job with a barrier.
|
||||
*
|
||||
* @param algorithm Algorithm to use for the record often used for OpAlgo
|
||||
* operations
|
||||
* @param TArgs Template parameters that are used to initialise operation
|
||||
* which allows for extensible configurations on initialisation.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
|
||||
TArgs&&... params)
|
||||
{
|
||||
std::shared_ptr<T> op{ new T(algorithm,
|
||||
std::forward<TArgs>(params)...) };
|
||||
return this->eval(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
|
||||
* operations into the gpu as a submit job without a barrier. EvalAwait()
|
||||
* must ALWAYS be called after to ensure the sequence is terminated
|
||||
* correctly.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAsync();
|
||||
/**
|
||||
* Clears current operations, then records the provided one and submits it
* into the gpu as a submit job without a barrier. EvalAwait()
|
||||
* must ALWAYS be called after to ensure the sequence is terminated
|
||||
* correctly.
|
||||
*
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAsync(std::shared_ptr<OpBase> op);
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job without a barrier.
|
||||
*
|
||||
* @param tensors Vector of tensors to use for the operation
|
||||
* @param TArgs Template parameters that are used to initialise operation
|
||||
* which allows for extensible configurations on initialisation.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> evalAsync(
|
||||
std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
TArgs&&... params)
|
||||
{
|
||||
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
/**
|
||||
* Eval Async sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job without a barrier.
|
||||
*
|
||||
* @param algorithm Algorithm to use for the record often used for OpAlgo
|
||||
* operations
|
||||
* @param TArgs Template parameters that are used to initialise operation
|
||||
* which allows for extensible configurations on initialisation.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
template<typename T, typename... TArgs>
|
||||
std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
|
||||
TArgs&&... params)
|
||||
{
|
||||
std::shared_ptr<T> op{ new T(algorithm,
|
||||
std::forward<TArgs>(params)...) };
|
||||
return this->evalAsync(op);
|
||||
}
|
||||
|
||||
/**
|
||||
* Eval Await waits for the fence to finish processing and then once it
|
||||
* finishes, it runs the postEval of all operations.
|
||||
*
|
||||
* @param waitFor Number of milliseconds to wait before timing out.
|
||||
* @return shared_ptr<Sequence> of the Sequence class itself
|
||||
*/
|
||||
std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);
|
||||
|
||||
/**
|
||||
* Clear function clears all operations currently recorded and starts
|
||||
* recording again.
|
||||
*/
|
||||
void clear();
|
||||
|
||||
/**
|
||||
* Return the timestamps that were latched at the beginning and
|
||||
* after each operation during the last eval() call.
|
||||
*/
|
||||
std::vector<std::uint64_t> getTimestamps();
|
||||
|
||||
/**
|
||||
* Begins recording commands for commands to be submitted into the command
|
||||
* buffer.
|
||||
*/
|
||||
void begin();
|
||||
|
||||
/**
|
||||
* Ends the recording and stops recording commands when the record command
|
||||
* is sent.
|
||||
*/
|
||||
void end();
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently in recording mode.
|
||||
*
|
||||
* @return Boolean stating if recording ongoing.
|
||||
*/
|
||||
bool isRecording() const;
|
||||
|
||||
/**
|
||||
* Returns true if the sequence has been initialised; this is based on whether
* the referenced GPU resources are set.
|
||||
*
|
||||
* @return Boolean stating if is initialized
|
||||
*/
|
||||
bool isInit() const;
|
||||
|
||||
/**
|
||||
* Clears command buffer and triggers re-record of all the current
|
||||
* operations saved, which is useful if the underlying kp::Tensors or
|
||||
* kp::Algorithms are modified and need to be re-recorded.
|
||||
*/
|
||||
void rerecord();
|
||||
|
||||
/**
|
||||
* Returns true if the sequence is currently running - mostly used for async
|
||||
* workloads.
|
||||
*
|
||||
* @return Boolean stating if currently running.
|
||||
*/
|
||||
bool isRunning() const;
|
||||
|
||||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory
|
||||
* and sets the sequence as init=False.
|
||||
*/
|
||||
void destroy();
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
|
||||
std::shared_ptr<vk::Device> mDevice = nullptr;
|
||||
std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
|
||||
uint32_t mQueueIndex = -1;
|
||||
|
||||
// -------------- OPTIONALLY OWNED RESOURCES
|
||||
std::shared_ptr<vk::CommandPool> mCommandPool = nullptr;
|
||||
bool mFreeCommandPool = false;
|
||||
std::shared_ptr<vk::CommandBuffer> mCommandBuffer = nullptr;
|
||||
bool mFreeCommandBuffer = false;
|
||||
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
vk::Fence mFence;
|
||||
std::vector<std::shared_ptr<OpBase>> mOperations{};
|
||||
std::shared_ptr<vk::QueryPool> timestampQueryPool = nullptr;
|
||||
|
||||
// State
|
||||
bool mRecording = false;
|
||||
bool mIsRunning = false;
|
||||
|
||||
// Create functions
|
||||
void createCommandPool();
|
||||
void createCommandBuffer();
|
||||
void createTimestampQueryPool(uint32_t totalTimestamps);
|
||||
};
|
||||
|
||||
} // End namespace kp
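A minimal usage sketch (not part of the patch) of how the record/eval/evalAsync/evalAwait members declared above are meant to be combined. The sequence, algorithm and tensors are assumed to come from a kp::Manager created elsewhere in this patch, and the individual operation headers below are the ones added later in this commit.

// Sketch only: assumes a manager-created sequence, algorithm and tensors.
#include "kompute/Sequence.hpp"
#include "kompute/operations/OpAlgoDispatch.hpp"
#include "kompute/operations/OpTensorSyncDevice.hpp"
#include "kompute/operations/OpTensorSyncLocal.hpp"

void runBatch(std::shared_ptr<kp::Sequence> seq,
              std::shared_ptr<kp::Algorithm> algo,
              std::vector<std::shared_ptr<kp::Tensor>> tensors)
{
    // Synchronous path: each record() returns the sequence so calls chain,
    // and eval() submits with a barrier and waits for completion.
    seq->record<kp::OpTensorSyncDevice>(tensors)
       ->record<kp::OpAlgoDispatch>(algo)
       ->record<kp::OpTensorSyncLocal>(tensors)
       ->eval();

    // Asynchronous path: submit without a barrier, then wait explicitly.
    seq->evalAsync<kp::OpAlgoDispatch>(algo);
    // ... CPU-side work can overlap with the GPU here ...
    seq->evalAwait(); // waits on the fence, then runs each op's postEval
}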
|
306
kompute/src/include/kompute/Tensor.hpp
Normal file
|
@@ -0,0 +1,306 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
#include "logger/Logger.hpp"
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Structured data used in GPU operations.
|
||||
*
|
||||
* Tensors are the base building block in Kompute to perform operations across
|
||||
* GPUs. Each tensor would have a respective Vulkan memory and buffer, which
|
||||
* would be used to store their respective data. The tensors can be used for GPU
|
||||
* data storage or transfer.
|
||||
*/
|
||||
class Tensor
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Type for tensors created: Device allows memory to be transferred from
|
||||
* staging buffers. Staging are host memory visible. Storage are device
|
||||
* visible but are not set up to transfer or receive data (only for shader
|
||||
* storage).
|
||||
*/
|
||||
enum class TensorTypes
|
||||
{
|
||||
eDevice = 0, ///< Type is device memory, source and destination
|
||||
eHost = 1, ///< Type is host memory, source and destination
|
||||
eStorage = 2, ///< Type is Device memory (only)
|
||||
};
|
||||
enum class TensorDataTypes
|
||||
{
|
||||
eBool = 0,
|
||||
eInt = 1,
|
||||
eUnsignedInt = 2,
|
||||
eFloat = 3,
|
||||
eDouble = 4,
|
||||
};
|
||||
|
||||
static std::string toString(TensorDataTypes dt);
|
||||
static std::string toString(TensorTypes dt);
|
||||
|
||||
/**
|
||||
* Constructor with data provided which would be used to create the
|
||||
* respective vulkan buffer and memory.
|
||||
*
|
||||
* @param physicalDevice The physical device to use to fetch properties
|
||||
* @param device The device to use to create the buffer and memory from
|
||||
* @param data Non-zero-sized vector of data that will be used by the
|
||||
* tensor
|
||||
* @param tensorTypes Type for the tensor which is of type TensorTypes
|
||||
*/
|
||||
Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
|
||||
std::shared_ptr<vk::Device> device,
|
||||
void* data,
|
||||
uint32_t elementTotalCount,
|
||||
uint32_t memorySize,
|
||||
const TensorDataTypes& dataType,
|
||||
vk::DeviceMemory *primaryMemory,
|
||||
vk::Buffer *primaryBuffer,
|
||||
vk::DeviceMemory *stagingMemory,
|
||||
vk::Buffer *stagingBuffer,
|
||||
vk::DeviceSize offset,
|
||||
const TensorTypes& tensorType = TensorTypes::eDevice);
|
||||
|
||||
/**
|
||||
* Destructor which is in charge of freeing vulkan resources unless they
|
||||
* have been provided externally.
|
||||
*/
|
||||
virtual ~Tensor();
|
||||
|
||||
/**
|
||||
* Function to trigger reinitialisation of the tensor buffer and memory with
|
||||
* new data as well as new potential device type.
|
||||
*
|
||||
* @param data Vector of data to use to initialise vector from
|
||||
* @param tensorType The type to use for the tensor
|
||||
*/
|
||||
void rebuild(void* data,
|
||||
uint32_t elementTotalCount,
|
||||
uint64_t memorySize,
|
||||
vk::DeviceMemory *primaryMemory,
|
||||
vk::Buffer *primaryBuffer,
|
||||
vk::DeviceMemory *stagingMemory,
|
||||
vk::Buffer *stagingBuffer,
|
||||
vk::DeviceSize offset);
|
||||
|
||||
/**
|
||||
* Destroys and frees the GPU resources which include the buffer and memory.
|
||||
*/
|
||||
void destroy();
|
||||
|
||||
/**
|
||||
* Check whether tensor is initialized based on the created gpu resources.
|
||||
*
|
||||
* @returns Boolean stating whether tensor is initialized
|
||||
*/
|
||||
bool isInit();
|
||||
|
||||
/**
|
||||
* Retrieve the tensor type of the Tensor
|
||||
*
|
||||
* @return Tensor type of tensor
|
||||
*/
|
||||
TensorTypes tensorType();
|
||||
|
||||
/**
|
||||
* Records a copy from the memory of the tensor provided to the current
|
||||
* tensor. This is intended to pass memory into processing, to perform
* a staging buffer transfer, or to gather output (among others).
|
||||
*
|
||||
* @param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
* @param copyFromTensor Tensor to copy the data from
|
||||
*/
|
||||
void recordCopyFrom(const vk::CommandBuffer& commandBuffer,
|
||||
std::shared_ptr<Tensor> copyFromTensor);
|
||||
|
||||
/**
|
||||
* Records a copy from the internal staging memory to the device memory
|
||||
* using an optional barrier to wait for the operation. This function would
|
||||
* only be relevant for kp::Tensors of type eDevice.
|
||||
*
|
||||
* @param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
*/
|
||||
void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer);
|
||||
|
||||
/**
|
||||
* Records a copy from the internal device memory to the staging memory
|
||||
* using an optional barrier to wait for the operation. This function would
|
||||
* only be relevant for kp::Tensors of type eDevice.
|
||||
*
|
||||
* @param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
*/
|
||||
void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer);
|
||||
|
||||
/**
|
||||
* Records the buffer memory barrier into the primary buffer and command
|
||||
* buffer which ensures that relevant data transfers are carried out
|
||||
* correctly.
|
||||
*
|
||||
* @param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
* @param srcAccessMask Access flags for source access mask
|
||||
* @param dstAccessMask Access flags for destination access mask
|
||||
* @param scrStageMask Pipeline stage flags for source stage mask
|
||||
* @param dstStageMask Pipeline stage flags for destination stage mask
|
||||
*/
|
||||
void recordPrimaryBufferMemoryBarrier(
|
||||
const vk::CommandBuffer& commandBuffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask);
|
||||
/**
|
||||
* Records the buffer memory barrier into the staging buffer and command
|
||||
* buffer which ensures that relevant data transfers are carried out
|
||||
* correctly.
|
||||
*
|
||||
* @param commandBuffer Vulkan Command Buffer to record the commands into
|
||||
* @param srcAccessMask Access flags for source access mask
|
||||
* @param dstAccessMask Access flags for destination access mask
|
||||
* @param scrStageMask Pipeline stage flags for source stage mask
|
||||
* @param dstStageMask Pipeline stage flags for destination stage mask
|
||||
*/
|
||||
void recordStagingBufferMemoryBarrier(
|
||||
const vk::CommandBuffer& commandBuffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask);
|
||||
|
||||
/**
|
||||
* Constructs a vulkan descriptor buffer info which can be used to specify
|
||||
* and reference the underlying buffer component of the tensor without
|
||||
* exposing it.
|
||||
*
|
||||
* @return Descriptor buffer info with own buffer
|
||||
*/
|
||||
vk::DescriptorBufferInfo constructDescriptorBufferInfo();
|
||||
|
||||
/**
|
||||
* Returns the size/magnitude of the Tensor, which will be the total number
|
||||
* of elements across all dimensions
|
||||
*
|
||||
* @return Unsigned integer representing the total number of elements
|
||||
*/
|
||||
uint32_t size();
|
||||
|
||||
/**
|
||||
* Returns the total memory size of the data contained by the Tensor object
|
||||
*
|
||||
* @return Unsigned integer representing the memory of the tensor in bytes.
|
||||
*/
|
||||
uint64_t memorySize();
|
||||
|
||||
/**
|
||||
* Retrieve the underlying data type of the tensor (e.g. float, int, bool)
|
||||
*
|
||||
* @return Data type of tensor of type kp::Tensor::TensorDataTypes
|
||||
*/
|
||||
TensorDataTypes dataType();
|
||||
|
||||
/**
|
||||
* Retrieve the raw data via the pointer to the memory that contains the raw
|
||||
* memory of this current tensor. This pointer is set to nullptr when
* the Tensor is destroyed.
|
||||
*
|
||||
* @return Pointer to raw memory containing raw bytes data of Tensor.
|
||||
*/
|
||||
void* rawData();
|
||||
|
||||
/**
|
||||
* Sets / resets the data of the tensor which is directly done on the GPU
|
||||
* host visible memory available by the tensor.
|
||||
*/
|
||||
void setRawData(const void* data);
|
||||
|
||||
/**
|
||||
* Template to return the pointer data converted by specific type, which
|
||||
* would be any of the supported types including float, double, int32,
|
||||
* uint32 and bool.
|
||||
*
|
||||
* @return Pointer to raw memory containing raw bytes data of Tensor.
|
||||
*/
|
||||
template<typename T>
|
||||
T* data()
|
||||
{
|
||||
return (T*)this->mRawData;
|
||||
}
|
||||
|
||||
/**
|
||||
* Template to get the data of the current tensor as a vector of specific
|
||||
* type, which would be any of the supported types including float, double,
|
||||
* int32, uint32 and bool.
|
||||
*
|
||||
* @return Vector of type provided by template.
|
||||
*/
|
||||
template<typename T>
|
||||
std::vector<T> vector()
|
||||
{
|
||||
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
|
||||
}
|
||||
|
||||
protected:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
TensorTypes mTensorType;
|
||||
TensorDataTypes mDataType;
|
||||
uint32_t mSize = 0;
|
||||
uint64_t mMemorySize = 0;
|
||||
vk::DeviceSize mOffset = 0;
|
||||
void* mRawData = nullptr;
|
||||
|
||||
private:
|
||||
// -------------- NEVER OWNED RESOURCES
|
||||
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
|
||||
std::shared_ptr<vk::Device> mDevice;
|
||||
vk::Buffer *mPrimaryBuffer = nullptr;
|
||||
vk::Buffer *mStagingBuffer = nullptr;
|
||||
vk::DeviceMemory *mPrimaryMemory = nullptr;
|
||||
vk::DeviceMemory *mStagingMemory = nullptr;
|
||||
|
||||
void setGPUResources(vk::DeviceMemory *primaryMemory,
|
||||
vk::Buffer *primaryBuffer,
|
||||
vk::DeviceMemory *stagingMemory,
|
||||
vk::Buffer *stagingBuffer,
|
||||
vk::DeviceSize offset);
|
||||
void recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
|
||||
vk::Buffer *bufferFrom,
|
||||
vk::Buffer *bufferTo,
|
||||
vk::DeviceSize bufferSize,
|
||||
vk::BufferCopy copyRegion);
|
||||
void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
|
||||
const vk::Buffer& buffer,
|
||||
vk::AccessFlagBits srcAccessMask,
|
||||
vk::AccessFlagBits dstAccessMask,
|
||||
vk::PipelineStageFlagBits srcStageMask,
|
||||
vk::PipelineStageFlagBits dstStageMask);
|
||||
|
||||
// Private util functions
|
||||
vk::BufferUsageFlags getPrimaryBufferUsageFlags();
|
||||
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
|
||||
vk::BufferUsageFlags getStagingBufferUsageFlags();
|
||||
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
class TensorT : public Tensor
|
||||
{
|
||||
|
||||
public:
|
||||
~TensorT() { KP_LOG_DEBUG("Kompute TensorT destructor"); }
|
||||
|
||||
TensorDataTypes dataType();
|
||||
};
|
||||
|
||||
} // End namespace kp
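A hedged sketch (not part of the patch) of the accessor API above: reading back a float tensor after its data has been synced to host-visible memory. Creation and syncing are assumed to happen elsewhere, for example via a manager and OpTensorSyncLocal.

// Sketch only: assumes the tensor has already been synced to host memory.
#include "kompute/Tensor.hpp"
#include <cstdio>
#include <memory>
#include <vector>

void printTensor(const std::shared_ptr<kp::Tensor>& t)
{
    if (t->dataType() != kp::Tensor::TensorDataTypes::eFloat) {
        return; // the templated accessors perform no type checking themselves
    }
    std::vector<float> values = t->vector<float>(); // copies size() elements
    for (uint32_t i = 0; i < t->size(); ++i) {
        std::printf("tensor[%u] = %f\n", i, values[i]);
    }
}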
|
197
kompute/src/include/kompute/logger/Logger.hpp
Normal file
|
@@ -0,0 +1,197 @@
|
|||
#pragma once
|
||||
|
||||
#define KOMPUTE_LOG_LEVEL_TRACE 0
|
||||
#define KOMPUTE_LOG_LEVEL_DEBUG 1
|
||||
#define KOMPUTE_LOG_LEVEL_INFO 2
|
||||
#define KOMPUTE_LOG_LEVEL_WARN 3
|
||||
#define KOMPUTE_LOG_LEVEL_ERROR 4
|
||||
#define KOMPUTE_LOG_LEVEL_CRITICAL 5
|
||||
#define KOMPUTE_LOG_LEVEL_OFF 6
|
||||
|
||||
// Logging is disabled entirely.
|
||||
#if KOMPUTE_OPT_LOG_LEVEL_DISABLED
|
||||
#define KP_LOG_TRACE(...)
|
||||
#define KP_LOG_DEBUG(...)
|
||||
#define KP_LOG_INFO(...)
|
||||
#define KP_LOG_WARN(...)
|
||||
#define KP_LOG_ERROR(...)
|
||||
#else
|
||||
|
||||
#if !KOMPUTE_OPT_USE_SPDLOG
|
||||
#if VK_USE_PLATFORM_ANDROID_KHR
|
||||
#include <android/log.h>
|
||||
#include <fmt/core.h>
|
||||
static const char* KOMPUTE_LOG_TAG = "KomputeLog";
|
||||
#else
|
||||
#if KOMPUTE_BUILD_PYTHON
|
||||
#include <pybind11/pybind11.h>
|
||||
namespace py = pybind11;
|
||||
// from python/src/main.cpp
|
||||
extern py::object kp_trace, kp_debug, kp_info, kp_warning, kp_error;
|
||||
#else
|
||||
#include <fmt/core.h>
|
||||
#endif // KOMPUTE_BUILD_PYTHON
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#else
|
||||
#include <spdlog/spdlog.h>
|
||||
#endif // !KOMPUTE_OPT_USE_SPDLOG
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
namespace logger {
|
||||
// Setup the logger, note the loglevel can not be set below the CMake log level
|
||||
// (To change this use -DKOMPUTE_OPT_LOG_LEVEL=...)
|
||||
void
|
||||
setupLogger();
|
||||
|
||||
// Logging is enabled, but we do not use Spdlog. So we use fmt in case nothing
|
||||
// else is defined, overriding logging.
|
||||
#if !KOMPUTE_OPT_USE_SPDLOG
|
||||
|
||||
#ifndef KP_LOG_TRACE
|
||||
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_TRACE
|
||||
#if VK_USE_PLATFORM_ANDROID_KHR
|
||||
#define KP_LOG_TRACE(...) \
|
||||
((void)__android_log_write( \
|
||||
ANDROID_LOG_VERBOSE, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
|
||||
#else
|
||||
#if KOMPUTE_BUILD_PYTHON
|
||||
#define KP_LOG_TRACE(...) kp_trace(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_TRACE(...) \
|
||||
fmt::print("[{} {}] [trace] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // KOMPUTE_BUILD_PYTHON
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#else
|
||||
#define KP_LOG_TRACE(...)
|
||||
#endif
|
||||
#endif // !KP_LOG_TRACE
|
||||
|
||||
#ifndef KP_LOG_DEBUG
|
||||
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_DEBUG
|
||||
#if VK_USE_PLATFORM_ANDROID_KHR
|
||||
#define KP_LOG_DEBUG(...) \
|
||||
((void)__android_log_write( \
|
||||
ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
|
||||
#else
|
||||
#if KOMPUTE_BUILD_PYTHON
|
||||
#define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#ifdef __FILE_NAME__ // gcc 12 provides only file name without path
|
||||
#define KP_LOG_DEBUG(...) \
|
||||
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE_NAME__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_DEBUG(...) \
|
||||
fmt::print("[{} {}] [debug] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // __FILE_NAME__
|
||||
#endif // KOMPUTE_BUILD_PYTHON
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#else
|
||||
#define KP_LOG_DEBUG(...)
|
||||
#endif
|
||||
#endif // !KP_LOG_DEBUG
|
||||
|
||||
#ifndef KP_LOG_INFO
|
||||
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_INFO
|
||||
#if VK_USE_PLATFORM_ANDROID_KHR
|
||||
#define KP_LOG_INFO(...) \
|
||||
((void)__android_log_write( \
|
||||
ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
|
||||
#else
|
||||
#if KOMPUTE_BUILD_PYTHON
|
||||
#define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_INFO(...) \
|
||||
fmt::print("[{} {}] [info] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // KOMPUTE_BUILD_PYTHON
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#else
|
||||
#define KP_LOG_INFO(...)
|
||||
#endif
|
||||
#endif // !KP_LOG_INFO
|
||||
|
||||
#ifndef KP_LOG_WARN
|
||||
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_WARN
|
||||
#if VK_USE_PLATFORM_ANDROID_KHR
|
||||
#define KP_LOG_WARN(...) \
|
||||
((void)__android_log_write( \
|
||||
ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
|
||||
#else
|
||||
#if KOMPUTE_BUILD_PYTHON
|
||||
#define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_WARN(...) \
|
||||
fmt::print("[{} {}] [warn] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // KOMPUTE_BUILD_PYTHON
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#else
|
||||
#define KP_LOG_WARN(...)
|
||||
#endif
|
||||
#endif // !KP_LOG_WARN
|
||||
|
||||
#ifndef KP_LOG_ERROR
|
||||
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_ERROR
|
||||
#if VK_USE_PLATFORM_ANDROID_KHR
|
||||
#define KP_LOG_ERROR(...) \
|
||||
((void)__android_log_write( \
|
||||
ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
|
||||
#else
|
||||
#if KOMPUTE_BUILD_PYTHON
|
||||
#define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__))
|
||||
#else
|
||||
#define KP_LOG_ERROR(...) \
|
||||
fmt::print("[{} {}] [error] [{}:{}] {}\n", \
|
||||
__DATE__, \
|
||||
__TIME__, \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
fmt::format(__VA_ARGS__))
|
||||
#endif // KOMPUTE_BUILD_PYTHON
|
||||
#endif // VK_USE_PLATFORM_ANDROID_KHR
|
||||
#else
|
||||
#define KP_LOG_ERROR(...)
|
||||
#endif
|
||||
#endif // !KP_LOG_ERROR
|
||||
#else
|
||||
|
||||
#define KP_LOG_TRACE(...) SPDLOG_TRACE(__VA_ARGS__)
|
||||
#define KP_LOG_DEBUG(...) SPDLOG_DEBUG(__VA_ARGS__)
|
||||
#define KP_LOG_INFO(...) SPDLOG_INFO(__VA_ARGS__)
|
||||
#define KP_LOG_WARN(...) SPDLOG_WARN(__VA_ARGS__)
|
||||
#define KP_LOG_ERROR(...) SPDLOG_ERROR(__VA_ARGS__)
|
||||
|
||||
void
|
||||
setLogLevel(spdlog::level::level_enum level);
|
||||
|
||||
spdlog::level::level_enum
|
||||
getLogLevel();
|
||||
|
||||
#endif // !KOMPUTE_OPT_USE_SPDLOG
|
||||
} // namespace logger
|
||||
|
||||
#endif // KOMPUTE_OPT_LOG_LEVEL_DISABLED
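A small usage sketch (not part of the patch): every macro above takes an fmt-style format string, whether the spdlog backend or the plain fmt fallback is compiled in.

// Sketch only: illustrates the fmt-style format strings the macros expect.
#include "kompute/logger/Logger.hpp"
#include <cstdint>

void reportDispatch(uint32_t workgroups, bool ok)
{
    KP_LOG_DEBUG("Dispatching {} workgroups", workgroups);
    if (!ok) {
        KP_LOG_ERROR("Dispatch failed after {} workgroups", workgroups);
    }
}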
|
86
kompute/src/include/kompute/operations/OpAlgoDispatch.hpp
Normal file
|
@@ -0,0 +1,86 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* By default it enables the user to provide a dynamic number of tensors
|
||||
* which are then passed as inputs.
|
||||
*/
|
||||
class OpAlgoDispatch : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor that stores the algorithm to use as well as the relevant
|
||||
* push constants to override when recording.
|
||||
*
|
||||
* @param algorithm The algorithm object to use for dispatch
|
||||
* @param pushConstants The push constants to use for override
|
||||
*/
|
||||
template<typename T = float>
|
||||
OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
|
||||
const std::vector<T>& pushConstants = {})
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");
|
||||
|
||||
this->mAlgorithm = algorithm;
|
||||
|
||||
if (pushConstants.size()) {
|
||||
uint32_t memorySize = sizeof(decltype(pushConstants.back()));
|
||||
uint32_t size = pushConstants.size();
|
||||
uint32_t totalSize = size * memorySize;
|
||||
this->mPushConstantsData = malloc(totalSize);
|
||||
memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
|
||||
this->mPushConstantsDataTypeMemorySize = memorySize;
|
||||
this->mPushConstantsSize = size;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
virtual ~OpAlgoDispatch() override;
|
||||
|
||||
/**
|
||||
* This records the commands that are to be sent to the GPU. This includes
|
||||
* the barriers that ensure the memory has been copied before going in and
|
||||
* out of the shader, as well as the dispatch operation that sends the
|
||||
* shader processing to the gpu. This function also records the GPU memory
|
||||
* copy of the output data for the staging buffer so it can be read by the
|
||||
* host.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any postEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::shared_ptr<Algorithm> mAlgorithm;
|
||||
void* mPushConstantsData = nullptr;
|
||||
uint32_t mPushConstantsDataTypeMemorySize = 0;
|
||||
uint32_t mPushConstantsSize = 0;
|
||||
};
|
||||
|
||||
} // End namespace kp
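A hedged sketch (not part of the patch) of the constructor above: the push constants are copied into a raw heap block, so any trivially copyable element type works, with float as the default. The algorithm is assumed to have been built elsewhere with a matching push-constant layout.

// Sketch only: dispatches an existing algorithm with one float push constant.
#include "kompute/Sequence.hpp"
#include "kompute/operations/OpAlgoDispatch.hpp"

void dispatchWithScale(std::shared_ptr<kp::Sequence> seq,
                       std::shared_ptr<kp::Algorithm> algo,
                       float scale)
{
    std::vector<float> push{ scale };
    seq->record<kp::OpAlgoDispatch>(algo, push)->eval();
}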
|
62
kompute/src/include/kompute/operations/OpBase.hpp
Normal file
|
@@ -0,0 +1,62 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Base Operation which provides the high level interface that Kompute
|
||||
* operations implement in order to perform a set of actions in the GPU.
|
||||
*
|
||||
* Operations can perform actions on tensors, and optionally can also own an
|
||||
* Algorithm with respective parameters. kp::Operations with kp::Algorithms
|
||||
* would inherit from kp::OpBaseAlgo.
|
||||
*/
|
||||
class OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default destructor for OpBase class. This OpBase destructor class should
|
||||
* always be called to destroy and free owned resources unless it is
|
||||
* intended to destroy the resources in the parent class.
|
||||
*/
|
||||
virtual ~OpBase() { KP_LOG_DEBUG("Kompute OpBase destructor started"); }
|
||||
|
||||
/**
|
||||
* The record function is intended to only send a record command or run
|
||||
* commands that are expected to record operations that are to be submitted
|
||||
* as a batch into the GPU.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void record(const vk::CommandBuffer& commandBuffer) = 0;
|
||||
|
||||
/**
|
||||
* Pre eval is called before the Sequence has called eval and submitted the
|
||||
* commands to the GPU for processing, and can be used to perform any
|
||||
* per-eval setup steps required as the computation iteration begins. It's
|
||||
* worth noting that there are situations where eval can be called multiple
|
||||
* times, so the resources that are created should be idempotent in case
|
||||
* it's called multiple times in a row.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) = 0;
|
||||
|
||||
/**
|
||||
* Post eval is called after the Sequence has called eval and submitted the
|
||||
* commands to the GPU for processing, and can be used to perform any
|
||||
* tear-down steps required as the computation iteration finishes. It's
|
||||
* worth noting that there are situations where eval can be called multiple
|
||||
* times, so the resources that are destroyed should not require a re-init
|
||||
* unless explicitly provided by the user.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) = 0;
|
||||
};
|
||||
|
||||
} // End namespace kp
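A hedged sketch (not part of the patch) of the smallest possible operation that satisfies the interface above. It records nothing and only logs; a real operation would record Vulkan commands inside record().

// Sketch only: a hypothetical no-op operation deriving from kp::OpBase.
#include "kompute/operations/OpBase.hpp"

class OpNoop : public kp::OpBase
{
  public:
    void record(const vk::CommandBuffer& commandBuffer) override
    {
        (void)commandBuffer;
        KP_LOG_DEBUG("Kompute OpNoop record");
    }
    void preEval(const vk::CommandBuffer& commandBuffer) override { (void)commandBuffer; }
    void postEval(const vk::CommandBuffer& commandBuffer) override { (void)commandBuffer; }
};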
|
|
@@ -0,0 +1,50 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
class OpBufferSyncDevice : public OpBase
|
||||
{
|
||||
public:
|
||||
OpBufferSyncDevice(
|
||||
vk::Buffer *primaryBuffer,
|
||||
vk::Buffer *stagingBuffer,
|
||||
vk::DeviceSize size);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be
|
||||
* expecting the parent to perform a release.
|
||||
*/
|
||||
~OpBufferSyncDevice() override;
|
||||
|
||||
/**
|
||||
* For device buffers, it records the copy command for the buffer to copy
|
||||
* the data from its staging to device memory.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any postEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
vk::Buffer *mPrimaryBuffer;
|
||||
vk::Buffer *mStagingBuffer;
|
||||
vk::DeviceSize mSize;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
50
kompute/src/include/kompute/operations/OpBufferSyncLocal.hpp
Normal file
|
@@ -0,0 +1,50 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
class OpBufferSyncLocal : public OpBase
|
||||
{
|
||||
public:
|
||||
OpBufferSyncLocal(
|
||||
vk::Buffer *primaryBuffer,
|
||||
vk::Buffer *stagingBuffer,
|
||||
vk::DeviceSize size);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be
|
||||
* expecting the parent to perform a release.
|
||||
*/
|
||||
~OpBufferSyncLocal() override;
|
||||
|
||||
/**
|
||||
* For device buffers, it records the copy command for the buffer to copy
|
||||
* the data from its device to staging memory.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any postEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
vk::Buffer *mPrimaryBuffer;
|
||||
vk::Buffer *mStagingBuffer;
|
||||
vk::DeviceSize mSize;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
81
kompute/src/include/kompute/operations/OpMemoryBarrier.hpp
Normal file
|
@@ -0,0 +1,81 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that provides a general abstraction that simplifies the use of
|
||||
* algorithm and parameter components which can be used with shaders.
|
||||
* It exposes the pipeline barrier functionality specifically for memory
|
||||
* barriers that can be configured through the respective source and destination
|
||||
* masks
|
||||
*/
|
||||
class OpMemoryBarrier : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor that stores tensors as well as memory barrier parameters to
|
||||
* be used to create a pipeline barrier on the respective primary or staging
|
||||
* tensor.
|
||||
*
|
||||
* @param tensors The tensors to apply the memory barriers on
|
||||
* @param srcAccessMask The kp::AccessFlagBits for the source access mask
|
||||
* @param dstAccessMask The kp::AccessFlagBits for the destination access
|
||||
* mask
|
||||
* @param srcStageMask The kp::PipelineStageFlagBits for the source stage
|
||||
* mask
|
||||
* @param dstStageMask The kp::PipelineStageFlagBits for the destination
|
||||
* stage mask
|
||||
* @param barrierOnPrimary Boolean to select primary or staging buffers on
* tensors
|
||||
*/
|
||||
OpMemoryBarrier(const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||
const vk::AccessFlagBits& srcAccessMask,
|
||||
const vk::AccessFlagBits& dstAccessMask,
|
||||
const vk::PipelineStageFlagBits& srcStageMask,
|
||||
const vk::PipelineStageFlagBits& dstStageMask,
|
||||
bool barrierOnPrimary = true);
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the reference to the
|
||||
* tensors and all the relevant access / stage masks created
|
||||
*/
|
||||
virtual ~OpMemoryBarrier() override;
|
||||
|
||||
/**
|
||||
* This records the memory barrier with the access and stage masks provided
|
||||
* across all relevant tensors.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any postEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
const vk::AccessFlagBits mSrcAccessMask;
|
||||
const vk::AccessFlagBits mDstAccessMask;
|
||||
const vk::PipelineStageFlagBits mSrcStageMask;
|
||||
const vk::PipelineStageFlagBits mDstStageMask;
|
||||
const bool mBarrierOnPrimary;
|
||||
const std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
58
kompute/src/include/kompute/operations/OpMult.hpp
Normal file
|
@@ -0,0 +1,58 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "ShaderOpMult.hpp"
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpAlgoDispatch.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that performs multiplication on two tensors and outputs the
* result to a third tensor.
|
||||
*/
|
||||
class OpMult : public OpAlgoDispatch
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor with parameters that provides the bare minimum
|
||||
* requirements for the operations to be able to create and manage their
|
||||
* sub-components.
|
||||
*
|
||||
* @param tensors Tensors that are to be used in this operation
|
||||
* @param algorithm An algorithm that will be overridden with the OpMult
|
||||
* shader data and the tensors provided which are expected to be 3
|
||||
*/
|
||||
OpMult(std::vector<std::shared_ptr<Tensor>> tensors,
|
||||
std::shared_ptr<Algorithm> algorithm)
|
||||
: OpAlgoDispatch(algorithm)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute OpMult constructor with params");
|
||||
|
||||
if (tensors.size() != 3) {
|
||||
throw std::runtime_error(
|
||||
"Kompute OpMult expected 3 tensors but got " +
|
||||
std::to_string(tensors.size()));
|
||||
}
|
||||
|
||||
const std::vector<uint32_t> spirv = std::vector<uint32_t>(
|
||||
SHADEROPMULT_COMP_SPV.begin(), SHADEROPMULT_COMP_SPV.end());
|
||||
|
||||
algorithm->rebuild<>(tensors, spirv);
|
||||
}
|
||||
|
||||
/**
|
||||
* Default destructor, which is in charge of destroying the algorithm
|
||||
* components but does not destroy the underlying tensors
|
||||
*/
|
||||
~OpMult() override { KP_LOG_DEBUG("Kompute OpMult destructor started"); }
|
||||
};
|
||||
|
||||
} // End namespace kp
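A hedged usage sketch (not part of the patch): OpMult insists on exactly three tensors, two inputs and one output, and rebuilds the given algorithm with its bundled multiplication shader before dispatching.

// Sketch only: runs OpMult through the Sequence eval shortcut.
#include "kompute/Sequence.hpp"
#include "kompute/operations/OpMult.hpp"

void multiply(std::shared_ptr<kp::Sequence> seq,
              std::shared_ptr<kp::Algorithm> algo,
              std::shared_ptr<kp::Tensor> lhs,
              std::shared_ptr<kp::Tensor> rhs,
              std::shared_ptr<kp::Tensor> out)
{
    seq->eval<kp::OpMult>({ lhs, rhs, out }, algo);
}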
|
63
kompute/src/include/kompute/operations/OpTensorCopy.hpp
Normal file
|
@@ -0,0 +1,63 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that copies the data from the first tensor to the rest of the
|
||||
* tensors provided, using a record command for all the vectors. This operation
|
||||
* does not own/manage the memory of the tensors passed to it. The operation
|
||||
* must only receive tensors of type
|
||||
*/
|
||||
class OpTensorCopy : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan
|
||||
* resources and the tensors that will be used in the operation.
|
||||
*
|
||||
* @param tensors Tensors that will be used in the operation.
|
||||
*/
|
||||
OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be
|
||||
* expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorCopy() override;
|
||||
|
||||
/**
|
||||
* Records the copy commands from the first tensor into all the other
|
||||
* tensors provided. Also optionally records a barrier.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Copies the local vectors for all the tensors to sync the data with the
|
||||
* gpu.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
|
@@ -0,0 +1,66 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
#include "kompute/Tensor.hpp"
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that syncs tensor's device by mapping local data into the device
|
||||
* memory. For TensorTypes::eDevice it will use a record operation for the
|
||||
* memory to be synced into GPU memory, which means that the operation will be
|
||||
* done in sync with GPU commands. For TensorTypes::eHost it will only map the
|
||||
* data into host memory which will happen during preEval before the recorded
|
||||
* commands are dispatched.
|
||||
*/
|
||||
class OpTensorSyncDevice : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan
|
||||
* resources and the tensors that will be used in the operation. The tensors
|
||||
* provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
* @param tensors Tensors that will be used in the operation.
|
||||
*/
|
||||
OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be
|
||||
* expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncDevice() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy
|
||||
* the data from its staging to device memory.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any postEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
vk::Buffer *mPrimaryBuffer;
|
||||
vk::Buffer *mStagingBuffer;
|
||||
vk::DeviceSize mSize;
|
||||
};
|
||||
|
||||
} // End namespace kp
|
66
kompute/src/include/kompute/operations/OpTensorSyncLocal.hpp
Normal file
|
@@ -0,0 +1,66 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "kompute/Core.hpp"
|
||||
|
||||
#include "kompute/Tensor.hpp"
|
||||
|
||||
#include "kompute/operations/OpBase.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
/**
|
||||
* Operation that syncs tensor's local memory by mapping device data into the
|
||||
* local CPU memory. For TensorTypes::eDevice it will use a record operation
|
||||
* for the memory to be synced from GPU memory, which means that the operation
|
||||
* will be done in sync with GPU commands. For TensorTypes::eHost it will
|
||||
* only map the data into host memory which will happen during preEval before
|
||||
* the recorded commands are dispatched.
|
||||
*/
|
||||
class OpTensorSyncLocal : public OpBase
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor with parameters that provides the core vulkan
|
||||
* resources and the tensors that will be used in the operation. The tensors
|
||||
* provided cannot be of type TensorTypes::eStorage.
|
||||
*
|
||||
* @param tensors Tensors that will be used in the operation.
|
||||
*/
|
||||
OpTensorSyncLocal(const std::vector<std::shared_ptr<Tensor>>& tensors);
|
||||
|
||||
/**
|
||||
* Default destructor. This class does not manage memory so it won't be
|
||||
* expecting the parent to perform a release.
|
||||
*/
|
||||
~OpTensorSyncLocal() override;
|
||||
|
||||
/**
|
||||
* For device tensors, it records the copy command for the tensor to copy
|
||||
* the data from its device to staging memory.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
void record(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* Does not perform any preEval commands.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
/**
|
||||
* For host tensors it performs the map command from the host memory into
|
||||
* local memory.
|
||||
*
|
||||
* @param commandBuffer The command buffer to record the command into.
|
||||
*/
|
||||
virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
|
||||
|
||||
private:
|
||||
// -------------- ALWAYS OWNED RESOURCES
|
||||
std::vector<std::shared_ptr<Tensor>> mTensors;
|
||||
};
|
||||
|
||||
} // End namespace kp
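A hedged sketch (not part of the patch) of the round trip the two sync operations above enable, this time using the single-shot eval shortcut rather than record(): push host data to the device, dispatch, then pull the results back.

// Sketch only: each eval() is a separate synchronous submit.
#include "kompute/Sequence.hpp"
#include "kompute/operations/OpAlgoDispatch.hpp"
#include "kompute/operations/OpTensorSyncDevice.hpp"
#include "kompute/operations/OpTensorSyncLocal.hpp"

void roundTrip(std::shared_ptr<kp::Sequence> seq,
               std::shared_ptr<kp::Algorithm> algo,
               std::vector<std::shared_ptr<kp::Tensor>> tensors)
{
    seq->eval<kp::OpTensorSyncDevice>(tensors); // staging -> device copies
    seq->eval<kp::OpAlgoDispatch>(algo);        // compute on the device data
    seq->eval<kp::OpTensorSyncLocal>(tensors);  // device -> staging copies
}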
|
69
kompute/src/logger/CMakeLists.txt
Normal file
|
@@ -0,0 +1,69 @@
|
|||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
set(LOGGER_SOURCES Logger.cpp)
|
||||
|
||||
add_library(kp_logger ${LOGGER_SOURCES})
|
||||
|
||||
# Define log levels in code
|
||||
add_compile_definitions(KOMPUTE_LOG_LEVEL_TRACE=0)
|
||||
add_compile_definitions(KOMPUTE_LOG_LEVEL_DEBUG=1)
|
||||
add_compile_definitions(KOMPUTE_LOG_LEVEL_INFO=2)
|
||||
add_compile_definitions(KOMPUTE_LOG_LEVEL_WARN=3)
|
||||
add_compile_definitions(KOMPUTE_LOG_LEVEL_ERROR=4)
|
||||
add_compile_definitions(KOMPUTE_LOG_LEVEL_CRITICAL=5)
|
||||
add_compile_definitions(KOMPUTE_LOG_LEVEL_OFF=6)
|
||||
|
||||
if(KOMPUTE_OPT_BUILD_PYTHON AND KOMPUTE_OPT_USE_SPDLOG)
|
||||
message(FATAL_ERROR "'KOMPUTE_OPT_BUILD_PYTHON' is incompatible with 'KOMPUTE_OPT_USE_SPDLOG'. To continue set either one option to 'OFF'.")
|
||||
endif()
|
||||
|
||||
if(KOMPUTE_OPT_ANDROID_BUILD AND KOMPUTE_OPT_USE_SPDLOG)
|
||||
message(FATAL_ERROR "'KOMPUTE_OPT_ANDROID_BUILD' is incompatible with 'KOMPUTE_OPT_USE_SPDLOG'. To continue set either one option to 'OFF'.")
|
||||
endif()
|
||||
|
||||
if(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Trace")
|
||||
set(KOMPUTE_OPT_LOG_LEVEL TRACE)
|
||||
message(STATUS "Using log level Trace")
|
||||
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Debug")
|
||||
set(KOMPUTE_OPT_LOG_LEVEL DEBUG)
|
||||
message(STATUS "Using log level Debug")
|
||||
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Info")
|
||||
set(KOMPUTE_OPT_LOG_LEVEL INFO)
|
||||
message(STATUS "Using log level Info")
|
||||
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Warn")
|
||||
set(KOMPUTE_OPT_LOG_LEVEL WARN)
|
||||
message(STATUS "Using log level Warn")
|
||||
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Error")
|
||||
set(KOMPUTE_OPT_LOG_LEVEL ERROR)
|
||||
message(STATUS "Using log level Error")
|
||||
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Critical")
|
||||
set(KOMPUTE_OPT_LOG_LEVEL CRITICAL)
|
||||
message(STATUS "Using log level Critical")
|
||||
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Off")
|
||||
set(KOMPUTE_OPT_LOG_LEVEL OFF)
|
||||
message(STATUS "Using log level Off")
|
||||
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Default")
|
||||
set(KOMPUTE_OPT_LOG_LEVEL $<IF:$<CONFIG:Debug>,DEBUG,INFO>)
|
||||
message(STATUS "Setting KOMPUTE_OPT_LOG_LEVEL to according to the build type")
|
||||
else()
|
||||
message(FATAL_ERROR "Log level '${KOMPUTE_OPT_LOG_LEVEL}' unknown, use -DKOMPUTE_OPT_LOG_LEVEL={Trace, Debug, Info, Warn, Error, Critical, Off, Default} to set it to a correct value.")
|
||||
endif()
|
||||
|
||||
# Always make sure we define the Kompute log level independent of the Spdlog log level
|
||||
target_compile_definitions(kp_logger INTERFACE KOMPUTE_OPT_ACTIVE_LOG_LEVEL=KOMPUTE_LOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL})
|
||||
|
||||
# Link depending on how the logger should be setup
|
||||
if(NOT KOMPUTE_OPT_LOG_LEVEL_DISABLED)
|
||||
if(KOMPUTE_OPT_USE_SPDLOG)
|
||||
target_link_libraries(kp_logger PUBLIC spdlog::spdlog)
|
||||
target_compile_definitions(spdlog INTERFACE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL})
|
||||
target_compile_definitions(kp_logger INTERFACE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL})
|
||||
message(STATUS "setting SPDLOG_ACTIVE_LEVEL to SPDLOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL}")
|
||||
|
||||
if(KOMPUTE_OPT_SPDLOG_ASYNC_MODE)
|
||||
target_compile_definitions(kp_logger INTERFACE KOMPUTE_SPDLOG_ASYNC_LOGGING=1)
|
||||
endif()
|
||||
else()
|
||||
target_link_libraries(kp_logger PUBLIC fmt::fmt)
|
||||
endif()
|
||||
endif()
|
101
kompute/src/logger/Logger.cpp
Normal file
|
@@ -0,0 +1,101 @@
|
|||
#include "kompute/logger/Logger.hpp"
|
||||
|
||||
#if !KOMPUTE_OPT_LOG_LEVEL_DISABLED
|
||||
#if !KOMPUTE_OPT_USE_SPDLOG
|
||||
#else
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <spdlog/async.h>
|
||||
#include <spdlog/common.h>
|
||||
#include <spdlog/logger.h>
|
||||
#include <spdlog/sinks/stdout_color_sinks.h>
|
||||
#include <spdlog/spdlog.h>
|
||||
#include <string>
|
||||
#endif // !KOMPUTE_OPT_USE_SPDLOG
|
||||
|
||||
namespace logger {
|
||||
#if !KOMPUTE_OPT_USE_SPDLOG
|
||||
|
||||
void
|
||||
setupLogger()
|
||||
{
|
||||
}
|
||||
|
||||
#else
|
||||
constexpr int THREAD_QUEUE_LENGTH = 8192;
|
||||
|
||||
void
|
||||
setupLogger()
|
||||
{
|
||||
// Ensure we setup the logger only once
|
||||
static bool setup = false;
|
||||
static std::mutex setupMutex{};
|
||||
setupMutex.lock();
|
||||
if (setup) {
|
||||
setupMutex.unlock();
|
||||
return;
|
||||
}
|
||||
setup = true;
|
||||
setupMutex.unlock();
|
||||
|
||||
spdlog::init_thread_pool(THREAD_QUEUE_LENGTH, 1);
|
||||
spdlog::sink_ptr console_sink =
|
||||
std::make_shared<spdlog::sinks::stdout_color_sink_mt>();
|
||||
#if SPDLOG_ACTIVE_LEVEL < SPDLOG_LEVEL_INFO
|
||||
console_sink->set_pattern("[%H:%M:%S %z] [%^%=9l%$] [%=21s] %v");
|
||||
#else
|
||||
console_sink->set_pattern("[%H:%M:%S %z] [%^%=9l%$] [%=15s] %v");
|
||||
#endif
|
||||
std::vector<spdlog::sink_ptr> sinks{ console_sink };
|
||||
// TODO: Add flag in compile flags
|
||||
std::shared_ptr<spdlog::logger> logger =
|
||||
#if KOMPUTE_SPDLOG_ASYNC_LOGGING
|
||||
std::make_shared<spdlog::async_logger>(
|
||||
"",
|
||||
sinks.begin(),
|
||||
sinks.end(),
|
||||
spdlog::thread_pool(),
|
||||
spdlog::async_overflow_policy::block);
|
||||
#else
|
||||
std::make_shared<spdlog::logger>(
|
||||
"",
|
||||
sinks.begin(),
|
||||
sinks.end());
|
||||
#endif
|
||||
|
||||
logger->set_level(getLogLevel());
|
||||
|
||||
spdlog::set_default_logger(logger);
|
||||
}
|
||||
|
||||
spdlog::level::level_enum
|
||||
getLogLevel()
|
||||
{
|
||||
#if SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_TRACE
|
||||
return spdlog::level::trace;
|
||||
#elif SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_DEBUG
|
||||
return spdlog::level::debug;
|
||||
#elif SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_INFO
|
||||
return spdlog::level::info;
|
||||
#elif SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_WARN
|
||||
return spdlog::level::warn;
|
||||
#elif SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_ERROR
|
||||
return spdlog::level::error;
|
||||
#elif SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_CRITICAL
|
||||
return spdlog::level::critical;
|
||||
#else
|
||||
return spdlog::level::off;
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
setLogLevel(const spdlog::level::level_enum level)
|
||||
{
|
||||
spdlog::default_logger()->set_level(level);
|
||||
}
|
||||
#endif // !KOMPUTE_OPT_USE_SPDLOG
|
||||
} // namespace logger
|
||||
|
||||
#endif
|
5
kompute/src/shaders/CMakeLists.txt
Normal file
|
@@ -0,0 +1,5 @@
|
|||
# SPDX-License-Identifier: Apache-2.0
|
||||
# ######################
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
add_subdirectory(glsl)
|
26
kompute/src/shaders/glsl/CMakeLists.txt
Normal file
|
@@ -0,0 +1,26 @@
|
|||
# SPDX-License-Identifier: Apache-2.0
|
||||
# ######################
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
# Check if build shaders from source is enabled
|
||||
if(KOMPUTE_OPT_BUILD_SHADERS)
|
||||
vulkan_compile_shader(INFILE ShaderOpMult.comp
|
||||
OUTFILE ShaderOpMult.hpp
|
||||
NAMESPACE "kp")
|
||||
|
||||
vulkan_compile_shader(INFILE ShaderLogisticRegression.comp
|
||||
OUTFILE ShaderLogisticRegression.hpp
|
||||
NAMESPACE "kp")
|
||||
else() # Else we will use our precompiled versions
|
||||
add_custom_command(OUTPUT $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderOpMult.hpp COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/ShaderOpMult.hpp.in $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderOpMult.hpp)
|
||||
add_custom_command(OUTPUT $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderLogisticRegression.hpp COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/ShaderLogisticRegression.hpp.in $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderLogisticRegression.hpp)
|
||||
endif()
|
||||
|
||||
add_library(kp_shader INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/ShaderOpMult.hpp"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/ShaderLogisticRegression.hpp")
|
||||
|
||||
target_include_directories(kp_shader INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)
|
||||
|
||||
# Make sure we install shaders:
|
||||
install(FILES $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderOpMult.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
install(FILES $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderLogisticRegression.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
52
kompute/src/shaders/glsl/ShaderLogisticRegression.comp
Normal file
|
@@ -0,0 +1,52 @@
|
|||
#version 450
|
||||
|
||||
layout (constant_id = 0) const float m = 0;
|
||||
|
||||
layout (local_size_x = 1) in;
|
||||
|
||||
layout(set = 0, binding = 0) buffer bxi { float xi[]; };
|
||||
layout(set = 0, binding = 1) buffer bxj { float xj[]; };
|
||||
layout(set = 0, binding = 2) buffer by { float y[]; };
|
||||
layout(set = 0, binding = 3) buffer bwin { float win[]; };
|
||||
layout(set = 0, binding = 4) buffer bwouti { float wouti[]; };
|
||||
layout(set = 0, binding = 5) buffer bwoutj { float woutj[]; };
|
||||
layout(set = 0, binding = 6) buffer bbin { float bin[]; };
|
||||
layout(set = 0, binding = 7) buffer bbout { float bout[]; };
|
||||
layout(set = 0, binding = 8) buffer blout { float lout[]; };
|
||||
|
||||
float sigmoid(float z) {
|
||||
return 1.0 / (1.0 + exp(-z));
|
||||
}
|
||||
|
||||
float inference(vec2 x, vec2 w, float b) {
|
||||
// Compute the linear mapping function
|
||||
float z = dot(w, x) + b;
|
||||
// Calculate the y-hat with sigmoid
|
||||
float yHat = sigmoid(z);
|
||||
return yHat;
|
||||
}
|
||||
|
||||
float calculateLoss(float yHat, float y) {
|
||||
return -(y * log(yHat) + (1.0 - y) * log(1.0 - yHat));
|
||||
}
|
||||
|
||||
void main() {
|
||||
uint idx = gl_GlobalInvocationID.x;
|
||||
|
||||
vec2 wCurr = vec2(win[0], win[1]);
|
||||
float bCurr = bin[0];
|
||||
|
||||
vec2 xCurr = vec2(xi[idx], xj[idx]);
|
||||
float yCurr = y[idx];
|
||||
|
||||
float yHat = inference(xCurr, wCurr, bCurr);
|
||||
|
||||
float dZ = yHat - yCurr;
|
||||
vec2 dW = (1. / m) * xCurr * dZ;
|
||||
float dB = (1. / m) * dZ;
|
||||
wouti[idx] = dW.x;
|
||||
woutj[idx] = dW.y;
|
||||
bout[idx] = dB;
|
||||
|
||||
lout[idx] = calculateLoss(yHat, yCurr);
|
||||
}
|
310
kompute/src/shaders/glsl/ShaderLogisticRegression.hpp.in
Normal file
@@ -0,0 +1,310 @@
#pragma once
#include <array>
#include <cstdint>

namespace kp {
const std::array<uint32_t, 1204> SHADERLOGISTICREGRESSION_COMP_SPV = {
0x07230203, 0x00010000, 0x0008000a, 0x000000ae,
|
||||
0x00000000, 0x00020011, 0x00000001, 0x0006000b,
|
||||
0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e,
|
||||
0x00000000, 0x0003000e, 0x00000000, 0x00000001,
|
||||
0x0006000f, 0x00000005, 0x00000004, 0x6e69616d,
|
||||
0x00000000, 0x00000041, 0x00060010, 0x00000004,
|
||||
0x00000011, 0x00000001, 0x00000001, 0x00000001,
|
||||
0x00030003, 0x00000002, 0x000001c2, 0x00040005,
|
||||
0x00000004, 0x6e69616d, 0x00000000, 0x00050005,
|
||||
0x0000000a, 0x6d676973, 0x2864696f, 0x003b3166,
|
||||
0x00030005, 0x00000009, 0x0000007a, 0x00080005,
|
||||
0x00000012, 0x65666e69, 0x636e6572, 0x66762865,
|
||||
0x66763b32, 0x31663b32, 0x0000003b, 0x00030005,
|
||||
0x0000000f, 0x00000078, 0x00030005, 0x00000010,
|
||||
0x00000077, 0x00030005, 0x00000011, 0x00000062,
|
||||
0x00080005, 0x00000017, 0x636c6163, 0x74616c75,
|
||||
0x736f4c65, 0x31662873, 0x3b31663b, 0x00000000,
|
||||
0x00040005, 0x00000015, 0x74614879, 0x00000000,
|
||||
0x00030005, 0x00000016, 0x00000079, 0x00030005,
|
||||
0x00000021, 0x0000007a, 0x00040005, 0x00000027,
|
||||
0x74614879, 0x00000000, 0x00040005, 0x00000028,
|
||||
0x61726170, 0x0000006d, 0x00030005, 0x0000003e,
|
||||
0x00786469, 0x00080005, 0x00000041, 0x475f6c67,
|
||||
0x61626f6c, 0x766e496c, 0x7461636f, 0x496e6f69,
|
||||
0x00000044, 0x00040005, 0x00000046, 0x72754377,
|
||||
0x00000072, 0x00040005, 0x00000048, 0x6e697762,
|
||||
0x00000000, 0x00040006, 0x00000048, 0x00000000,
|
||||
0x006e6977, 0x00030005, 0x0000004a, 0x00000000,
|
||||
0x00040005, 0x00000054, 0x72754362, 0x00000072,
|
||||
0x00040005, 0x00000056, 0x6e696262, 0x00000000,
|
||||
0x00040006, 0x00000056, 0x00000000, 0x006e6962,
|
||||
0x00030005, 0x00000058, 0x00000000, 0x00040005,
|
||||
0x0000005b, 0x72754378, 0x00000072, 0x00030005,
|
||||
0x0000005d, 0x00697862, 0x00040006, 0x0000005d,
|
||||
0x00000000, 0x00006978, 0x00030005, 0x0000005f,
|
||||
0x00000000, 0x00030005, 0x00000064, 0x006a7862,
|
||||
0x00040006, 0x00000064, 0x00000000, 0x00006a78,
|
||||
0x00030005, 0x00000066, 0x00000000, 0x00040005,
|
||||
0x0000006b, 0x72754379, 0x00000072, 0x00030005,
|
||||
0x0000006d, 0x00007962, 0x00040006, 0x0000006d,
|
||||
0x00000000, 0x00000079, 0x00030005, 0x0000006f,
|
||||
0x00000000, 0x00040005, 0x00000073, 0x74614879,
|
||||
0x00000000, 0x00040005, 0x00000074, 0x61726170,
|
||||
0x0000006d, 0x00040005, 0x00000076, 0x61726170,
|
||||
0x0000006d, 0x00040005, 0x00000078, 0x61726170,
|
||||
0x0000006d, 0x00030005, 0x0000007b, 0x00005a64,
|
||||
0x00030005, 0x0000007f, 0x00005764, 0x00030005,
|
||||
0x00000080, 0x0000006d, 0x00030005, 0x00000086,
|
||||
0x00004264, 0x00040005, 0x0000008b, 0x756f7762,
|
||||
0x00006974, 0x00050006, 0x0000008b, 0x00000000,
|
||||
0x74756f77, 0x00000069, 0x00030005, 0x0000008d,
|
||||
0x00000000, 0x00040005, 0x00000093, 0x756f7762,
|
||||
0x00006a74, 0x00050006, 0x00000093, 0x00000000,
|
||||
0x74756f77, 0x0000006a, 0x00030005, 0x00000095,
|
||||
0x00000000, 0x00040005, 0x0000009c, 0x756f6262,
|
||||
0x00000074, 0x00050006, 0x0000009c, 0x00000000,
|
||||
0x74756f62, 0x00000000, 0x00030005, 0x0000009e,
|
||||
0x00000000, 0x00040005, 0x000000a3, 0x756f6c62,
|
||||
0x00000074, 0x00050006, 0x000000a3, 0x00000000,
|
||||
0x74756f6c, 0x00000000, 0x00030005, 0x000000a5,
|
||||
0x00000000, 0x00040005, 0x000000a7, 0x61726170,
|
||||
0x0000006d, 0x00040005, 0x000000a9, 0x61726170,
|
||||
0x0000006d, 0x00040047, 0x00000041, 0x0000000b,
|
||||
0x0000001c, 0x00040047, 0x00000047, 0x00000006,
|
||||
0x00000004, 0x00050048, 0x00000048, 0x00000000,
|
||||
0x00000023, 0x00000000, 0x00030047, 0x00000048,
|
||||
0x00000003, 0x00040047, 0x0000004a, 0x00000022,
|
||||
0x00000000, 0x00040047, 0x0000004a, 0x00000021,
|
||||
0x00000003, 0x00040047, 0x00000055, 0x00000006,
|
||||
0x00000004, 0x00050048, 0x00000056, 0x00000000,
|
||||
0x00000023, 0x00000000, 0x00030047, 0x00000056,
|
||||
0x00000003, 0x00040047, 0x00000058, 0x00000022,
|
||||
0x00000000, 0x00040047, 0x00000058, 0x00000021,
|
||||
0x00000006, 0x00040047, 0x0000005c, 0x00000006,
|
||||
0x00000004, 0x00050048, 0x0000005d, 0x00000000,
|
||||
0x00000023, 0x00000000, 0x00030047, 0x0000005d,
|
||||
0x00000003, 0x00040047, 0x0000005f, 0x00000022,
|
||||
0x00000000, 0x00040047, 0x0000005f, 0x00000021,
|
||||
0x00000000, 0x00040047, 0x00000063, 0x00000006,
|
||||
0x00000004, 0x00050048, 0x00000064, 0x00000000,
|
||||
0x00000023, 0x00000000, 0x00030047, 0x00000064,
|
||||
0x00000003, 0x00040047, 0x00000066, 0x00000022,
|
||||
0x00000000, 0x00040047, 0x00000066, 0x00000021,
|
||||
0x00000001, 0x00040047, 0x0000006c, 0x00000006,
|
||||
0x00000004, 0x00050048, 0x0000006d, 0x00000000,
|
||||
0x00000023, 0x00000000, 0x00030047, 0x0000006d,
|
||||
0x00000003, 0x00040047, 0x0000006f, 0x00000022,
|
||||
0x00000000, 0x00040047, 0x0000006f, 0x00000021,
|
||||
0x00000002, 0x00040047, 0x00000080, 0x00000001,
|
||||
0x00000000, 0x00040047, 0x0000008a, 0x00000006,
|
||||
0x00000004, 0x00050048, 0x0000008b, 0x00000000,
|
||||
0x00000023, 0x00000000, 0x00030047, 0x0000008b,
|
||||
0x00000003, 0x00040047, 0x0000008d, 0x00000022,
|
||||
0x00000000, 0x00040047, 0x0000008d, 0x00000021,
|
||||
0x00000004, 0x00040047, 0x00000092, 0x00000006,
|
||||
0x00000004, 0x00050048, 0x00000093, 0x00000000,
|
||||
0x00000023, 0x00000000, 0x00030047, 0x00000093,
|
||||
0x00000003, 0x00040047, 0x00000095, 0x00000022,
|
||||
0x00000000, 0x00040047, 0x00000095, 0x00000021,
|
||||
0x00000005, 0x00040047, 0x0000009b, 0x00000006,
|
||||
0x00000004, 0x00050048, 0x0000009c, 0x00000000,
|
||||
0x00000023, 0x00000000, 0x00030047, 0x0000009c,
|
||||
0x00000003, 0x00040047, 0x0000009e, 0x00000022,
|
||||
0x00000000, 0x00040047, 0x0000009e, 0x00000021,
|
||||
0x00000007, 0x00040047, 0x000000a2, 0x00000006,
|
||||
0x00000004, 0x00050048, 0x000000a3, 0x00000000,
|
||||
0x00000023, 0x00000000, 0x00030047, 0x000000a3,
|
||||
0x00000003, 0x00040047, 0x000000a5, 0x00000022,
|
||||
0x00000000, 0x00040047, 0x000000a5, 0x00000021,
|
||||
0x00000008, 0x00040047, 0x000000ad, 0x0000000b,
|
||||
0x00000019, 0x00020013, 0x00000002, 0x00030021,
|
||||
0x00000003, 0x00000002, 0x00030016, 0x00000006,
|
||||
0x00000020, 0x00040020, 0x00000007, 0x00000007,
|
||||
0x00000006, 0x00040021, 0x00000008, 0x00000006,
|
||||
0x00000007, 0x00040017, 0x0000000c, 0x00000006,
|
||||
0x00000002, 0x00040020, 0x0000000d, 0x00000007,
|
||||
0x0000000c, 0x00060021, 0x0000000e, 0x00000006,
|
||||
0x0000000d, 0x0000000d, 0x00000007, 0x00050021,
|
||||
0x00000014, 0x00000006, 0x00000007, 0x00000007,
|
||||
0x0004002b, 0x00000006, 0x00000019, 0x3f800000,
|
||||
0x00040015, 0x0000003c, 0x00000020, 0x00000000,
|
||||
0x00040020, 0x0000003d, 0x00000007, 0x0000003c,
|
||||
0x00040017, 0x0000003f, 0x0000003c, 0x00000003,
|
||||
0x00040020, 0x00000040, 0x00000001, 0x0000003f,
|
||||
0x0004003b, 0x00000040, 0x00000041, 0x00000001,
|
||||
0x0004002b, 0x0000003c, 0x00000042, 0x00000000,
|
||||
0x00040020, 0x00000043, 0x00000001, 0x0000003c,
|
||||
0x0003001d, 0x00000047, 0x00000006, 0x0003001e,
|
||||
0x00000048, 0x00000047, 0x00040020, 0x00000049,
|
||||
0x00000002, 0x00000048, 0x0004003b, 0x00000049,
|
||||
0x0000004a, 0x00000002, 0x00040015, 0x0000004b,
|
||||
0x00000020, 0x00000001, 0x0004002b, 0x0000004b,
|
||||
0x0000004c, 0x00000000, 0x00040020, 0x0000004d,
|
||||
0x00000002, 0x00000006, 0x0004002b, 0x0000004b,
|
||||
0x00000050, 0x00000001, 0x0003001d, 0x00000055,
|
||||
0x00000006, 0x0003001e, 0x00000056, 0x00000055,
|
||||
0x00040020, 0x00000057, 0x00000002, 0x00000056,
|
||||
0x0004003b, 0x00000057, 0x00000058, 0x00000002,
|
||||
0x0003001d, 0x0000005c, 0x00000006, 0x0003001e,
|
||||
0x0000005d, 0x0000005c, 0x00040020, 0x0000005e,
|
||||
0x00000002, 0x0000005d, 0x0004003b, 0x0000005e,
|
||||
0x0000005f, 0x00000002, 0x0003001d, 0x00000063,
|
||||
0x00000006, 0x0003001e, 0x00000064, 0x00000063,
|
||||
0x00040020, 0x00000065, 0x00000002, 0x00000064,
|
||||
0x0004003b, 0x00000065, 0x00000066, 0x00000002,
|
||||
0x0003001d, 0x0000006c, 0x00000006, 0x0003001e,
|
||||
0x0000006d, 0x0000006c, 0x00040020, 0x0000006e,
|
||||
0x00000002, 0x0000006d, 0x0004003b, 0x0000006e,
|
||||
0x0000006f, 0x00000002, 0x00040032, 0x00000006,
|
||||
0x00000080, 0x00000000, 0x0003001d, 0x0000008a,
|
||||
0x00000006, 0x0003001e, 0x0000008b, 0x0000008a,
|
||||
0x00040020, 0x0000008c, 0x00000002, 0x0000008b,
|
||||
0x0004003b, 0x0000008c, 0x0000008d, 0x00000002,
|
||||
0x0003001d, 0x00000092, 0x00000006, 0x0003001e,
|
||||
0x00000093, 0x00000092, 0x00040020, 0x00000094,
|
||||
0x00000002, 0x00000093, 0x0004003b, 0x00000094,
|
||||
0x00000095, 0x00000002, 0x0004002b, 0x0000003c,
|
||||
0x00000097, 0x00000001, 0x0003001d, 0x0000009b,
|
||||
0x00000006, 0x0003001e, 0x0000009c, 0x0000009b,
|
||||
0x00040020, 0x0000009d, 0x00000002, 0x0000009c,
|
||||
0x0004003b, 0x0000009d, 0x0000009e, 0x00000002,
|
||||
0x0003001d, 0x000000a2, 0x00000006, 0x0003001e,
|
||||
0x000000a3, 0x000000a2, 0x00040020, 0x000000a4,
|
||||
0x00000002, 0x000000a3, 0x0004003b, 0x000000a4,
|
||||
0x000000a5, 0x00000002, 0x0006002c, 0x0000003f,
|
||||
0x000000ad, 0x00000097, 0x00000097, 0x00000097,
|
||||
0x00050036, 0x00000002, 0x00000004, 0x00000000,
|
||||
0x00000003, 0x000200f8, 0x00000005, 0x0004003b,
|
||||
0x0000003d, 0x0000003e, 0x00000007, 0x0004003b,
|
||||
0x0000000d, 0x00000046, 0x00000007, 0x0004003b,
|
||||
0x00000007, 0x00000054, 0x00000007, 0x0004003b,
|
||||
0x0000000d, 0x0000005b, 0x00000007, 0x0004003b,
|
||||
0x00000007, 0x0000006b, 0x00000007, 0x0004003b,
|
||||
0x00000007, 0x00000073, 0x00000007, 0x0004003b,
|
||||
0x0000000d, 0x00000074, 0x00000007, 0x0004003b,
|
||||
0x0000000d, 0x00000076, 0x00000007, 0x0004003b,
|
||||
0x00000007, 0x00000078, 0x00000007, 0x0004003b,
|
||||
0x00000007, 0x0000007b, 0x00000007, 0x0004003b,
|
||||
0x0000000d, 0x0000007f, 0x00000007, 0x0004003b,
|
||||
0x00000007, 0x00000086, 0x00000007, 0x0004003b,
|
||||
0x00000007, 0x000000a7, 0x00000007, 0x0004003b,
|
||||
0x00000007, 0x000000a9, 0x00000007, 0x00050041,
|
||||
0x00000043, 0x00000044, 0x00000041, 0x00000042,
|
||||
0x0004003d, 0x0000003c, 0x00000045, 0x00000044,
|
||||
0x0003003e, 0x0000003e, 0x00000045, 0x00060041,
|
||||
0x0000004d, 0x0000004e, 0x0000004a, 0x0000004c,
|
||||
0x0000004c, 0x0004003d, 0x00000006, 0x0000004f,
|
||||
0x0000004e, 0x00060041, 0x0000004d, 0x00000051,
|
||||
0x0000004a, 0x0000004c, 0x00000050, 0x0004003d,
|
||||
0x00000006, 0x00000052, 0x00000051, 0x00050050,
|
||||
0x0000000c, 0x00000053, 0x0000004f, 0x00000052,
|
||||
0x0003003e, 0x00000046, 0x00000053, 0x00060041,
|
||||
0x0000004d, 0x00000059, 0x00000058, 0x0000004c,
|
||||
0x0000004c, 0x0004003d, 0x00000006, 0x0000005a,
|
||||
0x00000059, 0x0003003e, 0x00000054, 0x0000005a,
|
||||
0x0004003d, 0x0000003c, 0x00000060, 0x0000003e,
|
||||
0x00060041, 0x0000004d, 0x00000061, 0x0000005f,
|
||||
0x0000004c, 0x00000060, 0x0004003d, 0x00000006,
|
||||
0x00000062, 0x00000061, 0x0004003d, 0x0000003c,
|
||||
0x00000067, 0x0000003e, 0x00060041, 0x0000004d,
|
||||
0x00000068, 0x00000066, 0x0000004c, 0x00000067,
|
||||
0x0004003d, 0x00000006, 0x00000069, 0x00000068,
|
||||
0x00050050, 0x0000000c, 0x0000006a, 0x00000062,
|
||||
0x00000069, 0x0003003e, 0x0000005b, 0x0000006a,
|
||||
0x0004003d, 0x0000003c, 0x00000070, 0x0000003e,
|
||||
0x00060041, 0x0000004d, 0x00000071, 0x0000006f,
|
||||
0x0000004c, 0x00000070, 0x0004003d, 0x00000006,
|
||||
0x00000072, 0x00000071, 0x0003003e, 0x0000006b,
|
||||
0x00000072, 0x0004003d, 0x0000000c, 0x00000075,
|
||||
0x0000005b, 0x0003003e, 0x00000074, 0x00000075,
|
||||
0x0004003d, 0x0000000c, 0x00000077, 0x00000046,
|
||||
0x0003003e, 0x00000076, 0x00000077, 0x0004003d,
|
||||
0x00000006, 0x00000079, 0x00000054, 0x0003003e,
|
||||
0x00000078, 0x00000079, 0x00070039, 0x00000006,
|
||||
0x0000007a, 0x00000012, 0x00000074, 0x00000076,
|
||||
0x00000078, 0x0003003e, 0x00000073, 0x0000007a,
|
||||
0x0004003d, 0x00000006, 0x0000007c, 0x00000073,
|
||||
0x0004003d, 0x00000006, 0x0000007d, 0x0000006b,
|
||||
0x00050083, 0x00000006, 0x0000007e, 0x0000007c,
|
||||
0x0000007d, 0x0003003e, 0x0000007b, 0x0000007e,
|
||||
0x00050088, 0x00000006, 0x00000081, 0x00000019,
|
||||
0x00000080, 0x0004003d, 0x0000000c, 0x00000082,
|
||||
0x0000005b, 0x0005008e, 0x0000000c, 0x00000083,
|
||||
0x00000082, 0x00000081, 0x0004003d, 0x00000006,
|
||||
0x00000084, 0x0000007b, 0x0005008e, 0x0000000c,
|
||||
0x00000085, 0x00000083, 0x00000084, 0x0003003e,
|
||||
0x0000007f, 0x00000085, 0x00050088, 0x00000006,
|
||||
0x00000087, 0x00000019, 0x00000080, 0x0004003d,
|
||||
0x00000006, 0x00000088, 0x0000007b, 0x00050085,
|
||||
0x00000006, 0x00000089, 0x00000087, 0x00000088,
|
||||
0x0003003e, 0x00000086, 0x00000089, 0x0004003d,
|
||||
0x0000003c, 0x0000008e, 0x0000003e, 0x00050041,
|
||||
0x00000007, 0x0000008f, 0x0000007f, 0x00000042,
|
||||
0x0004003d, 0x00000006, 0x00000090, 0x0000008f,
|
||||
0x00060041, 0x0000004d, 0x00000091, 0x0000008d,
|
||||
0x0000004c, 0x0000008e, 0x0003003e, 0x00000091,
|
||||
0x00000090, 0x0004003d, 0x0000003c, 0x00000096,
|
||||
0x0000003e, 0x00050041, 0x00000007, 0x00000098,
|
||||
0x0000007f, 0x00000097, 0x0004003d, 0x00000006,
|
||||
0x00000099, 0x00000098, 0x00060041, 0x0000004d,
|
||||
0x0000009a, 0x00000095, 0x0000004c, 0x00000096,
|
||||
0x0003003e, 0x0000009a, 0x00000099, 0x0004003d,
|
||||
0x0000003c, 0x0000009f, 0x0000003e, 0x0004003d,
|
||||
0x00000006, 0x000000a0, 0x00000086, 0x00060041,
|
||||
0x0000004d, 0x000000a1, 0x0000009e, 0x0000004c,
|
||||
0x0000009f, 0x0003003e, 0x000000a1, 0x000000a0,
|
||||
0x0004003d, 0x0000003c, 0x000000a6, 0x0000003e,
|
||||
0x0004003d, 0x00000006, 0x000000a8, 0x00000073,
|
||||
0x0003003e, 0x000000a7, 0x000000a8, 0x0004003d,
|
||||
0x00000006, 0x000000aa, 0x0000006b, 0x0003003e,
|
||||
0x000000a9, 0x000000aa, 0x00060039, 0x00000006,
|
||||
0x000000ab, 0x00000017, 0x000000a7, 0x000000a9,
|
||||
0x00060041, 0x0000004d, 0x000000ac, 0x000000a5,
|
||||
0x0000004c, 0x000000a6, 0x0003003e, 0x000000ac,
|
||||
0x000000ab, 0x000100fd, 0x00010038, 0x00050036,
|
||||
0x00000006, 0x0000000a, 0x00000000, 0x00000008,
|
||||
0x00030037, 0x00000007, 0x00000009, 0x000200f8,
|
||||
0x0000000b, 0x0004003d, 0x00000006, 0x0000001a,
|
||||
0x00000009, 0x0004007f, 0x00000006, 0x0000001b,
|
||||
0x0000001a, 0x0006000c, 0x00000006, 0x0000001c,
|
||||
0x00000001, 0x0000001b, 0x0000001b, 0x00050081,
|
||||
0x00000006, 0x0000001d, 0x00000019, 0x0000001c,
|
||||
0x00050088, 0x00000006, 0x0000001e, 0x00000019,
|
||||
0x0000001d, 0x000200fe, 0x0000001e, 0x00010038,
|
||||
0x00050036, 0x00000006, 0x00000012, 0x00000000,
|
||||
0x0000000e, 0x00030037, 0x0000000d, 0x0000000f,
|
||||
0x00030037, 0x0000000d, 0x00000010, 0x00030037,
|
||||
0x00000007, 0x00000011, 0x000200f8, 0x00000013,
|
||||
0x0004003b, 0x00000007, 0x00000021, 0x00000007,
|
||||
0x0004003b, 0x00000007, 0x00000027, 0x00000007,
|
||||
0x0004003b, 0x00000007, 0x00000028, 0x00000007,
|
||||
0x0004003d, 0x0000000c, 0x00000022, 0x00000010,
|
||||
0x0004003d, 0x0000000c, 0x00000023, 0x0000000f,
|
||||
0x00050094, 0x00000006, 0x00000024, 0x00000022,
|
||||
0x00000023, 0x0004003d, 0x00000006, 0x00000025,
|
||||
0x00000011, 0x00050081, 0x00000006, 0x00000026,
|
||||
0x00000024, 0x00000025, 0x0003003e, 0x00000021,
|
||||
0x00000026, 0x0004003d, 0x00000006, 0x00000029,
|
||||
0x00000021, 0x0003003e, 0x00000028, 0x00000029,
|
||||
0x00050039, 0x00000006, 0x0000002a, 0x0000000a,
|
||||
0x00000028, 0x0003003e, 0x00000027, 0x0000002a,
|
||||
0x0004003d, 0x00000006, 0x0000002b, 0x00000027,
|
||||
0x000200fe, 0x0000002b, 0x00010038, 0x00050036,
|
||||
0x00000006, 0x00000017, 0x00000000, 0x00000014,
|
||||
0x00030037, 0x00000007, 0x00000015, 0x00030037,
|
||||
0x00000007, 0x00000016, 0x000200f8, 0x00000018,
|
||||
0x0004003d, 0x00000006, 0x0000002e, 0x00000016,
|
||||
0x0004003d, 0x00000006, 0x0000002f, 0x00000015,
|
||||
0x0006000c, 0x00000006, 0x00000030, 0x00000001,
|
||||
0x0000001c, 0x0000002f, 0x00050085, 0x00000006,
|
||||
0x00000031, 0x0000002e, 0x00000030, 0x0004003d,
|
||||
0x00000006, 0x00000032, 0x00000016, 0x00050083,
|
||||
0x00000006, 0x00000033, 0x00000019, 0x00000032,
|
||||
0x0004003d, 0x00000006, 0x00000034, 0x00000015,
|
||||
0x00050083, 0x00000006, 0x00000035, 0x00000019,
|
||||
0x00000034, 0x0006000c, 0x00000006, 0x00000036,
|
||||
0x00000001, 0x0000001c, 0x00000035, 0x00050085,
|
||||
0x00000006, 0x00000037, 0x00000033, 0x00000036,
|
||||
0x00050081, 0x00000006, 0x00000038, 0x00000031,
|
||||
0x00000037, 0x0004007f, 0x00000006, 0x00000039,
|
||||
0x00000038, 0x000200fe, 0x00000039, 0x00010038 };
} // namespace kp
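For orientation, here is a sketch of how the logistic-regression shader above could be dispatched from the host. It assumes the upstream Kompute 0.8-style API (kp::Manager, kp::OpTensorSyncDevice, kp::OpAlgoDispatch, kp::OpTensorSyncLocal); the vendored copy in this commit may expose a slightly different interface, and the tensor contents are made-up toy data. Only the binding order and the specialization constant m (the sample count) come from the shader itself.

// Hypothetical host-side sketch (not part of the diff); assumes the upstream
// Kompute API surface. Tensor order must match bindings 0..8 of the shader:
// xi, xj, y, win, wouti, woutj, bin, bout, lout.
#include <cstdint>
#include <memory>
#include <vector>

#include <kompute/Kompute.hpp>
#include "ShaderLogisticRegression.hpp"

int main()
{
    kp::Manager mgr;

    // Toy two-feature dataset with five samples (illustrative values).
    auto xi    = mgr.tensor({0.0f, 1.0f, 1.0f, 1.0f, 1.0f});
    auto xj    = mgr.tensor({0.0f, 0.0f, 0.0f, 1.0f, 1.0f});
    auto y     = mgr.tensor({0.0f, 0.0f, 0.0f, 1.0f, 1.0f});
    auto wIn   = mgr.tensor({0.001f, 0.001f});
    auto bIn   = mgr.tensor({0.0f});
    auto wOutI = mgr.tensor({0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
    auto wOutJ = mgr.tensor({0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
    auto bOut  = mgr.tensor({0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
    auto lOut  = mgr.tensor({0.0f, 0.0f, 0.0f, 0.0f, 0.0f});

    std::vector<std::shared_ptr<kp::Tensor>> params = {
        xi, xj, y, wIn, wOutI, wOutJ, bIn, bOut, lOut
    };

    std::vector<uint32_t> spirv(kp::SHADERLOGISTICREGRESSION_COMP_SPV.begin(),
                                kp::SHADERLOGISTICREGRESSION_COMP_SPV.end());

    // One invocation per sample; specialization constant 0 is m = 5.
    auto algo = mgr.algorithm(params, spirv, kp::Workgroup({5, 1, 1}),
                              std::vector<float>{5.0f});

    mgr.sequence()
        ->record<kp::OpTensorSyncDevice>(params)
        ->record<kp::OpAlgoDispatch>(algo)
        ->record<kp::OpTensorSyncLocal>(params)
        ->eval();

    // wOutI/wOutJ/bOut now hold per-sample gradient contributions and lOut the
    // per-sample loss; a host loop would sum them and take a gradient step.
    return 0;
}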
28
kompute/src/shaders/glsl/ShaderOpMult.comp
Normal file
@@ -0,0 +1,28 @@
#version 450

layout(set = 0, binding = 0) buffer tensorLhs {
    float valuesLhs[ ];
};

layout(set = 0, binding = 1) buffer tensorRhs {
    float valuesRhs[ ];
};

layout(set = 0, binding = 2) buffer tensorOutput {
    float valuesOutput[ ];
};

layout (constant_id = 0) const uint LEN_LHS = 0;
layout (constant_id = 1) const uint LEN_RHS = 0;
layout (constant_id = 2) const uint LEN_OUT = 0;

layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

void main()
{
    uint index = gl_GlobalInvocationID.x;

    valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
}
101
kompute/src/shaders/glsl/ShaderOpMult.hpp.in
Normal file
@@ -0,0 +1,101 @@
#pragma once
#include <array>
#include <cstdint>

namespace kp {
const std::array<uint32_t, 366> SHADEROPMULT_COMP_SPV = {
0x07230203, 0x00010000, 0x0008000a, 0x0000002e,
|
||||
0x00000000, 0x00020011, 0x00000001, 0x0006000b,
|
||||
0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e,
|
||||
0x00000000, 0x0003000e, 0x00000000, 0x00000001,
|
||||
0x0006000f, 0x00000005, 0x00000004, 0x6e69616d,
|
||||
0x00000000, 0x0000000b, 0x00060010, 0x00000004,
|
||||
0x00000011, 0x00000001, 0x00000001, 0x00000001,
|
||||
0x00030003, 0x00000002, 0x000001c2, 0x00040005,
|
||||
0x00000004, 0x6e69616d, 0x00000000, 0x00040005,
|
||||
0x00000008, 0x65646e69, 0x00000078, 0x00080005,
|
||||
0x0000000b, 0x475f6c67, 0x61626f6c, 0x766e496c,
|
||||
0x7461636f, 0x496e6f69, 0x00000044, 0x00060005,
|
||||
0x00000012, 0x736e6574, 0x754f726f, 0x74757074,
|
||||
0x00000000, 0x00070006, 0x00000012, 0x00000000,
|
||||
0x756c6176, 0x754f7365, 0x74757074, 0x00000000,
|
||||
0x00030005, 0x00000014, 0x00000000, 0x00050005,
|
||||
0x00000019, 0x736e6574, 0x684c726f, 0x00000073,
|
||||
0x00060006, 0x00000019, 0x00000000, 0x756c6176,
|
||||
0x684c7365, 0x00000073, 0x00030005, 0x0000001b,
|
||||
0x00000000, 0x00050005, 0x00000021, 0x736e6574,
|
||||
0x6852726f, 0x00000073, 0x00060006, 0x00000021,
|
||||
0x00000000, 0x756c6176, 0x68527365, 0x00000073,
|
||||
0x00030005, 0x00000023, 0x00000000, 0x00040005,
|
||||
0x00000029, 0x5f4e454c, 0x0053484c, 0x00040005,
|
||||
0x0000002a, 0x5f4e454c, 0x00534852, 0x00040005,
|
||||
0x0000002b, 0x5f4e454c, 0x0054554f, 0x00040047,
|
||||
0x0000000b, 0x0000000b, 0x0000001c, 0x00040047,
|
||||
0x00000011, 0x00000006, 0x00000004, 0x00050048,
|
||||
0x00000012, 0x00000000, 0x00000023, 0x00000000,
|
||||
0x00030047, 0x00000012, 0x00000003, 0x00040047,
|
||||
0x00000014, 0x00000022, 0x00000000, 0x00040047,
|
||||
0x00000014, 0x00000021, 0x00000002, 0x00040047,
|
||||
0x00000018, 0x00000006, 0x00000004, 0x00050048,
|
||||
0x00000019, 0x00000000, 0x00000023, 0x00000000,
|
||||
0x00030047, 0x00000019, 0x00000003, 0x00040047,
|
||||
0x0000001b, 0x00000022, 0x00000000, 0x00040047,
|
||||
0x0000001b, 0x00000021, 0x00000000, 0x00040047,
|
||||
0x00000020, 0x00000006, 0x00000004, 0x00050048,
|
||||
0x00000021, 0x00000000, 0x00000023, 0x00000000,
|
||||
0x00030047, 0x00000021, 0x00000003, 0x00040047,
|
||||
0x00000023, 0x00000022, 0x00000000, 0x00040047,
|
||||
0x00000023, 0x00000021, 0x00000001, 0x00040047,
|
||||
0x00000029, 0x00000001, 0x00000000, 0x00040047,
|
||||
0x0000002a, 0x00000001, 0x00000001, 0x00040047,
|
||||
0x0000002b, 0x00000001, 0x00000002, 0x00040047,
|
||||
0x0000002d, 0x0000000b, 0x00000019, 0x00020013,
|
||||
0x00000002, 0x00030021, 0x00000003, 0x00000002,
|
||||
0x00040015, 0x00000006, 0x00000020, 0x00000000,
|
||||
0x00040020, 0x00000007, 0x00000007, 0x00000006,
|
||||
0x00040017, 0x00000009, 0x00000006, 0x00000003,
|
||||
0x00040020, 0x0000000a, 0x00000001, 0x00000009,
|
||||
0x0004003b, 0x0000000a, 0x0000000b, 0x00000001,
|
||||
0x0004002b, 0x00000006, 0x0000000c, 0x00000000,
|
||||
0x00040020, 0x0000000d, 0x00000001, 0x00000006,
|
||||
0x00030016, 0x00000010, 0x00000020, 0x0003001d,
|
||||
0x00000011, 0x00000010, 0x0003001e, 0x00000012,
|
||||
0x00000011, 0x00040020, 0x00000013, 0x00000002,
|
||||
0x00000012, 0x0004003b, 0x00000013, 0x00000014,
|
||||
0x00000002, 0x00040015, 0x00000015, 0x00000020,
|
||||
0x00000001, 0x0004002b, 0x00000015, 0x00000016,
|
||||
0x00000000, 0x0003001d, 0x00000018, 0x00000010,
|
||||
0x0003001e, 0x00000019, 0x00000018, 0x00040020,
|
||||
0x0000001a, 0x00000002, 0x00000019, 0x0004003b,
|
||||
0x0000001a, 0x0000001b, 0x00000002, 0x00040020,
|
||||
0x0000001d, 0x00000002, 0x00000010, 0x0003001d,
|
||||
0x00000020, 0x00000010, 0x0003001e, 0x00000021,
|
||||
0x00000020, 0x00040020, 0x00000022, 0x00000002,
|
||||
0x00000021, 0x0004003b, 0x00000022, 0x00000023,
|
||||
0x00000002, 0x00040032, 0x00000006, 0x00000029,
|
||||
0x00000000, 0x00040032, 0x00000006, 0x0000002a,
|
||||
0x00000000, 0x00040032, 0x00000006, 0x0000002b,
|
||||
0x00000000, 0x0004002b, 0x00000006, 0x0000002c,
|
||||
0x00000001, 0x0006002c, 0x00000009, 0x0000002d,
|
||||
0x0000002c, 0x0000002c, 0x0000002c, 0x00050036,
|
||||
0x00000002, 0x00000004, 0x00000000, 0x00000003,
|
||||
0x000200f8, 0x00000005, 0x0004003b, 0x00000007,
|
||||
0x00000008, 0x00000007, 0x00050041, 0x0000000d,
|
||||
0x0000000e, 0x0000000b, 0x0000000c, 0x0004003d,
|
||||
0x00000006, 0x0000000f, 0x0000000e, 0x0003003e,
|
||||
0x00000008, 0x0000000f, 0x0004003d, 0x00000006,
|
||||
0x00000017, 0x00000008, 0x0004003d, 0x00000006,
|
||||
0x0000001c, 0x00000008, 0x00060041, 0x0000001d,
|
||||
0x0000001e, 0x0000001b, 0x00000016, 0x0000001c,
|
||||
0x0004003d, 0x00000010, 0x0000001f, 0x0000001e,
|
||||
0x0004003d, 0x00000006, 0x00000024, 0x00000008,
|
||||
0x00060041, 0x0000001d, 0x00000025, 0x00000023,
|
||||
0x00000016, 0x00000024, 0x0004003d, 0x00000010,
|
||||
0x00000026, 0x00000025, 0x00050085, 0x00000010,
|
||||
0x00000027, 0x0000001f, 0x00000026, 0x00060041,
|
||||
0x0000001d, 0x00000028, 0x00000014, 0x00000016,
|
||||
0x00000017, 0x0003003e, 0x00000028, 0x00000027,
|
||||
0x000100fd, 0x00010038 };
} // namespace kp
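The elementwise multiply shader follows the same host-side pattern. A condensed, hypothetical fragment, reusing a kp::Manager named mgr as in the earlier sketch and again assuming the upstream Kompute API; the data values are made up:

// Illustrative fragment only; not part of the committed sources.
auto lhs = mgr.tensor({2.0f, 4.0f, 6.0f});
auto rhs = mgr.tensor({0.0f, 1.0f, 2.0f});
auto out = mgr.tensor({0.0f, 0.0f, 0.0f});
std::vector<std::shared_ptr<kp::Tensor>> params = { lhs, rhs, out };

std::vector<uint32_t> spirv(kp::SHADEROPMULT_COMP_SPV.begin(),
                            kp::SHADEROPMULT_COMP_SPV.end());

// One invocation per element. The LEN_* specialization constants keep their
// defaults because main() never reads them; bounds are implied by the dispatch.
auto algo = mgr.algorithm(params, spirv, kp::Workgroup({3, 1, 1}));

mgr.sequence()
    ->record<kp::OpTensorSyncDevice>(params)
    ->record<kp::OpAlgoDispatch>(algo)
    ->record<kp::OpTensorSyncLocal>(params)
    ->eval();

// out->vector() is now {0.0f, 4.0f, 12.0f}.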
29
kompute/src/shaders/hlsl/computeheadless.comp
Normal file
@@ -0,0 +1,29 @@
// Copyright 2020 Google LLC

RWStructuredBuffer<uint> values : register(u0);
[[vk::constant_id(0)]] const uint BUFFER_ELEMENTS = 32;

uint fibonacci(uint n) {
    if(n <= 1){
        return n;
    }
    uint curr = 1;
    uint prev = 1;
    for(uint i = 2; i < n; ++i) {
        uint temp = curr;
        curr += prev;
        prev = temp;
    }
    return curr;
}

[numthreads(1, 1, 1)]
void main(uint3 GlobalInvocationID : SV_DispatchThreadID)
{
    uint index = GlobalInvocationID.x;
    if (index >= BUFFER_ELEMENTS)
        return;
    values[index] = fibonacci(values[index]);
}
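One way to sanity-check this headless compute example is to recompute the same sequence on the CPU and compare it with the buffer read back from the device. A host-side reference that mirrors the shader's loop exactly (illustrative only, not part of the diff):

// Mirrors fibonacci() from computeheadless.comp so GPU output can be checked
// against a CPU reference; purely illustrative.
#include <cstdint>

static uint32_t fibonacciRef(uint32_t n)
{
    if (n <= 1) {
        return n;
    }
    uint32_t curr = 1;
    uint32_t prev = 1;
    for (uint32_t i = 2; i < n; ++i) {
        uint32_t temp = curr;
        curr += prev;
        prev = temp;
    }
    return curr;
}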