Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0.

This commit is contained in:
niansa 2023-06-22 12:58:07 +02:00 committed by Adam Treat
parent acfc5478ff
commit 4cdaa3c9cb
97 changed files with 13550 additions and 26 deletions

450
kompute/src/Algorithm.cpp Normal file
View file

@ -0,0 +1,450 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include <fstream>
#include "kompute/Algorithm.hpp"
namespace kp {
// Destructor: releases every Vulkan resource this algorithm owns by
// delegating to destroy(); destroy() is idempotent, so prior explicit
// cleanup is safe.
Algorithm::~Algorithm()
{
    KP_LOG_DEBUG("Kompute Algorithm Destructor started");
    this->destroy();
}
/**
 * Reports whether every Vulkan object required to dispatch this algorithm
 * has been created: pipeline, pipeline cache, pipeline layout, descriptor
 * pool/set/layout and the compute shader module.
 */
bool
Algorithm::isInit()
{
    const bool hasPipelineObjects = this->mPipeline && this->mPipelineCache &&
                                    this->mPipelineLayout;
    const bool hasDescriptorObjects = this->mDescriptorPool &&
                                      this->mDescriptorSet &&
                                      this->mDescriptorSetLayout;
    return hasPipelineObjects && hasDescriptorObjects &&
           static_cast<bool>(this->mShaderModule);
}
/**
 * Frees the Vulkan resources owned by this algorithm: pipeline, pipeline
 * cache, pipeline layout and shader module, then the descriptor set layout
 * via freeParameters(). Each object is destroyed only when the matching
 * mFree* ownership flag is set and the handle is non-null; destroyed
 * handles are reset to nullptr, making the call idempotent.
 */
void
Algorithm::destroy()
{
    // Push/specialization constant buffers are released by the command
    // buffer machinery, so they are intentionally not freed here.
    if (!this->mDevice) {
        KP_LOG_WARN("Kompute Algorithm destroy function reached with null "
                    "Device pointer");
        return;
    }
    // NOTE: the previous version re-checked each handle for null *inside*
    // the guarded branches and warned; those branches were unreachable
    // because the outer condition already guarantees a non-null handle.
    if (this->mFreePipeline && this->mPipeline) {
        KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline");
        this->mDevice->destroy(
          *this->mPipeline,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
        this->mPipeline = nullptr;
    }
    if (this->mFreePipelineCache && this->mPipelineCache) {
        KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
        this->mDevice->destroy(
          *this->mPipelineCache,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
        this->mPipelineCache = nullptr;
    }
    if (this->mFreePipelineLayout && this->mPipelineLayout) {
        KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
        this->mDevice->destroy(
          *this->mPipelineLayout,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
        this->mPipelineLayout = nullptr;
    }
    if (this->mFreeShaderModule && this->mShaderModule) {
        KP_LOG_DEBUG("Kompute Algorithm Destroying shader module");
        this->mDevice->destroy(
          *this->mShaderModule,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
        this->mShaderModule = nullptr;
    }
    freeParameters();
}
/**
 * Destroys the descriptor set layout when this instance owns it
 * (mFreeDescriptorSetLayout). The descriptor set itself is reclaimed with
 * its pool, so only the layout needs explicit destruction here.
 */
void
Algorithm::freeParameters()
{
    // Guard against a null device, mirroring destroy(); previously a null
    // mDevice here would have been dereferenced.
    if (!this->mDevice) {
        KP_LOG_WARN("Kompute Algorithm freeParameters reached with null "
                    "Device pointer");
        return;
    }
    if (this->mFreeDescriptorSetLayout && this->mDescriptorSetLayout) {
        KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout");
        // The inner null re-check that used to live here was unreachable:
        // the enclosing condition already guarantees a non-null layout.
        this->mDevice->destroy(
          *this->mDescriptorSetLayout,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
        this->mDescriptorSetLayout = nullptr;
    }
}
void
Algorithm::createParameters()
{
KP_LOG_DEBUG("Kompute Algorithm createParameters started");
if (!*this->mDescriptorPool) {
KP_LOG_ERROR("Kompute Algorithm can not create descriptor pool");
return;
}
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
for (size_t i = 0; i < this->mTensors.size(); i++) {
descriptorSetBindings.push_back(
vk::DescriptorSetLayoutBinding(i, // Binding index
vk::DescriptorType::eStorageBuffer,
1, // Descriptor count
vk::ShaderStageFlagBits::eCompute));
}
// This is the component that is fed into the pipeline
vk::DescriptorSetLayoutCreateInfo descriptorSetLayoutInfo(
vk::DescriptorSetLayoutCreateFlags(),
static_cast<uint32_t>(descriptorSetBindings.size()),
descriptorSetBindings.data());
KP_LOG_DEBUG("Kompute Algorithm creating descriptor set layout");
this->mDescriptorSetLayout = std::make_shared<vk::DescriptorSetLayout>();
vk::Result result = this->mDevice->createDescriptorSetLayout(
&descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get());
if (result != vk::Result::eSuccess) {
KP_LOG_ERROR("Failed to create descriptor set layout. Error code: {}", vk::to_string(result));
} else {
this->mFreeDescriptorSetLayout = true;
KP_LOG_DEBUG("Successfully allocated descriptor set layout.");
}
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
*this->mDescriptorPool,
1, // Descriptor set layout count
this->mDescriptorSetLayout.get());
KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets");
this->mDescriptorSet = std::make_shared<vk::DescriptorSet>();
result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
this->mDescriptorSet.get());
if (result != vk::Result::eSuccess) {
KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result));
} else {
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Successfully allocated descriptor sets.");
}
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
for (size_t i = 0; i < this->mTensors.size(); i++) {
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
vk::DescriptorBufferInfo descriptorBufferInfo =
this->mTensors[i]->constructDescriptorBufferInfo();
computeWriteDescriptorSets.push_back(
vk::WriteDescriptorSet(*this->mDescriptorSet,
i, // Destination binding
0, // Destination array element
1, // Descriptor count
vk::DescriptorType::eStorageBuffer,
nullptr, // Descriptor image info
&descriptorBufferInfo));
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
nullptr);
}
KP_LOG_DEBUG("Kompute Algorithm successfully run init");
}
void
Algorithm::updateParameters()
{
KP_LOG_DEBUG("Kompute Algorithm updateParameters started");
if (!*this->mDescriptorPool) {
KP_LOG_ERROR("Kompute Algorithm can not create descriptor pool");
return;
}
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
*this->mDescriptorPool,
1, // Descriptor set layout count
this->mDescriptorSetLayout.get());
KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets");
this->mDescriptorSet = std::make_shared<vk::DescriptorSet>();
vk::Result result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
this->mDescriptorSet.get());
if (result != vk::Result::eSuccess) {
KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result));
} else {
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Successfully allocated descriptor sets.");
}
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
for (size_t i = 0; i < this->mTensors.size(); i++) {
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
vk::DescriptorBufferInfo descriptorBufferInfo =
this->mTensors[i]->constructDescriptorBufferInfo();
computeWriteDescriptorSets.push_back(
vk::WriteDescriptorSet(*this->mDescriptorSet,
i, // Destination binding
0, // Destination array element
1, // Descriptor count
vk::DescriptorType::eStorageBuffer,
nullptr, // Descriptor image info
&descriptorBufferInfo));
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
nullptr);
}
KP_LOG_DEBUG("Kompute Algorithm successfully run init");
}
void
Algorithm::createShaderModule()
{
KP_LOG_DEBUG("Kompute Algorithm createShaderModule started");
vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(),
sizeof(uint32_t) *
this->mSpirv.size(),
this->mSpirv.data());
KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}",
this->mSpirv.size());
this->mFreeShaderModule = true;
this->mShaderModule = std::make_shared<vk::ShaderModule>();
this->mDevice->createShaderModule(
&shaderModuleInfo, nullptr, this->mShaderModule.get());
this->mFreeShaderModule = true;
KP_LOG_DEBUG("Kompute Algorithm create shader module success");
}
/**
 * Builds the compute pipeline: pipeline layout (with an optional push
 * constant range), specialization constant mapping, shader stage, a
 * process-wide shared pipeline cache, and finally the vk::Pipeline itself.
 */
void
Algorithm::createPipeline()
{
    KP_LOG_DEBUG("Kompute Algorithm calling create Pipeline");
    vk::PipelineLayoutCreateInfo pipelineLayoutInfo(
      vk::PipelineLayoutCreateFlags(),
      1, // Set layout count
      this->mDescriptorSetLayout.get());
    vk::PushConstantRange pushConstantRange;
    if (this->mPushConstantsSize) {
        pushConstantRange.setStageFlags(vk::ShaderStageFlagBits::eCompute);
        pushConstantRange.setOffset(0);
        pushConstantRange.setSize(this->mPushConstantsDataTypeMemorySize *
                                  this->mPushConstantsSize);
        pipelineLayoutInfo.setPushConstantRangeCount(1);
        pipelineLayoutInfo.setPPushConstantRanges(&pushConstantRange);
    }
    this->mPipelineLayout = std::make_shared<vk::PipelineLayout>();
    this->mDevice->createPipelineLayout(
      &pipelineLayoutInfo, nullptr, this->mPipelineLayout.get());
    this->mFreePipelineLayout = true;
    // One map entry per specialization constant, laid out contiguously.
    std::vector<vk::SpecializationMapEntry> specializationEntries;
    for (uint32_t i = 0; i < this->mSpecializationConstantsSize; i++) {
        vk::SpecializationMapEntry specializationEntry(
          static_cast<uint32_t>(i),
          static_cast<uint32_t>(
            this->mSpecializationConstantsDataTypeMemorySize * i),
          this->mSpecializationConstantsDataTypeMemorySize);
        specializationEntries.push_back(specializationEntry);
    }
    // This passes ownership of the memory so we remove ownership from
    // specialization container by using "transferDataOwnership"
    vk::SpecializationInfo specializationInfo(
      static_cast<uint32_t>(specializationEntries.size()),
      specializationEntries.data(),
      this->mSpecializationConstantsDataTypeMemorySize *
        this->mSpecializationConstantsSize,
      this->mSpecializationConstantsData);
    vk::PipelineShaderStageCreateInfo shaderStage(
      vk::PipelineShaderStageCreateFlags(),
      vk::ShaderStageFlagBits::eCompute,
      *this->mShaderModule,
      "main",
      &specializationInfo);
    // A single pipeline cache shared by every Algorithm instance in the
    // process.
    static std::shared_ptr<vk::PipelineCache> globalPipelineCache =
      std::make_shared<vk::PipelineCache>();
    // Fix: always record the shared cache on this instance. Previously the
    // assignment lived inside the creation branch below, so every Algorithm
    // after the first kept a null mPipelineCache and isInit() reported
    // false even though the algorithm was fully usable.
    this->mPipelineCache = globalPipelineCache;
    if (!*globalPipelineCache) {
        vk::PipelineCacheCreateInfo pipelineCacheInfo =
          vk::PipelineCacheCreateInfo();
        // NOTE(review): the first creator is marked as owner and will
        // destroy the shared cache in destroy() while other instances may
        // still reference it — confirm the intended cache lifetime.
        this->mFreePipelineCache = true;
        this->mDevice->createPipelineCache(
          &pipelineCacheInfo, nullptr, globalPipelineCache.get());
    }
    vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(),
                                               shaderStage,
                                               *this->mPipelineLayout,
                                               vk::Pipeline(),
                                               0);
#ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE
    // Newer vulkan-hpp returns a ResultValue wrapper.
    vk::ResultValue<vk::Pipeline> pipelineResult =
      this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo);
    if (pipelineResult.result != vk::Result::eSuccess) {
        throw std::runtime_error("Failed to create pipeline result: " +
                                 vk::to_string(pipelineResult.result));
    }
    vk::Pipeline& pipeline = pipelineResult.value;
    this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
    this->mFreePipeline = true;
#else
    vk::Pipeline pipeline =
      this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo)
        .value;
    this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
    this->mFreePipeline = true;
#endif
    KP_LOG_DEBUG("Kompute Algorithm Create Pipeline Success");
}
/**
 * Records the compute pipeline bind and descriptor set bind into the given
 * command buffer. Must run before recordBindPush()/recordDispatch().
 */
void
Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute Algorithm binding pipeline");
    commandBuffer.bindPipeline(vk::PipelineBindPoint::eCompute,
                               *this->mPipeline);

    KP_LOG_DEBUG("Kompute Algorithm binding descriptor sets");
    const uint32_t firstSet = 0;
    commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
                                     *this->mPipelineLayout,
                                     firstSet,
                                     *this->mDescriptorSet,
                                     nullptr // Dynamic offsets / dispatcher
    );
}
/**
 * Records the push-constant upload into the command buffer. A no-op when
 * the algorithm has no push constants configured.
 */
void
Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer)
{
    if (!this->mPushConstantsSize) {
        return;
    }
    const auto pushConstantBytes =
      this->mPushConstantsSize * this->mPushConstantsDataTypeMemorySize;
    KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}",
                 pushConstantBytes);
    commandBuffer.pushConstants(*this->mPipelineLayout,
                                vk::ShaderStageFlagBits::eCompute,
                                0,
                                pushConstantBytes,
                                this->mPushConstantsData);
}
// Records the compute dispatch using the workgroup counts previously set
// via setWorkgroup().
void
Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute Algorithm recording dispatch");
    commandBuffer.dispatch(
      this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]);
}
/**
 * Sets the dispatch size. When workgroup[0] is non-zero the explicit x/y/z
 * values are used (unset y/z default to 1); otherwise the dispatch falls
 * back to (minSize, 1, 1).
 */
void
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
{
    KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size");

    if (workgroup[0] == 0) {
        // No explicit size provided: derive from minSize.
        this->mWorkgroup = { minSize, 1, 1 };
    } else {
        // Explicit x provided; default unset y/z dimensions to 1.
        const uint32_t sizeY = workgroup[1] > 0 ? workgroup[1] : 1;
        const uint32_t sizeZ = workgroup[2] > 0 ? workgroup[2] : 1;
        this->mWorkgroup = { workgroup[0], sizeY, sizeZ };
    }

    KP_LOG_INFO("Kompute OpAlgoCreate set dispatch size X: {}, Y: {}, Z: {}",
                this->mWorkgroup[0],
                this->mWorkgroup[1],
                this->mWorkgroup[2]);
}
// Returns the currently configured dispatch size (x, y, z workgroup counts).
const Workgroup&
Algorithm::getWorkgroup()
{
    return this->mWorkgroup;
}
// Returns the tensors bound to this algorithm's descriptor set, in binding
// order.
const std::vector<std::shared_ptr<Tensor>>&
Algorithm::getTensors()
{
    return this->mTensors;
}
// Replaces the tensor list; callers are expected to refresh the descriptor
// bindings afterwards (see updateParameters()).
void Algorithm::setTensors(const std::vector<std::shared_ptr<Tensor>>& tensors)
{
    this->mTensors = tensors;
}
}

View file

@ -0,0 +1,82 @@
# SPDX-License-Identifier: Apache-2.0
cmake_minimum_required(VERSION 3.20)

# Android builds link against the NDK's android library.
if(KOMPUTE_OPT_ANDROID_BUILD)
    find_library(android android)
endif()

# Fix: a second, redundant cmake_minimum_required(VERSION 3.20) call that
# followed the android block has been removed.

add_library(kompute Algorithm.cpp
                    Manager.cpp
                    OpAlgoDispatch.cpp
                    OpMemoryBarrier.cpp
                    OpTensorCopy.cpp
                    OpTensorSyncDevice.cpp
                    OpTensorSyncLocal.cpp
                    OpBufferSyncDevice.cpp
                    OpBufferSyncLocal.cpp
                    Sequence.cpp
                    Tensor.cpp
                    Core.cpp)

add_library(kompute::kompute ALIAS kompute)

# Set version for shared libraries.
set_target_properties(kompute
    PROPERTIES
        VERSION ${${PROJECT_NAME}_VERSION}
        SOVERSION ${${PROJECT_NAME}_VERSION_MAJOR})

# Import GNU common install directory variables
include(GNUInstallDirs)

install(TARGETS kompute
        RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})

# Include CMake helpers for package config files
# Follow this installation guideline: https://cmake.org/cmake/help/latest/manual/cmake-packages.7.html
include(CMakePackageConfigHelpers)

configure_package_config_file(${PROJECT_SOURCE_DIR}/cmake/komputeConfig.cmake.in
                              "${PROJECT_BINARY_DIR}/kompute/komputeConfig.cmake"
                              INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/kompute)

# NOTE(review): komputeConfigVersion.cmake is installed below but no
# write_basic_package_version_file() call generates it in this file —
# confirm it is produced elsewhere in the build.
install(FILES ${PROJECT_BINARY_DIR}/kompute/komputeConfig.cmake
              ${PROJECT_BINARY_DIR}/kompute/komputeConfigVersion.cmake
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/kompute)

# ####################################################
# Linking
# ####################################################
if(KOMPUTE_OPT_ANDROID_BUILD)
    target_link_libraries(kompute PUBLIC vulkanAndroid
                                         android
                                         kp_logger
                                         kp_shader
                                         fmt::fmt)
else()
    target_link_libraries(kompute PUBLIC Vulkan::Vulkan
                                         kp_logger
                                         kp_shader
                                         fmt::fmt)
endif()

if(KOMPUTE_OPT_BUILD_PYTHON)
    include_directories(${PYTHON_INCLUDE_DIRS})
    target_link_libraries(kompute PRIVATE pybind11::headers ${PYTHON_LIBRARIES})
endif()

if(KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER)
    target_link_libraries(kompute PUBLIC Vulkan-Headers)
endif()

# ####################################################
# Misc
# ####################################################
add_subdirectory(logger)
add_subdirectory(shaders)
add_subdirectory(include)

27
kompute/src/Core.cpp Normal file
View file

@ -0,0 +1,27 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/Core.hpp"

// NOTE(review): `#if` treats an undefined macro as 0, so this block only
// activates when VK_USE_PLATFORM_ANDROID_KHR is defined to a non-zero value
// (as the Vulkan Android platform headers do) — confirm `#ifdef` was not
// the intent.
#if VK_USE_PLATFORM_ANDROID_KHR
#ifndef KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
#define KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
/**
 * Ensures support for dynamic loading of Vulkan functions on Android.
 * Acts as a default store for loaded functions.
 * More information:
 * https://github.com/KhronosGroup/Vulkan-Hpp#vulkan_hpp_default_dispatcher
 **/
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
#endif // !KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
#endif // VK_USE_PLATFORM_ANDROID_KHR

// Intentionally empty: this translation unit exists only to instantiate the
// dispatch loader storage above.
namespace kp {
} // namespace kp

493
kompute/src/Manager.cpp Normal file
View file

@ -0,0 +1,493 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/Manager.hpp"
#include "fmt/format.h"
#include "kompute/logger/Logger.hpp"
#include <fmt/core.h>
#include <iterator>
#include <set>
#include <sstream>
#include <string>
namespace kp {
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
// Vulkan debug-report callback: forwards validation layer messages to the
// Kompute debug log and never aborts the triggering call (returns VK_FALSE).
// When the active log level is above DEBUG the prefix/message parameters are
// left unnamed; KP_LOG_DEBUG presumably compiles to a no-op at that level so
// the unnamed identifiers are never referenced — verify against the logger
// macros if the log-level configuration changes.
static VKAPI_ATTR VkBool32 VKAPI_CALL
debugMessageCallback(VkDebugReportFlagsEXT /*flags*/,
                     VkDebugReportObjectTypeEXT /*objectType*/,
                     uint64_t /*object*/,
                     size_t /*location*/,
                     int32_t /*messageCode*/,
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_DEBUG
                     const char* pLayerPrefix,
                     const char* pMessage,
#else
                     const char* /*pLayerPrefix*/,
                     const char* /*pMessage*/,
#endif
                     void* /*pUserData*/)
{
    KP_LOG_DEBUG("[VALIDATION]: {} - {}", pLayerPrefix, pMessage);
    return VK_FALSE;
}
#endif
// Default constructor: enables automatic lifetime management of created
// sequences/algorithms/tensors, initializes logging when enabled, and
// creates the Vulkan instance. Device creation is deferred to
// initializeDevice().
Manager::Manager()
{
    this->mManageResources = true;

// Make sure the logger is setup
#if !KOMPUTE_OPT_LOG_LEVEL_DISABLED
    logger::setupLogger();
#endif
    this->createInstance();
}
// Creates the logical device on the selected physical device. Thin wrapper
// over createDevice(); note the argument order differs between the two
// signatures.
void Manager::initializeDevice(uint32_t physicalDeviceIndex,
                               const std::vector<uint32_t>& familyQueueIndices,
                               const std::vector<std::string>& desiredExtensions)
{
    this->createDevice(
      familyQueueIndices, physicalDeviceIndex, desiredExtensions);
}
// Destructor: tears down all managed resources, the device and the instance
// via destroy().
Manager::~Manager()
{
    KP_LOG_DEBUG("Kompute Manager Destructor started");
    this->destroy();
}
/**
 * Tears everything down in dependency order: managed sequences, then
 * algorithms, then tensors, then the logical device, the debug report
 * callback, and finally the Vulkan instance. Only resources this manager
 * owns (mFreeDevice / mFreeInstance) are destroyed. Returns early if the
 * device (and later the instance) pointer is already null.
 */
void
Manager::destroy()
{
    KP_LOG_DEBUG("Kompute Manager destroy() started");

    if (this->mDevice == nullptr) {
        KP_LOG_ERROR(
          "Kompute Manager destructor reached with null Device pointer");
        return;
    }

    // Sequences hold command buffers that must be released while the device
    // is still alive.
    if (this->mManageResources && this->mManagedSequences.size()) {
        KP_LOG_DEBUG("Kompute Manager explicitly running destructor for "
                     "managed sequences");
        for (const std::weak_ptr<Sequence>& weakSq : this->mManagedSequences) {
            if (std::shared_ptr<Sequence> sq = weakSq.lock()) {
                sq->destroy();
            }
        }
        this->mManagedSequences.clear();
    }

    if (this->mManageResources && this->mManagedAlgorithms.size()) {
        KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
        for (const std::weak_ptr<Algorithm>& weakAlgorithm :
             this->mManagedAlgorithms) {
            if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
                algorithm->destroy();
            }
        }
        this->mManagedAlgorithms.clear();
    }

    if (this->mManageResources && this->mManagedTensors.size()) {
        KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors");
        for (const std::weak_ptr<Tensor>& weakTensor : this->mManagedTensors) {
            if (std::shared_ptr<Tensor> tensor = weakTensor.lock()) {
                tensor->destroy();
            }
        }
        this->mManagedTensors.clear();
    }

    if (this->mFreeDevice) {
        KP_LOG_INFO("Destroying device");
        this->mDevice->destroy(
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
        this->mDevice = nullptr;
        KP_LOG_DEBUG("Kompute Manager Destroyed Device");
    }

    if (this->mInstance == nullptr) {
        KP_LOG_ERROR(
          "Kompute Manager destructor reached with null Instance pointer");
        return;
    }

#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
    // The debug callback must be destroyed before the instance that created
    // it.
    if (this->mDebugReportCallback) {
        this->mInstance->destroyDebugReportCallbackEXT(
          this->mDebugReportCallback, nullptr, this->mDebugDispatcher);
        KP_LOG_DEBUG("Kompute Manager Destroyed Debug Report Callback");
    }
#endif

    if (this->mFreeInstance) {
        this->mInstance->destroy(
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
        this->mInstance = nullptr;
        KP_LOG_DEBUG("Kompute Manager Destroyed Instance");
    }
}
/**
 * Creates the Vulkan instance. Unless KOMPUTE_DISABLE_VK_DEBUG_LAYERS is
 * defined, requests the debug-report extension, filters the desired (plus
 * KOMPUTE_ENV_DEBUG_LAYERS environment-provided) validation layers against
 * those available, and registers debugMessageCallback. On Android the
 * default vulkan-hpp dispatcher is primed before and after instance
 * creation.
 */
void
Manager::createInstance()
{
    KP_LOG_DEBUG("Kompute Manager creating instance");

    this->mFreeInstance = true;

    vk::ApplicationInfo applicationInfo;
    applicationInfo.pApplicationName = "Kompute";
    applicationInfo.pEngineName = "Kompute";
    applicationInfo.apiVersion = KOMPUTE_VK_API_VERSION;
    applicationInfo.engineVersion = KOMPUTE_VK_API_VERSION;
    applicationInfo.applicationVersion = KOMPUTE_VK_API_VERSION;

    std::vector<const char*> applicationExtensions;
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
    applicationExtensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
#endif

    vk::InstanceCreateInfo computeInstanceCreateInfo;
    computeInstanceCreateInfo.pApplicationInfo = &applicationInfo;
    if (!applicationExtensions.empty()) {
        computeInstanceCreateInfo.enabledExtensionCount =
          (uint32_t)applicationExtensions.size();
        computeInstanceCreateInfo.ppEnabledExtensionNames =
          applicationExtensions.data();
    }

#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
    KP_LOG_DEBUG("Kompute Manager adding debug validation layers");
    // We'll identify the layers that are supported
    std::vector<const char*> validLayerNames;
    std::vector<const char*> desiredLayerNames = {
        "VK_LAYER_LUNARG_assistant_layer",
        "VK_LAYER_LUNARG_standard_validation",
        "VK_LAYER_KHRONOS_validation",
    };
    std::vector<std::string> envLayerNames;
    const char* envLayerNamesVal = std::getenv("KOMPUTE_ENV_DEBUG_LAYERS");
    if (envLayerNamesVal != nullptr && *envLayerNamesVal != '\0') {
        KP_LOG_DEBUG("Kompute Manager adding environment layers: {}",
                     envLayerNamesVal);
        // Whitespace-separated layer names; envLayerNames stays alive for
        // the rest of this function so the c_str() pointers remain valid.
        std::istringstream iss(envLayerNamesVal);
        std::istream_iterator<std::string> beg(iss);
        std::istream_iterator<std::string> end;
        envLayerNames = std::vector<std::string>(beg, end);
        for (const std::string& layerName : envLayerNames) {
            desiredLayerNames.push_back(layerName.c_str());
        }
        KP_LOG_DEBUG("Desired layers: {}", fmt::join(desiredLayerNames, ", "));
    }

    // Identify the valid layer names based on the desiredLayerNames
    {
        std::set<std::string> uniqueLayerNames;
        std::vector<vk::LayerProperties> availableLayerProperties =
          vk::enumerateInstanceLayerProperties();
        for (vk::LayerProperties layerProperties : availableLayerProperties) {
            std::string layerName(layerProperties.layerName.data());
            uniqueLayerNames.insert(layerName);
        }
        KP_LOG_DEBUG("Available layers: {}", fmt::join(uniqueLayerNames, ", "));
        for (const char* desiredLayerName : desiredLayerNames) {
            if (uniqueLayerNames.count(desiredLayerName) != 0) {
                validLayerNames.push_back(desiredLayerName);
            }
        }
    }

    if (!validLayerNames.empty()) {
        KP_LOG_DEBUG(
          "Kompute Manager Initializing instance with valid layers: {}",
          fmt::join(validLayerNames, ", "));
        computeInstanceCreateInfo.enabledLayerCount =
          static_cast<uint32_t>(validLayerNames.size());
        computeInstanceCreateInfo.ppEnabledLayerNames = validLayerNames.data();
    } else {
        KP_LOG_WARN("Kompute Manager no valid layer names found from desired "
                    "layer names");
    }
#endif

#if VK_USE_PLATFORM_ANDROID_KHR
    // Android exposes no global loader symbols; resolve
    // vkGetInstanceProcAddr dynamically and prime the default dispatcher.
    vk::DynamicLoader dl;
    PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr =
      dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
    VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
#endif // VK_USE_PLATFORM_ANDROID_KHR

    // NOTE(review): the vk::Result of createInstance is not checked here; a
    // failure would leave *mInstance as a null handle — consider verifying.
    this->mInstance = std::make_shared<vk::Instance>();
    vk::createInstance(
      &computeInstanceCreateInfo, nullptr, this->mInstance.get());

#if VK_USE_PLATFORM_ANDROID_KHR
    VULKAN_HPP_DEFAULT_DISPATCHER.init(*this->mInstance);
#endif // VK_USE_PLATFORM_ANDROID_KHR

    KP_LOG_DEBUG("Kompute Manager Instance Created");

#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
    KP_LOG_DEBUG("Kompute Manager adding debug callbacks");
    if (validLayerNames.size() > 0) {
        vk::DebugReportFlagsEXT debugFlags =
          vk::DebugReportFlagBitsEXT::eError |
          vk::DebugReportFlagBitsEXT::eWarning;
        vk::DebugReportCallbackCreateInfoEXT debugCreateInfo = {};
        debugCreateInfo.pfnCallback =
          (PFN_vkDebugReportCallbackEXT)debugMessageCallback;
        debugCreateInfo.flags = debugFlags;

        // On non-Android builds this name resolves to the loader's global
        // vkGetInstanceProcAddr symbol; on Android it is the local pointer
        // resolved above.
        this->mDebugDispatcher.init(*this->mInstance, &vkGetInstanceProcAddr);
        this->mDebugReportCallback =
          this->mInstance->createDebugReportCallbackEXT(
            debugCreateInfo, nullptr, this->mDebugDispatcher);
    }
#endif
}
/**
 * Prunes expired (already destructed) weak references from the managed
 * tensor, algorithm and sequence lists. Does not destroy any live resource
 * itself; no-op when automatic resource management is disabled.
 */
void
Manager::clear()
{
    if (!this->mManageResources) {
        return;
    }
    // Erase-remove of expired weak_ptrs; generic over the three managed
    // container types.
    auto pruneExpired = [](auto& managed) {
        managed.erase(
          std::remove_if(begin(managed),
                         end(managed),
                         [](const auto& weak) { return weak.expired(); }),
          end(managed));
    };
    pruneExpired(this->mManagedTensors);
    pruneExpired(this->mManagedAlgorithms);
    pruneExpired(this->mManagedSequences);
}
/**
 * Creates the logical vk::Device: selects the physical device by index,
 * resolves compute-capable queue families (auto-detecting the first compute
 * queue when familyQueueIndices is empty), filters desiredExtensions
 * against what the device supports, enables 8/16-bit storage and float16 /
 * int8 shader features, creates the device and fetches one vk::Queue per
 * requested family index.
 *
 * @throws std::runtime_error when the instance is null, no Vulkan device
 *         exists, physicalDeviceIndex is out of range, or no compute queue
 *         is available.
 */
void
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
                      uint32_t physicalDeviceIndex,
                      const std::vector<std::string>& desiredExtensions)
{

    KP_LOG_DEBUG("Kompute Manager creating Device");

    if (this->mInstance == nullptr) {
        throw std::runtime_error("Kompute Manager instance is null");
    }

    this->mFreeDevice = true;

    // Getting an integer that says how many vulkan devices we have
    std::vector<vk::PhysicalDevice> physicalDevices =
      this->mInstance->enumeratePhysicalDevices();
    uint32_t deviceCount = physicalDevices.size();

    // This means there are no devices at all
    if (deviceCount == 0) {
        throw std::runtime_error("Failed to find GPUs with Vulkan support! "
                                 "Maybe you haven't installed vulkan drivers?");
    }

    // This means that we're exceeding our device limit, for
    // example if we have 2 devices, just physicalDeviceIndex
    // 0 and 1 are acceptable. Hence, physicalDeviceIndex should
    // always be less than deviceCount, else we raise an error
    if (!(deviceCount > physicalDeviceIndex)) {
        throw std::runtime_error("There is no such physical index or device, "
                                 "please use your existing device");
    }

    vk::PhysicalDevice physicalDevice = physicalDevices[physicalDeviceIndex];

    this->mPhysicalDevice =
      std::make_shared<vk::PhysicalDevice>(physicalDevice);

#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_INFO
    vk::PhysicalDeviceProperties physicalDeviceProperties =
      physicalDevice.getProperties();
#endif

    // NOTE(review): physicalDeviceProperties is only declared under the
    // log-level guard above; presumably KP_LOG_INFO compiles to a no-op
    // above INFO so this reference disappears too — verify the macros.
    KP_LOG_INFO("Using physical device index {} found {}",
                physicalDeviceIndex,
                physicalDeviceProperties.deviceName);

    if (familyQueueIndices.empty()) {
        // Find compute queue
        std::vector<vk::QueueFamilyProperties> allQueueFamilyProperties =
          physicalDevice.getQueueFamilyProperties();

        uint32_t computeQueueFamilyIndex = 0;
        bool computeQueueSupported = false;
        for (uint32_t i = 0; i < allQueueFamilyProperties.size(); i++) {
            vk::QueueFamilyProperties queueFamilyProperties =
              allQueueFamilyProperties[i];

            if (queueFamilyProperties.queueFlags &
                vk::QueueFlagBits::eCompute) {
                computeQueueFamilyIndex = i;
                computeQueueSupported = true;
                break;
            }
        }

        if (!computeQueueSupported) {
            throw std::runtime_error("Compute queue is not supported");
        }

        this->mComputeQueueFamilyIndices.push_back(computeQueueFamilyIndex);
    } else {
        this->mComputeQueueFamilyIndices = familyQueueIndices;
    }

    // Count how many queues each family needs and give each queue priority
    // 1.0.
    std::unordered_map<uint32_t, uint32_t> familyQueueCounts;
    std::unordered_map<uint32_t, std::vector<float>> familyQueuePriorities;
    for (const auto& value : this->mComputeQueueFamilyIndices) {
        familyQueueCounts[value]++;
        familyQueuePriorities[value].push_back(1.0f);
    }

    std::unordered_map<uint32_t, uint32_t> familyQueueIndexCount;
    std::vector<vk::DeviceQueueCreateInfo> deviceQueueCreateInfos;
    for (const auto& familyQueueInfo : familyQueueCounts) {
        // Setting the device count to 0
        familyQueueIndexCount[familyQueueInfo.first] = 0;

        // Creating the respective device queue
        vk::DeviceQueueCreateInfo deviceQueueCreateInfo(
          vk::DeviceQueueCreateFlags(),
          familyQueueInfo.first,
          familyQueueInfo.second,
          familyQueuePriorities[familyQueueInfo.first].data());
        deviceQueueCreateInfos.push_back(deviceQueueCreateInfo);
    }

    KP_LOG_DEBUG("Kompute Manager desired extension layers {}",
                 fmt::join(desiredExtensions, ", "));

    // Keep only the desired extensions the device actually supports.
    std::vector<vk::ExtensionProperties> deviceExtensions =
      this->mPhysicalDevice->enumerateDeviceExtensionProperties();

    std::set<std::string> uniqueExtensionNames;
    for (const vk::ExtensionProperties& ext : deviceExtensions) {
        uniqueExtensionNames.insert(ext.extensionName);
    }
    KP_LOG_DEBUG("Kompute Manager available extensions {}",
                 fmt::join(uniqueExtensionNames, ", "));
    std::vector<const char*> validExtensions;
    for (const std::string& ext : desiredExtensions) {
        if (uniqueExtensionNames.count(ext) != 0) {
            validExtensions.push_back(ext.c_str());
        }
    }
    if (desiredExtensions.size() != validExtensions.size()) {
        KP_LOG_ERROR("Kompute Manager not all extensions were added: {}",
                     fmt::join(validExtensions, ", "));
    }

    // Enable 16-bit and 8-bit storage access plus float16/int8 shader
    // arithmetic (used by quantized model kernels), chained via pNext.
    vk::PhysicalDeviceFeatures features;
    features.shaderInt16 = true;

    vk::PhysicalDeviceVulkan11Features features11;
    features11.uniformAndStorageBuffer16BitAccess = true;
    features11.storageBuffer16BitAccess = true;
    features11.pNext = nullptr;

    vk::PhysicalDeviceVulkan12Features features12;
    features12.storageBuffer8BitAccess = true;
    features12.uniformAndStorageBuffer8BitAccess = true;
    features12.shaderFloat16 = true;
    features12.shaderInt8 = true;
    features12.pNext = &features11;

    vk::DeviceCreateInfo deviceCreateInfo(vk::DeviceCreateFlags(),
                                          deviceQueueCreateInfos.size(),
                                          deviceQueueCreateInfos.data(),
                                          {},
                                          {},
                                          validExtensions.size(),
                                          validExtensions.data(),
                                          &features);
    deviceCreateInfo.pNext = &features12;

    this->mDevice = std::make_shared<vk::Device>();
    vk::Result r = physicalDevice.createDevice(
      &deviceCreateInfo, nullptr, this->mDevice.get());
    if (r != vk::Result::eSuccess) {
        KP_LOG_ERROR("Kompute Manager could not create device");
    }

    KP_LOG_DEBUG("Kompute Manager device created");

    // Fetch one vk::Queue per requested family index, walking through each
    // family's queue slots in order.
    for (const uint32_t& familyQueueIndex : this->mComputeQueueFamilyIndices) {
        std::shared_ptr<vk::Queue> currQueue = std::make_shared<vk::Queue>();

        this->mDevice->getQueue(familyQueueIndex,
                                familyQueueIndexCount[familyQueueIndex],
                                currQueue.get());

        familyQueueIndexCount[familyQueueIndex]++;

        this->mComputeQueues.push_back(currQueue);
    }

    KP_LOG_DEBUG("Kompute Manager compute queue obtained");
}
std::shared_ptr<Sequence>
Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps)
{
KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex);
std::shared_ptr<Sequence> sq{ new kp::Sequence(
this->mPhysicalDevice,
this->mDevice,
this->mComputeQueues[queueIndex],
this->mComputeQueueFamilyIndices[queueIndex],
totalTimestamps) };
if (this->mManageResources) {
this->mManagedSequences.push_back(sq);
}
return sq;
}
// Returns the properties (name, limits, etc.) of the selected physical
// device.
vk::PhysicalDeviceProperties
Manager::getDeviceProperties() const
{
    return this->mPhysicalDevice->getProperties();
}
// Enumerates all Vulkan physical devices visible through this manager's
// instance.
std::vector<vk::PhysicalDevice>
Manager::listDevices() const
{
    return this->mInstance->enumeratePhysicalDevices();
}
// Returns the shared Vulkan instance handle owned by this manager.
std::shared_ptr<vk::Instance>
Manager::getVkInstance() const
{
    return this->mInstance;
}
}

View file

@ -0,0 +1,65 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/operations/OpAlgoDispatch.hpp"
namespace kp {
// Destructor: releases the push-constants buffer, which — given the free()
// call here — is presumably malloc-allocated by the constructor (defined
// elsewhere; confirm against the header).
OpAlgoDispatch::~OpAlgoDispatch()
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started");

    if (this->mPushConstantsData) {
        KP_LOG_DEBUG("Kompute freeing push constants data");
        free(this->mPushConstantsData);
    }
}
/**
 * Records the full dispatch into the command buffer: a transfer-write →
 * shader-read barrier for every tensor used by the algorithm, optional push
 * constant update, then pipeline/descriptor binds and the dispatch itself.
 */
void
OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch record called");

    // Barrier to ensure the data is finished writing to buffer memory
    for (const std::shared_ptr<Tensor>& tensor :
         this->mAlgorithm->getTensors()) {
        tensor->recordPrimaryBufferMemoryBarrier(
          commandBuffer,
          vk::AccessFlagBits::eTransferWrite,
          vk::AccessFlagBits::eShaderRead,
          vk::PipelineStageFlagBits::eTransfer,
          vk::PipelineStageFlagBits::eComputeShader);
    }

    // Forward this op's push constants to the algorithm before binding.
    if (this->mPushConstantsSize) {
        this->mAlgorithm->setPushConstants(
          this->mPushConstantsData,
          this->mPushConstantsSize,
          this->mPushConstantsDataTypeMemorySize);
    }

    this->mAlgorithm->recordBindCore(commandBuffer);
    this->mAlgorithm->recordBindPush(commandBuffer);
    this->mAlgorithm->recordDispatch(commandBuffer);
}
// No-op hook: OpAlgoDispatch needs no host-side work before submission.
void
OpAlgoDispatch::preEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpAlgoDispatch preEval called");
}
// No-op hook: OpAlgoDispatch needs no host-side work after the fence wait.
void
OpAlgoDispatch::postEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    // Fixed log text: this hook is postEval, not postSubmit, matching the
    // wording used by the other operations.
    KP_LOG_DEBUG("Kompute OpAlgoDispatch postEval called");
}
}

View file

@ -0,0 +1,51 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/operations/OpBufferSyncDevice.hpp"
namespace kp {
// Syncs `size` bytes from the host-visible staging buffer into the
// device-local primary buffer when recorded. The op stores raw pointers
// only and never frees them (see destructor), so callers must keep both
// buffers alive for the op's lifetime.
OpBufferSyncDevice::OpBufferSyncDevice(
  vk::Buffer *primaryBuffer,
  vk::Buffer *stagingBuffer,
  vk::DeviceSize size)
  : mPrimaryBuffer(primaryBuffer)
  , mStagingBuffer(stagingBuffer)
  , mSize(size)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncDevice constructor with params");
}
// Nothing to release: the buffers are owned by the caller.
OpBufferSyncDevice::~OpBufferSyncDevice()
{
    KP_LOG_DEBUG("Kompute OpBufferSyncDevice destructor started");
}
// Records the staging -> primary copy covering the full mSize bytes,
// starting at offset 0 in both buffers.
void
OpBufferSyncDevice::record(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncDevice record called");
    const vk::BufferCopy region{ 0, 0, mSize };
    commandBuffer.copyBuffer(*mStagingBuffer, *mPrimaryBuffer, region);
}
// No-op hook: all work happens in record().
void
OpBufferSyncDevice::preEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncDevice preEval called");
}
// No-op hook: no host-side follow-up is needed after the copy executes.
void
OpBufferSyncDevice::postEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncDevice postEval called");
}
}

View file

@ -0,0 +1,51 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/operations/OpBufferSyncLocal.hpp"
namespace kp {
// Syncs `size` bytes from the device-local primary buffer back into the
// host-visible staging buffer when recorded. Stores raw, non-owning
// pointers; callers must keep both buffers alive for the op's lifetime.
OpBufferSyncLocal::OpBufferSyncLocal(
  vk::Buffer *primaryBuffer,
  vk::Buffer *stagingBuffer,
  vk::DeviceSize size)
  : mPrimaryBuffer(primaryBuffer)
  , mStagingBuffer(stagingBuffer)
  , mSize(size)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncLocal constructor with params");
}
// Nothing to release: the buffers are owned by the caller.
OpBufferSyncLocal::~OpBufferSyncLocal()
{
    KP_LOG_DEBUG("Kompute OpBufferSyncLocal destructor started");
}
// Records the primary -> staging copy covering the full mSize bytes,
// starting at offset 0 in both buffers.
void
OpBufferSyncLocal::record(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncLocal record called");
    const vk::BufferCopy region{ 0, 0, mSize };
    commandBuffer.copyBuffer(*mPrimaryBuffer, *mStagingBuffer, region);
}
// No-op hook: all work happens in record().
void
OpBufferSyncLocal::preEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncLocal preEval called");
}
// No-op hook: no host-side follow-up is needed after the copy executes.
void
OpBufferSyncLocal::postEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpBufferSyncLocal postEval called");
}
}

View file

@ -0,0 +1,74 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/operations/OpMemoryBarrier.hpp"
namespace kp {
// Records a uniform buffer-memory barrier across a set of tensors.
// The access/stage masks apply identically to every tensor;
// barrierOnPrimary selects whether the primary or the staging buffer of
// each tensor is covered (see record()).
OpMemoryBarrier::OpMemoryBarrier(
  const std::vector<std::shared_ptr<Tensor>>& tensors,
  const vk::AccessFlagBits& srcAccessMask,
  const vk::AccessFlagBits& dstAccessMask,
  const vk::PipelineStageFlagBits& srcStageMask,
  const vk::PipelineStageFlagBits& dstStageMask,
  bool barrierOnPrimary)
  : mSrcAccessMask(srcAccessMask)
  , mDstAccessMask(dstAccessMask)
  , mSrcStageMask(srcStageMask)
  , mDstStageMask(dstStageMask)
  , mBarrierOnPrimary(barrierOnPrimary)
  , mTensors(tensors)
{
    KP_LOG_DEBUG("Kompute OpMemoryBarrier constructor");
}
// Nothing to release: tensors are shared pointers owned elsewhere too.
OpMemoryBarrier::~OpMemoryBarrier()
{
    KP_LOG_DEBUG("Kompute OpMemoryBarrier destructor started");
}
// Emits one buffer-memory barrier per tensor, targeting either the
// primary (device-local) buffer or the staging buffer as configured at
// construction time.
void
OpMemoryBarrier::record(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpMemoryBarrier record called");

    for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
        if (this->mBarrierOnPrimary) {
            tensor->recordPrimaryBufferMemoryBarrier(commandBuffer,
                                                     this->mSrcAccessMask,
                                                     this->mDstAccessMask,
                                                     this->mSrcStageMask,
                                                     this->mDstStageMask);
        } else {
            tensor->recordStagingBufferMemoryBarrier(commandBuffer,
                                                     this->mSrcAccessMask,
                                                     this->mDstAccessMask,
                                                     this->mSrcStageMask,
                                                     this->mDstStageMask);
        }
    }
}
// No-op hook: the barrier is fully described at record() time.
void
OpMemoryBarrier::preEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpMemoryBarrier preEval called");
}
// No-op hook: no host-side follow-up is needed after the barrier executes.
void
OpMemoryBarrier::postEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    // Fixed log text: this hook is postEval, not postSubmit, matching the
    // wording used by the other operations.
    KP_LOG_DEBUG("Kompute OpMemoryBarrier postEval called");
}
}

View file

@ -0,0 +1,90 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/operations/OpTensorCopy.hpp"
#include "kompute/Tensor.hpp"
namespace kp {
// Copies tensor 0 into every other tensor of the list when evaluated.
// Validates up front that all tensors share the first tensor's data type
// and element count; throws std::runtime_error otherwise.
OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
{
    KP_LOG_DEBUG("Kompute OpTensorCopy constructor with params");

    this->mTensors = tensors;

    if (this->mTensors.size() < 2) {
        throw std::runtime_error(
          "Kompute OpTensorCopy called with less than 2 tensor");
    }

    // Every tensor must agree with the first one on type and size, or the
    // recorded buffer copies would be ill-formed.
    const kp::Tensor::TensorDataTypes expectedType =
      this->mTensors[0]->dataType();
    const uint32_t expectedSize = this->mTensors[0]->size();

    for (const std::shared_ptr<Tensor>& candidate : this->mTensors) {
        if (candidate->dataType() != expectedType) {
            throw std::runtime_error(fmt::format(
              "Attempting to copy tensors of different types from {} to {}",
              Tensor::toString(expectedType),
              Tensor::toString(candidate->dataType())));
        }
        if (candidate->size() != expectedSize) {
            throw std::runtime_error(fmt::format(
              "Attempting to copy tensors of different sizes from {} to {}",
              expectedSize,
              candidate->size()));
        }
    }
}
// Nothing to release: tensors are shared pointers owned elsewhere too.
OpTensorCopy::~OpTensorCopy()
{
    KP_LOG_DEBUG("Kompute OpTensorCopy destructor started");
}
// Records a GPU-side copy from tensor 0 into every other tensor.
void
OpTensorCopy::record(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpTensorCopy record called");

    // Tensor 0 is the source; each remaining tensor copies from it.
    const std::shared_ptr<Tensor>& source = this->mTensors[0];
    for (size_t idx = 1; idx < this->mTensors.size(); ++idx) {
        this->mTensors[idx]->recordCopyFrom(commandBuffer, source);
    }
}
// No-op hook: all work happens in record() and postEval().
void
OpTensorCopy::preEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpTensorCopy preEval called");
}
// After the GPU copy has executed, mirror the source tensor's host-side
// raw data into every destination tensor so CPU views stay consistent.
// Storage tensors are skipped on both ends: they expose no host data.
void
OpTensorCopy::postEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");

    const std::shared_ptr<Tensor>& source = this->mTensors[0];

    // A storage source has nothing to mirror on the CPU side.
    if (source->tensorType() == kp::Tensor::TensorTypes::eStorage)
    {
        KP_LOG_DEBUG("Kompute OpTensorCopy not copying tensor source given it's of eStorage type");
        return;
    }

    void* sourceData = source->rawData();

    for (size_t idx = 1; idx < this->mTensors.size(); ++idx) {
        const std::shared_ptr<Tensor>& dest = this->mTensors[idx];
        if (dest->tensorType() == kp::Tensor::TensorTypes::eStorage) {
            KP_LOG_DEBUG("Kompute OpTensorCopy not copying to tensor dest given it's of eStorage type");
            continue;
        }
        dest->setRawData(sourceData);
    }
}
}

View file

@ -0,0 +1,61 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/operations/OpTensorSyncDevice.hpp"
namespace kp {
// Pushes host-side data to the GPU for every device tensor in the list
// (the actual staging->primary copies are recorded in record()).
// Requires at least one tensor.
OpTensorSyncDevice::OpTensorSyncDevice(
  const std::vector<std::shared_ptr<Tensor>>& tensors)
  : mPrimaryBuffer(nullptr)
  , mStagingBuffer(nullptr)
{
    KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params");

    // Idiomatic emptiness check (was `size() < 1`).
    if (tensors.empty()) {
        throw std::runtime_error(
          "Kompute OpTensorSyncDevice called with less than 1 tensor");
    }

    this->mTensors = tensors;
}
// Drops the shared tensor references; underlying tensors are owned elsewhere.
OpTensorSyncDevice::~OpTensorSyncDevice()
{
    KP_LOG_DEBUG("Kompute OpTensorSyncDevice destructor started");

    this->mTensors.clear();
}
// Records the staging -> device copy for every device tensor; tensors of
// other types are left untouched by the loop.
void
OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpTensorSyncDevice record called");

    for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
        if (tensor->tensorType() == Tensor::TensorTypes::eDevice) {
            tensor->recordCopyFromStagingToDevice(commandBuffer);
        }
    }
}
// No-op hook: all work happens in record().
void
OpTensorSyncDevice::preEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called");
}
// No-op hook: device-bound syncs need no host-side follow-up.
void
OpTensorSyncDevice::postEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpTensorSyncDevice postEval called");
}
}

View file

@ -0,0 +1,76 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpTensorSyncLocal.hpp"
namespace kp {
// Pulls GPU data back to the host for every device tensor in the list
// (the actual device->staging copies are recorded in record()).
// Requires at least one tensor.
OpTensorSyncLocal::OpTensorSyncLocal(
  const std::vector<std::shared_ptr<Tensor>>& tensors)
{
    KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor with params");

    // Idiomatic emptiness check (was `size() < 1`).
    if (tensors.empty()) {
        throw std::runtime_error(
          "Kompute OpTensorSyncLocal called with less than 1 tensor");
    }

    this->mTensors = tensors;
}
// Nothing to release: tensors are shared pointers owned elsewhere too.
OpTensorSyncLocal::~OpTensorSyncLocal()
{
    KP_LOG_DEBUG("Kompute OpTensorSyncLocal destructor started");
}
// For every device tensor: barrier (shader write -> transfer read), copy
// device -> staging, then barrier (transfer write -> host read) so the CPU
// can safely map the staged data after the fence signals.
void
OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute OpTensorSyncLocal record called");

    for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
        if (tensor->tensorType() != Tensor::TensorTypes::eDevice) {
            continue;
        }

        // Wait for any shader writes before the transfer reads the buffer.
        tensor->recordPrimaryBufferMemoryBarrier(
          commandBuffer,
          vk::AccessFlagBits::eShaderWrite,
          vk::AccessFlagBits::eTransferRead,
          vk::PipelineStageFlagBits::eComputeShader,
          vk::PipelineStageFlagBits::eTransfer);

        tensor->recordCopyFromDeviceToStaging(commandBuffer);

        // Make the transfer result visible to host reads.
        // NOTE(review): this barrier targets the primary buffer although the
        // copy wrote the staging buffer — preserved from the original.
        tensor->recordPrimaryBufferMemoryBarrier(
          commandBuffer,
          vk::AccessFlagBits::eTransferWrite,
          vk::AccessFlagBits::eHostRead,
          vk::PipelineStageFlagBits::eTransfer,
          vk::PipelineStageFlagBits::eHost);
    }
}
// No-op hook: all work happens in record().
void
OpTensorSyncLocal::preEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpTensorSyncLocal preEval called");
}
// No host-side copy is performed here; staged data is expected to be
// visible through the tensor's mapped memory once the fence has signaled.
void
OpTensorSyncLocal::postEval(const vk::CommandBuffer& /*commandBuffer*/)
{
    KP_LOG_DEBUG("Kompute OpTensorSyncLocal postEval called");

    KP_LOG_DEBUG("Kompute OpTensorSyncLocal mapping data into tensor local");
}
}

396
kompute/src/Sequence.cpp Normal file
View file

@ -0,0 +1,396 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/Sequence.hpp"
namespace kp {
// Wraps an existing device/queue pair in a recordable command sequence.
// Stores shared handles (the sequence does not create the device or queue),
// then eagerly creates the command pool and a primary command buffer.
// When totalTimestamps > 0, a timestamp query pool is also created.
Sequence::Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
                   std::shared_ptr<vk::Device> device,
                   std::shared_ptr<vk::Queue> computeQueue,
                   uint32_t queueIndex,
                   uint32_t totalTimestamps)
{
    KP_LOG_DEBUG("Kompute Sequence Constructor with existing device & queue");

    this->mPhysicalDevice = physicalDevice;
    this->mDevice = device;
    this->mComputeQueue = computeQueue;
    this->mQueueIndex = queueIndex;

    // Pool must exist before the command buffer can be allocated from it.
    this->createCommandPool();
    this->createCommandBuffer();

    if (totalTimestamps > 0)
        // One extra slot for the baseline timestamp written in begin().
        this->createTimestampQueryPool(totalTimestamps +
                                       1); //+1 for the first one
}
// Tears down owned Vulkan resources via destroy(); skipped when the device
// handle was already released.
Sequence::~Sequence()
{
    KP_LOG_DEBUG("Kompute Sequence Destructor started");

    if (this->mDevice) {
        this->destroy();
    }
}
// Puts the command buffer into the recording state.
// No-op if already recording; throws if the previously submitted work has
// not been awaited yet. When timestamping is enabled, a baseline timestamp
// is written into query slot 0 before any operation is recorded.
void
Sequence::begin()
{
    KP_LOG_DEBUG("Kompute sequence called BEGIN");

    if (this->isRecording()) {
        KP_LOG_DEBUG("Kompute Sequence begin called when already recording");
        return;
    }

    if (this->isRunning()) {
        throw std::runtime_error(
          "Kompute Sequence begin called when sequence still running");
    }

    KP_LOG_INFO("Kompute Sequence command now started recording");
    this->mCommandBuffer->begin(vk::CommandBufferBeginInfo());
    this->mRecording = true;

    // latch the first timestamp before any commands are submitted
    if (this->timestampQueryPool)
        this->mCommandBuffer->writeTimestamp(
          vk::PipelineStageFlagBits::eAllCommands,
          *this->timestampQueryPool,
          0);
}
// Finishes recording on the command buffer.
// Throws if previously submitted work has not been awaited; warns and
// returns if nothing is being recorded.
void
Sequence::end()
{
    KP_LOG_DEBUG("Kompute Sequence calling END");

    if (this->isRunning()) {
        // Fixed copy-paste error: the message previously said "begin called"
        // even though this guard lives in end().
        throw std::runtime_error(
          "Kompute Sequence end called when sequence still running");
    }

    if (!this->isRecording()) {
        KP_LOG_WARN("Kompute Sequence end called when not recording");
        return;
    } else {
        KP_LOG_INFO("Kompute Sequence command recording END");
        this->mCommandBuffer->end();
        this->mRecording = false;
    }
}
// Resets the sequence by closing any in-progress recording; nothing else
// is cleared here (operations are dropped when record() repopulates them).
void
Sequence::clear()
{
    KP_LOG_DEBUG("Kompute Sequence calling clear");
    if (!this->isRecording()) {
        return;
    }
    this->end();
}
// Synchronous evaluation: submit the recorded work and block until done.
std::shared_ptr<Sequence>
Sequence::eval()
{
    KP_LOG_DEBUG("Kompute sequence EVAL BEGIN");

    return this->evalAsync()->evalAwait();
}
// Convenience overload: clear the sequence, record a single op and run it
// synchronously.
std::shared_ptr<Sequence>
Sequence::eval(std::shared_ptr<OpBase> op)
{
    this->clear();
    return this->record(op)->eval();
}
// Submits the recorded command buffer to the compute queue without waiting.
// Ends any in-progress recording first; throws if a previous submission has
// not been awaited. Runs each operation's preEval hook before submitting and
// creates the fence that evalAwait() later waits on.
std::shared_ptr<Sequence>
Sequence::evalAsync()
{
    if (this->isRecording()) {
        this->end();
    }

    if (this->mIsRunning) {
        throw std::runtime_error(
          "Kompute Sequence evalAsync called when an eval async was "
          "called without successful wait");
    }

    this->mIsRunning = true;

    for (size_t i = 0; i < this->mOperations.size(); i++) {
        this->mOperations[i]->preEval(*this->mCommandBuffer);
    }

    vk::SubmitInfo submitInfo(
      0, nullptr, nullptr, 1, this->mCommandBuffer.get());

    this->mFence = this->mDevice->createFence(vk::FenceCreateInfo());

    KP_LOG_DEBUG(
      "Kompute sequence submitting command buffer into compute queue");

    // NOTE(review): the vk::Result returned by submit() is not checked —
    // a failed submission would go unnoticed here.
    this->mComputeQueue->submit(1, &submitInfo, this->mFence);

    return shared_from_this();
}
// Convenience overload: reset the sequence, record the single op and submit
// it without waiting for completion.
std::shared_ptr<Sequence>
Sequence::evalAsync(std::shared_ptr<OpBase> op)
{
    this->clear();
    return this->record(op)->evalAsync();
}
// Blocks on the submission fence for up to waitFor nanoseconds.
// Warns and returns immediately if nothing was submitted. The fence is
// destroyed and mIsRunning reset regardless of outcome; on timeout the
// postEval hooks are skipped.
std::shared_ptr<Sequence>
Sequence::evalAwait(uint64_t waitFor)
{
    if (!this->mIsRunning) {
        KP_LOG_WARN("Kompute Sequence evalAwait called without existing eval");
        return shared_from_this();
    }

    vk::Result result =
      this->mDevice->waitForFences(1, &this->mFence, VK_TRUE, waitFor);
    this->mDevice->destroy(
      this->mFence, (vk::Optional<const vk::AllocationCallbacks>)nullptr);

    this->mIsRunning = false;

    if (result == vk::Result::eTimeout) {
        KP_LOG_WARN("Kompute Sequence evalAwait reached timeout of {}",
                    waitFor);
        return shared_from_this();
    }

    // Let every operation finalize host-side state (e.g. copy staged data).
    for (size_t i = 0; i < this->mOperations.size(); i++) {
        this->mOperations[i]->postEval(*this->mCommandBuffer);
    }

    return shared_from_this();
}
// True while a submission is in flight (between evalAsync and evalAwait).
bool
Sequence::isRunning() const
{
    return this->mIsRunning;
}

// True while the command buffer is in the recording state.
bool
Sequence::isRecording() const
{
    return this->mRecording;
}

// True when all core Vulkan handles this sequence needs are present.
bool
Sequence::isInit() const
{
    return this->mDevice && this->mCommandPool && this->mCommandBuffer &&
           this->mComputeQueue;
}
void
Sequence::rerecord()
{
this->end();
std::vector<std::shared_ptr<OpBase>> ops = this->mOperations;
this->mOperations.clear();
for (const std::shared_ptr<kp::OpBase>& op : ops) {
this->record(op);
}
}
// Releases everything this sequence owns: command buffer, command pool,
// recorded operations, timestamp query pool, then the shared handles.
// NOTE(review): the early `return`s inside the command-buffer and
// command-pool branches skip the remaining cleanup steps — confirm this
// partial-teardown behavior is intended.
void
Sequence::destroy()
{
    KP_LOG_DEBUG("Kompute Sequence destroy called");

    if (!this->mDevice) {
        KP_LOG_WARN("Kompute Sequence destroy called "
                    "with null Device pointer");
        return;
    }

    if (this->mFreeCommandBuffer) {
        KP_LOG_INFO("Freeing CommandBuffer");
        if (!this->mCommandBuffer) {
            KP_LOG_WARN("Kompute Sequence destroy called with null "
                        "CommandPool pointer");
            return;
        }
        this->mDevice->freeCommandBuffers(
          *this->mCommandPool, 1, this->mCommandBuffer.get());

        this->mCommandBuffer = nullptr;
        this->mFreeCommandBuffer = false;

        KP_LOG_DEBUG("Kompute Sequence Freed CommandBuffer");
    }

    if (this->mFreeCommandPool) {
        KP_LOG_INFO("Destroying CommandPool");
        if (this->mCommandPool == nullptr) {
            KP_LOG_WARN("Kompute Sequence destroy called with null "
                        "CommandPool pointer");
            return;
        }
        this->mDevice->destroy(
          *this->mCommandPool,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);

        this->mCommandPool = nullptr;
        this->mFreeCommandPool = false;

        KP_LOG_DEBUG("Kompute Sequence Destroyed CommandPool");
    }

    if (this->mOperations.size()) {
        KP_LOG_INFO("Kompute Sequence clearing operations buffer");
        this->mOperations.clear();
    }

    if (this->timestampQueryPool) {
        KP_LOG_INFO("Destroying QueryPool");
        this->mDevice->destroy(
          *this->timestampQueryPool,
          (vk::Optional<const vk::AllocationCallbacks>)nullptr);

        this->timestampQueryPool = nullptr;

        KP_LOG_DEBUG("Kompute Sequence Destroyed QueryPool");
    }

    // Drop the shared handles last; the Manager (or caller) owns them.
    if (this->mDevice) {
        this->mDevice = nullptr;
    }
    if (this->mPhysicalDevice) {
        this->mPhysicalDevice = nullptr;
    }
    if (this->mComputeQueue) {
        this->mComputeQueue = nullptr;
    }
}
// Records an operation into the command buffer and stores it for the
// pre/postEval hooks. begin() is a no-op when already recording. When
// timestamping is enabled, a timestamp is written after the op using the
// operation count as the query slot (slot 0 holds the baseline).
std::shared_ptr<Sequence>
Sequence::record(std::shared_ptr<OpBase> op)
{
    KP_LOG_DEBUG("Kompute Sequence record function started");

    this->begin();

    KP_LOG_DEBUG(
      "Kompute Sequence running record on OpBase derived class instance");

    op->record(*this->mCommandBuffer);

    this->mOperations.push_back(op);

    if (this->timestampQueryPool)
        this->mCommandBuffer->writeTimestamp(
          vk::PipelineStageFlagBits::eAllCommands,
          *this->timestampQueryPool,
          this->mOperations.size());

    return shared_from_this();
}
// Creates the command pool for this sequence's queue family and marks it
// for release in destroy().
// NOTE(review): the vk::Result of createCommandPool is not checked.
void
Sequence::createCommandPool()
{
    KP_LOG_DEBUG("Kompute Sequence creating command pool");

    if (!this->mDevice) {
        throw std::runtime_error("Kompute Sequence device is null");
    }

    this->mFreeCommandPool = true;

    vk::CommandPoolCreateInfo commandPoolInfo(vk::CommandPoolCreateFlags(),
                                              this->mQueueIndex);
    this->mCommandPool = std::make_shared<vk::CommandPool>();
    this->mDevice->createCommandPool(
      &commandPoolInfo, nullptr, this->mCommandPool.get());
    KP_LOG_DEBUG("Kompute Sequence Command Pool Created");
}
// Allocates a single primary command buffer from the pool and marks it for
// release in destroy(). Requires createCommandPool() to have run first.
// NOTE(review): the vk::Result of allocateCommandBuffers is not checked.
void
Sequence::createCommandBuffer()
{
    KP_LOG_DEBUG("Kompute Sequence creating command buffer");
    if (!this->mDevice) {
        throw std::runtime_error("Kompute Sequence device is null");
    }
    if (!this->mCommandPool) {
        throw std::runtime_error("Kompute Sequence command pool is null");
    }

    this->mFreeCommandBuffer = true;

    vk::CommandBufferAllocateInfo commandBufferAllocateInfo(
      *this->mCommandPool, vk::CommandBufferLevel::ePrimary, 1);

    this->mCommandBuffer = std::make_shared<vk::CommandBuffer>();
    this->mDevice->allocateCommandBuffers(&commandBufferAllocateInfo,
                                          this->mCommandBuffer.get());
    KP_LOG_DEBUG("Kompute Sequence Command Buffer Created");
}
// Creates a timestamp query pool holding totalTimestamps slots.
// Throws if the sequence is not fully initialized or if the device does not
// support timestamps on compute/graphics queues.
void
Sequence::createTimestampQueryPool(uint32_t totalTimestamps)
{
    KP_LOG_DEBUG("Kompute Sequence creating query pool");
    if (!this->isInit()) {
        throw std::runtime_error(
          "createTimestampQueryPool() called on uninitialized Sequence");
    }
    if (!this->mPhysicalDevice) {
        throw std::runtime_error("Kompute Sequence physical device is null");
    }

    vk::PhysicalDeviceProperties physicalDeviceProperties =
      this->mPhysicalDevice->getProperties();

    // timestampComputeAndGraphics guarantees timestamp support on all
    // graphics and compute queues.
    if (physicalDeviceProperties.limits.timestampComputeAndGraphics) {
        vk::QueryPoolCreateInfo queryPoolInfo;
        queryPoolInfo.setQueryCount(totalTimestamps);
        queryPoolInfo.setQueryType(vk::QueryType::eTimestamp);
        this->timestampQueryPool = std::make_shared<vk::QueryPool>(
          this->mDevice->createQueryPool(queryPoolInfo));
        KP_LOG_DEBUG("Query pool for timestamps created");
    } else {
        throw std::runtime_error("Device does not support timestamps");
    }
}
// Reads back one timestamp per recorded operation plus the baseline
// (slot 0), waiting until results are available.
// NOTE(review): assumes the query pool was sized for at least
// mOperations.size() + 1 slots; recording more ops than the pool size
// passed at construction would read out of range.
std::vector<std::uint64_t>
Sequence::getTimestamps()
{
    if (!this->timestampQueryPool)
        throw std::runtime_error("Timestamp latching not enabled");

    const auto n = this->mOperations.size() + 1;
    std::vector<std::uint64_t> timestamps(n, 0);
    this->mDevice->getQueryPoolResults(
      *this->timestampQueryPool,
      0,
      n,
      timestamps.size() * sizeof(std::uint64_t),
      timestamps.data(),
      sizeof(uint64_t),
      vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait);

    return timestamps;
}
}

451
kompute/src/Tensor.cpp Normal file
View file

@ -0,0 +1,451 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/Tensor.hpp"
namespace kp {
// Human-readable name of a tensor element data type.
std::string
Tensor::toString(Tensor::TensorDataTypes dt)
{
    if (dt == TensorDataTypes::eBool)
        return "eBool";
    if (dt == TensorDataTypes::eInt)
        return "eInt";
    if (dt == TensorDataTypes::eUnsignedInt)
        return "eUnsignedInt";
    if (dt == TensorDataTypes::eFloat)
        return "eFloat";
    if (dt == TensorDataTypes::eDouble)
        return "eDouble";
    return "unknown";
}
// Human-readable name of a tensor residency type.
std::string
Tensor::toString(Tensor::TensorTypes dt)
{
    if (dt == TensorTypes::eDevice)
        return "eDevice";
    if (dt == TensorTypes::eHost)
        return "eHost";
    if (dt == TensorTypes::eStorage)
        return "eStorage";
    return "unknown";
}
// Constructs a tensor view over externally created buffers/memory.
// The tensor does not allocate Vulkan resources itself; it stores the
// provided non-owning pointers via rebuild()/setGPUResources().
Tensor::Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
               std::shared_ptr<vk::Device> device,
               void* data,
               uint32_t elementTotalCount,
               uint32_t elementMemorySize,
               const TensorDataTypes& dataType,
               vk::DeviceMemory *primaryMemory,
               vk::Buffer *primaryBuffer,
               vk::DeviceMemory *stagingMemory,
               vk::Buffer *stagingBuffer,
               vk::DeviceSize offset,
               const TensorTypes& tensorType)
{
    KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}",
                 elementTotalCount,
                 Tensor::toString(tensorType));

    this->mPhysicalDevice = physicalDevice;
    this->mDevice = device;
    this->mDataType = dataType;
    this->mTensorType = tensorType;

    this->rebuild(data, elementTotalCount, elementMemorySize, primaryMemory, primaryBuffer, stagingMemory, stagingBuffer, offset);
}
// Invalidates the tensor via destroy() unless the device handle was
// already released.
Tensor::~Tensor()
{
    KP_LOG_DEBUG("Kompute Tensor destructor started. Type: {}",
                 Tensor::toString(this->tensorType()));

    if (this->mDevice) {
        this->destroy();
    }

    KP_LOG_DEBUG("Kompute Tensor destructor success");
}
// Re-points the tensor at a new set of externally owned buffers/memory.
// Updates size bookkeeping, tears down any previously attached resources,
// then stores the new pointers. The `data` parameter is currently unused.
void
Tensor::rebuild(void* /*data*/,
                uint32_t elementTotalCount,
                uint64_t memorySize,
                vk::DeviceMemory *primaryMemory,
                vk::Buffer *primaryBuffer,
                vk::DeviceMemory *stagingMemory,
                vk::Buffer *stagingBuffer,
                vk::DeviceSize offset)
{
    KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount);

    this->mSize = elementTotalCount;
    this->mMemorySize = memorySize;
    this->mOffset = offset;

    if (this->mPrimaryBuffer || this->mPrimaryMemory) {
        KP_LOG_DEBUG(
          "Kompute Tensor destroying existing resources before rebuild");
        this->destroy();
    }

    this->setGPUResources(primaryMemory, primaryBuffer, stagingMemory, stagingBuffer, offset);
}
// Residency type (device / host / storage) of this tensor.
Tensor::TensorTypes
Tensor::tensorType()
{
    return this->mTensorType;
}

// True when the tensor has a device, primary GPU resources and mapped
// host data.
bool
Tensor::isInit()
{
    return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory &&
           this->mRawData;
}

// Number of elements.
uint32_t
Tensor::size()
{
    return this->mSize;
}

// Total size of the tensor's memory region in bytes.
uint64_t
Tensor::memorySize()
{
    return this->mMemorySize;
}

// Element data type tag.
kp::Tensor::TensorDataTypes
Tensor::dataType()
{
    return this->mDataType;
}

// Pointer to the host-visible (mapped) data.
void*
Tensor::rawData()
{
    return this->mRawData;
}

// Overwrites the host-visible data with memorySize() bytes from `data`.
// The caller must guarantee `data` holds at least that many bytes.
void
Tensor::setRawData(const void* data)
{
    memcpy(this->mRawData, data, this->memorySize());
}
// Records a device-side copy of this tensor's full memory region from the
// source tensor's primary buffer into our own primary buffer, applying
// mOffset on both sides.
void
Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
                       std::shared_ptr<Tensor> copyFromTensor)
{
    const vk::DeviceSize bytes = this->memorySize();
    KP_LOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bytes);

    const vk::BufferCopy region{ mOffset, mOffset, bytes };
    this->recordCopyBuffer(commandBuffer,
                           copyFromTensor->mPrimaryBuffer,
                           this->mPrimaryBuffer,
                           bytes,
                           region);
}
// Records the staging -> primary copy for this tensor; silently skips
// tensors that have no staging buffer attached.
void
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer)
{
    if (!this->mStagingBuffer)
        return;

    const vk::DeviceSize bytes = this->memorySize();
    KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bytes);

    const vk::BufferCopy region{ mOffset, mOffset, bytes };
    this->recordCopyBuffer(commandBuffer,
                           this->mStagingBuffer,
                           this->mPrimaryBuffer,
                           bytes,
                           region);
}
// Records the primary -> staging copy for this tensor; silently skips
// tensors that have no staging buffer attached.
void
Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer)
{
    if (!this->mStagingBuffer)
        return;

    const vk::DeviceSize bytes = this->memorySize();
    KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bytes);

    const vk::BufferCopy region{ mOffset, mOffset, bytes };
    this->recordCopyBuffer(commandBuffer,
                           this->mPrimaryBuffer,
                           this->mStagingBuffer,
                           bytes,
                           region);
}
// Low-level helper: records a single vkCmdCopyBuffer with the given region.
// The bufferSize parameter is unused; the region carries the size.
void
Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
                         vk::Buffer *bufferFrom,
                         vk::Buffer *bufferTo,
                         vk::DeviceSize /*bufferSize*/,
                         vk::BufferCopy copyRegion)
{

    commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion);
}
// Records a buffer-memory barrier covering this tensor's primary buffer.
void
Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
                                         vk::AccessFlagBits srcAccessMask,
                                         vk::AccessFlagBits dstAccessMask,
                                         vk::PipelineStageFlagBits srcStageMask,
                                         vk::PipelineStageFlagBits dstStageMask)
{
    KP_LOG_DEBUG("Kompute Tensor recording PRIMARY buffer memory barrier");

    this->recordBufferMemoryBarrier(commandBuffer,
                                    *this->mPrimaryBuffer,
                                    srcAccessMask,
                                    dstAccessMask,
                                    srcStageMask,
                                    dstStageMask);
}
// Records a buffer-memory barrier covering this tensor's staging buffer;
// silently skips tensors that have no staging buffer attached.
void
Tensor::recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
                                         vk::AccessFlagBits srcAccessMask,
                                         vk::AccessFlagBits dstAccessMask,
                                         vk::PipelineStageFlagBits srcStageMask,
                                         vk::PipelineStageFlagBits dstStageMask)
{
    if (!this->mStagingBuffer)
        return;

    KP_LOG_DEBUG("Kompute Tensor recording STAGING buffer memory barrier");

    this->recordBufferMemoryBarrier(commandBuffer,
                                    *this->mStagingBuffer,
                                    srcAccessMask,
                                    dstAccessMask,
                                    srcStageMask,
                                    dstStageMask);
}
// Records a pipeline barrier with a single buffer-memory barrier on the
// given buffer, sized to this tensor's memory region. Queue family
// ownership is not transferred (VK_QUEUE_FAMILY_IGNORED on both sides).
// NOTE(review): the barrier's offset field is left at its default of 0
// while the copy helpers use mOffset — confirm this is intended for
// tensors with a non-zero offset.
void
Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
                                  const vk::Buffer& buffer,
                                  vk::AccessFlagBits srcAccessMask,
                                  vk::AccessFlagBits dstAccessMask,
                                  vk::PipelineStageFlagBits srcStageMask,
                                  vk::PipelineStageFlagBits dstStageMask)
{
    KP_LOG_DEBUG("Kompute Tensor recording buffer memory barrier");

    vk::DeviceSize bufferSize = this->memorySize();

    vk::BufferMemoryBarrier bufferMemoryBarrier;
    bufferMemoryBarrier.buffer = buffer;
    bufferMemoryBarrier.size = bufferSize;
    bufferMemoryBarrier.srcAccessMask = srcAccessMask;
    bufferMemoryBarrier.dstAccessMask = dstAccessMask;
    bufferMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    bufferMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

    commandBuffer.pipelineBarrier(srcStageMask,
                                  dstStageMask,
                                  vk::DependencyFlags(),
                                  nullptr,
                                  bufferMemoryBarrier,
                                  nullptr);
}
// Builds the descriptor-set buffer info for binding this tensor's primary
// buffer: [mOffset, mOffset + memorySize()).
vk::DescriptorBufferInfo
Tensor::constructDescriptorBufferInfo()
{
    KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}",
                 this->memorySize());
    vk::DeviceSize bufferSize = this->memorySize();
    return vk::DescriptorBufferInfo(*this->mPrimaryBuffer,
                                    mOffset, // offset
                                    bufferSize);
}
// Usage flags for the tensor's primary buffer, derived from its residency
// type. Throws for an unrecognized type.
vk::BufferUsageFlags
Tensor::getPrimaryBufferUsageFlags()
{
    switch (this->mTensorType) {
        // Device and host tensors share the same usage: storage plus both
        // transfer directions so data can be staged in and out. (Merged the
        // duplicated case bodies and removed the unreachable `break`s that
        // followed each `return`.)
        case TensorTypes::eDevice:
        case TensorTypes::eHost:
            return vk::BufferUsageFlagBits::eStorageBuffer |
                   vk::BufferUsageFlagBits::eTransferSrc |
                   vk::BufferUsageFlagBits::eTransferDst;
        // Storage tensors are GPU-only scratch space: no transfers.
        case TensorTypes::eStorage:
            return vk::BufferUsageFlagBits::eStorageBuffer;
        default:
            throw std::runtime_error("Kompute Tensor invalid tensor type");
    }
}
// Memory property flags for the tensor's primary allocation, derived from
// its residency type. Throws for an unrecognized type.
vk::MemoryPropertyFlags
Tensor::getPrimaryMemoryPropertyFlags()
{
    switch (this->mTensorType) {
        // Device and storage tensors both live in device-local memory.
        // (Merged the duplicated case bodies and removed the unreachable
        // `break`s that followed each `return`.)
        case TensorTypes::eDevice:
        case TensorTypes::eStorage:
            return vk::MemoryPropertyFlagBits::eDeviceLocal;
        // Host tensors are mapped directly, so they must be host visible
        // and coherent.
        case TensorTypes::eHost:
            return vk::MemoryPropertyFlagBits::eHostVisible |
                   vk::MemoryPropertyFlagBits::eHostCoherent;
        default:
            throw std::runtime_error("Kompute Tensor invalid tensor type");
    }
}
// Usage flags for the staging buffer; only device tensors have one, every
// other type throws. (Removed the unreachable `break` after `return`.)
vk::BufferUsageFlags
Tensor::getStagingBufferUsageFlags()
{
    switch (this->mTensorType) {
        case TensorTypes::eDevice:
            return vk::BufferUsageFlagBits::eTransferSrc |
                   vk::BufferUsageFlagBits::eTransferDst;
        default:
            throw std::runtime_error("Kompute Tensor invalid tensor type");
    }
}
// Memory property flags for the staging allocation; only device tensors
// have one, every other type throws. (Removed the unreachable `break`
// after `return`.)
vk::MemoryPropertyFlags
Tensor::getStagingMemoryPropertyFlags()
{
    switch (this->mTensorType) {
        case TensorTypes::eDevice:
            return vk::MemoryPropertyFlagBits::eHostVisible |
                   vk::MemoryPropertyFlagBits::eHostCoherent;
        default:
            throw std::runtime_error("Kompute Tensor invalid tensor type");
    }
}
// Attaches externally created, non-owning buffer/memory pointers to the
// tensor. Only device tensors receive a staging pair; the offset parameter
// is currently unused. Throws if the device handles are missing.
void
Tensor::setGPUResources(vk::DeviceMemory *primaryMemory,
                        vk::Buffer *primaryBuffer,
                        vk::DeviceMemory *stagingMemory,
                        vk::Buffer *stagingBuffer,
                        vk::DeviceSize /*offset*/)
{
    KP_LOG_DEBUG("Kompute Tensor creating buffer");

    if (!this->mPhysicalDevice) {
        // Fixed typo in the error message ("phyisical" -> "physical").
        throw std::runtime_error("Kompute Tensor physical device is null");
    }

    if (!this->mDevice) {
        throw std::runtime_error("Kompute Tensor device is null");
    }

    KP_LOG_DEBUG("Kompute Tensor creating primary buffer and memory");

    this->mPrimaryBuffer = primaryBuffer;
    this->mPrimaryMemory = primaryMemory;

    if (this->mTensorType == TensorTypes::eDevice) {
        KP_LOG_DEBUG("Kompute Tensor creating staging buffer and memory");

        this->mStagingBuffer = stagingBuffer;
        this->mStagingMemory = stagingMemory;
    }

    KP_LOG_DEBUG("Kompute Tensor buffer & memory creation successful");
}
// Invalidates the tensor: clears the raw-data pointer and size bookkeeping
// unconditionally, then drops the device reference. The tensor never owns
// its buffers/memory, so nothing is freed here.
void
Tensor::destroy()
{
    KP_LOG_DEBUG("Kompute Tensor started destroy()");

    // Setting raw data to null regardless whether device is available to
    // invalidate Tensor
    this->mRawData = nullptr;
    this->mSize = 0;
    this->mMemorySize = 0;

    if (!this->mDevice) {
        KP_LOG_WARN(
          "Kompute Tensor destructor reached with null Device pointer");
        return;
    }

    // mDevice is known non-null past the early return above, so the
    // redundant second null check was removed.
    this->mDevice = nullptr;

    KP_LOG_DEBUG("Kompute Tensor successful destroy()");
}
// Explicit specializations mapping each supported TensorT element type to
// its runtime TensorDataTypes tag.
template<>
Tensor::TensorDataTypes
TensorT<bool>::dataType()
{
    return Tensor::TensorDataTypes::eBool;
}

template<>
Tensor::TensorDataTypes
TensorT<int32_t>::dataType()
{
    return Tensor::TensorDataTypes::eInt;
}

template<>
Tensor::TensorDataTypes
TensorT<uint32_t>::dataType()
{
    return Tensor::TensorDataTypes::eUnsignedInt;
}

template<>
Tensor::TensorDataTypes
TensorT<float>::dataType()
{
    return Tensor::TensorDataTypes::eFloat;
}

template<>
Tensor::TensorDataTypes
TensorT<double>::dataType()
{
    return Tensor::TensorDataTypes::eDouble;
}
}

View file

@ -0,0 +1,46 @@
cmake_minimum_required(VERSION 3.20)

# ####################################################
# Kompute
# ####################################################
# Export the source dir as the public include path (and plain `include` once
# installed) so consumers can use `#include "kompute/..."`.
target_include_directories(kompute PUBLIC $<INSTALL_INTERFACE:include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)

# Headers are listed as PRIVATE sources only so IDE project generators show
# them; they add nothing to the build itself.
target_sources(kompute PRIVATE
    # Header files (useful in IDEs)
    kompute/Algorithm.hpp
    kompute/Core.hpp
    kompute/Kompute.hpp
    kompute/Manager.hpp
    kompute/Sequence.hpp
    kompute/Tensor.hpp

    kompute/operations/OpAlgoDispatch.hpp
    kompute/operations/OpBase.hpp
    kompute/operations/OpMemoryBarrier.hpp
    kompute/operations/OpMult.hpp
    kompute/operations/OpTensorCopy.hpp
    kompute/operations/OpTensorSyncDevice.hpp
    kompute/operations/OpTensorSyncLocal.hpp
    kompute/operations/OpBufferSyncDevice.hpp
    kompute/operations/OpBufferSyncLocal.hpp

    kompute/logger/Logger.hpp
)

install(DIRECTORY kompute DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# ####################################################
# Logger
# ####################################################
target_include_directories(kp_logger PUBLIC $<INSTALL_INTERFACE:include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)

target_sources(kp_logger PRIVATE
    # Header files (useful in IDEs)
    kompute/logger/Logger.hpp
)

# NOTE(review): this installs a top-level `logger` directory, but the logger
# header above lives under `kompute/logger` — confirm the path is correct.
install(DIRECTORY logger DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

View file

@ -0,0 +1,338 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#pragma once
#include "kompute/Core.hpp"
#include "fmt/format.h"
#include "kompute/Tensor.hpp"
#include "logger/Logger.hpp"
namespace kp {
/**
Abstraction for compute shaders that are run on top of tensors grouped via
ParameterGroups (which group descriptorsets)
*/
class Algorithm
{
  public:
    /**
     * Main constructor for algorithm with configuration parameters to create
     * the underlying resources.
     *
     * @param device The Vulkan device to use for creating resources
     * @param pool The descriptor pool from which this algorithm's descriptor
     * sets are allocated (not owned by the algorithm)
     * @param tensors (optional) The tensors to use to create the descriptor
     * resources
     * @param spirv (optional) The spirv code to use to create the algorithm
     * @param workgroup (optional) The kp::Workgroup to use for the dispatch
     * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
     * @param specializationConstants (optional) The templatable param is to be
     * used to initialize the specialization constants which cannot be changed
     * once set.
     * @param pushConstants (optional) This templatable param is to be used
     * when initializing the pipeline, which set the size of the push constants
     * - these can be modified but all new values must have the same data type
     * and length as otherwise it will result in errors.
     */
    template<typename S = float, typename P = float>
    Algorithm(std::shared_ptr<vk::Device> device,
              vk::DescriptorPool *pool,
              const std::vector<std::shared_ptr<Tensor>>& tensors = {},
              const std::vector<uint32_t>& spirv = {},
              const Workgroup& workgroup = {},
              const std::vector<S>& specializationConstants = {},
              const std::vector<P>& pushConstants = {})
    {
        KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
        this->mDevice = device;
        this->mDescriptorPool = pool;
        // Only create Vulkan resources when both tensors and SPIR-V are
        // provided; otherwise the caller is expected to invoke rebuild() later.
        if (tensors.size() && spirv.size()) {
            KP_LOG_INFO(
              "Kompute Algorithm initialising with tensor size: {} and "
              "spirv size: {}",
              tensors.size(),
              spirv.size());
            this->rebuild(tensors,
                          spirv,
                          workgroup,
                          specializationConstants,
                          pushConstants);
        } else {
            KP_LOG_INFO(
              "Kompute Algorithm constructor with empty tensors and or "
              "spirv so not rebuilding vulkan components");
        }
    }

    /**
     * Rebuild function to reconstruct algorithm with configuration parameters
     * to create the underlying resources.
     *
     * @param tensors The tensors to use to create the descriptor resources
     * @param spirv The spirv code to use to create the algorithm
     * @param workgroup (optional) The kp::Workgroup to use for the dispatch
     * which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
     * @param specializationConstants (optional) The std::vector<S> to use
     * to initialize the specialization constants which cannot be changed once
     * set.
     * @param pushConstants (optional) The std::vector<P> to use when
     * initializing the pipeline, which set the size of the push constants -
     * these can be modified but all new values must have the same vector size
     * as this initial value.
     */
    template<typename S = float, typename P = float>
    void rebuild(const std::vector<std::shared_ptr<Tensor>>& tensors,
                 const std::vector<uint32_t>& spirv,
                 const Workgroup& workgroup = {},
                 const std::vector<S>& specializationConstants = {},
                 const std::vector<P>& pushConstants = {})
    {
        KP_LOG_DEBUG("Kompute Algorithm rebuild started");
        this->mTensors = tensors;
        this->mSpirv = spirv;
        // Copy the specialization constants into an owned, type-erased heap
        // buffer so the caller's vector may go out of scope.
        if (specializationConstants.size()) {
            if (this->mSpecializationConstantsData) {
                free(this->mSpecializationConstantsData);
            }
            // sizeof(decltype(...back())) strips the reference: this is
            // sizeof(S), the per-element byte size.
            uint32_t memorySize =
              sizeof(decltype(specializationConstants.back()));
            uint32_t size = specializationConstants.size();
            uint32_t totalSize = size * memorySize;
            this->mSpecializationConstantsData = malloc(totalSize);
            memcpy(this->mSpecializationConstantsData,
                   specializationConstants.data(),
                   totalSize);
            this->mSpecializationConstantsDataTypeMemorySize = memorySize;
            this->mSpecializationConstantsSize = size;
        }
        // Same type-erased copy for the push constants; later
        // setPushConstants() calls must match this total byte size.
        if (pushConstants.size()) {
            if (this->mPushConstantsData) {
                free(this->mPushConstantsData);
            }
            uint32_t memorySize = sizeof(decltype(pushConstants.back()));
            uint32_t size = pushConstants.size();
            uint32_t totalSize = size * memorySize;
            this->mPushConstantsData = malloc(totalSize);
            memcpy(this->mPushConstantsData, pushConstants.data(), totalSize);
            this->mPushConstantsDataTypeMemorySize = memorySize;
            this->mPushConstantsSize = size;
        }
        // Default workgroup falls back to the first tensor's element count
        // (or 1 when there are no tensors).
        this->setWorkgroup(
          workgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1);
        // Descriptor pool is created first so if available then destroy all
        // before rebuild
        if (this->isInit()) {
            this->destroy();
        }
        this->createParameters();
        this->createShaderModule();
        this->createPipeline();
    }

    /**
     * Destructor for Algorithm which is responsible for freeing and destroying
     * respective pipelines and owned parameter groups.
     */
    ~Algorithm();

    /**
     * Records the dispatch function with the provided template parameters or
     * alternatively using the size of the tensor by default.
     *
     * @param commandBuffer Command buffer to record the algorithm resources to
     */
    void recordDispatch(const vk::CommandBuffer& commandBuffer);

    /**
     * Records command that binds the "core" algorithm components which consist
     * of binding the pipeline and binding the descriptorsets.
     *
     * @param commandBuffer Command buffer to record the algorithm resources to
     */
    void recordBindCore(const vk::CommandBuffer& commandBuffer);

    /**
     * Records command that binds the push constants to the command buffer
     * provided
     * - it is required that the pushConstants provided are of the same size as
     * the ones provided during initialization.
     *
     * @param commandBuffer Command buffer to record the algorithm resources to
     */
    void recordBindPush(const vk::CommandBuffer& commandBuffer);

    /**
     * function that checks all the gpu resource components to verify if these
     * have been created and returns true if all are valid.
     *
     * @returns returns true if the algorithm is currently initialized.
     */
    bool isInit();

    /**
     * Sets the work group to use in the recordDispatch
     *
     * @param workgroup The kp::Workgroup value to use to update the algorithm.
     * It must have a value greater than 1 on the x value (index 1) otherwise it
     * will be initialized on the size of the first tensor (ie.
     * this->mTensor[0]->size())
     * @param minSize Fallback x-dimension used when workgroup[0] is zero.
     */
    void setWorkgroup(const Workgroup& workgroup, uint32_t minSize = 1);

    /**
     * Sets the push constants to the new value provided to use in the next
     * bindPush()
     *
     * @param pushConstants The templatable vector is to be used to set the push
     * constants to use in the next bindPush(...) calls. The constants provided
     * must be of the same size as the ones created during initialization.
     */
    template<typename T>
    void setPushConstants(const std::vector<T>& pushConstants)
    {
        uint32_t memorySize = sizeof(decltype(pushConstants.back()));
        uint32_t size = pushConstants.size();
        this->setPushConstants(pushConstants.data(), size, memorySize);
    }

    /**
     * Re-points the algorithm at a (possibly re-created) descriptor pool and
     * re-allocates its descriptor sets from it.
     *
     * @param pool The new descriptor pool to allocate from (not owned).
     */
    void updateDescriptors(vk::DescriptorPool *pool)
    {
        this->mDescriptorPool = pool;
        this->setWorkgroup(
          this->mWorkgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1);
        this->updateParameters(); // TODO: See if we can reduce this
    }

    /**
     * Sets the push constants to the new value provided to use in the next
     * bindPush() with the raw memory block location and memory size to be used.
     *
     * @param data The raw data point to copy the data from, without modifying
     * the pointer.
     * @param size The number of data elements provided in the data
     * @param memorySize The memory size of each of the data elements in bytes.
     * @throws std::runtime_error if size * memorySize differs from the total
     * byte size established at initialization.
     */
    void setPushConstants(const void* data, uint32_t size, uint32_t memorySize)
    {
        uint32_t totalSize = memorySize * size;
        uint32_t previousTotalSize =
          this->mPushConstantsDataTypeMemorySize * this->mPushConstantsSize;
        // The pipeline layout was built with a fixed push-constant range, so
        // any replacement must match that byte size exactly.
        if (totalSize != previousTotalSize) {
            throw std::runtime_error(fmt::format(
              "Kompute Algorithm push "
              "constant total memory size provided is {} but expected {} bytes",
              totalSize,
              previousTotalSize));
        }
        if (this->mPushConstantsData) {
            free(this->mPushConstantsData);
        }
        this->mPushConstantsData = malloc(totalSize);
        memcpy(this->mPushConstantsData, data, totalSize);
        this->mPushConstantsDataTypeMemorySize = memorySize;
        this->mPushConstantsSize = size;
    }

    /**
     * Gets the current workgroup from the algorithm.
     *
     * @returns The kp::Workgroup currently set for the dispatch.
     */
    const Workgroup& getWorkgroup();

    /**
     * Gets the specialization constants of the current algorithm.
     *
     * @returns The std::vector<T> currently set for specialization
     * constants. T must match the element type used at initialization; the
     * stored bytes are reinterpreted, not converted.
     */
    template<typename T>
    const std::vector<T> getSpecializationConstants()
    {
        return { (T*)this->mSpecializationConstantsData,
                 ((T*)this->mSpecializationConstantsData) +
                   this->mSpecializationConstantsSize };
    }

    /**
     * Gets the push constants of the current algorithm.
     *
     * @returns The std::vector<T> currently set for push constants. T must
     * match the element type used at initialization; the stored bytes are
     * reinterpreted, not converted.
     */
    template<typename T>
    const std::vector<T> getPushConstants()
    {
        return { (T*)this->mPushConstantsData,
                 ((T*)this->mPushConstantsData) + this->mPushConstantsSize };
    }

    /**
     * Gets the current tensors that are used in the algorithm.
     *
     * @returns The list of tensors used in the algorithm.
     */
    const std::vector<std::shared_ptr<Tensor>>& getTensors();
    void setTensors(const std::vector<std::shared_ptr<Tensor>>& tensors);
    void destroy();

  private:
    // -------------- NEVER OWNED RESOURCES
    std::shared_ptr<vk::Device> mDevice;
    std::vector<std::shared_ptr<Tensor>> mTensors;

    // -------------- OPTIONALLY OWNED RESOURCES
    // Each Vulkan handle below is paired with a mFree* flag indicating
    // whether destroy() should release it or an external owner will.
    std::shared_ptr<vk::DescriptorSetLayout> mDescriptorSetLayout;
    bool mFreeDescriptorSetLayout = false;
    vk::DescriptorPool *mDescriptorPool = nullptr;
    std::shared_ptr<vk::DescriptorSet> mDescriptorSet;
    bool mFreeDescriptorSet = false;
    std::shared_ptr<vk::ShaderModule> mShaderModule;
    bool mFreeShaderModule = false;
    std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
    bool mFreePipelineLayout = false;
    std::shared_ptr<vk::PipelineCache> mPipelineCache;
    bool mFreePipelineCache = false;
    std::shared_ptr<vk::Pipeline> mPipeline;
    bool mFreePipeline = false;

    // -------------- ALWAYS OWNED RESOURCES
    std::vector<uint32_t> mSpirv;
    // Type-erased malloc'd copies of the spec/push constants plus the
    // per-element byte size and element count needed to reinterpret them.
    void* mSpecializationConstantsData = nullptr;
    uint32_t mSpecializationConstantsDataTypeMemorySize = 0;
    uint32_t mSpecializationConstantsSize = 0;
    void* mPushConstantsData = nullptr;
    uint32_t mPushConstantsDataTypeMemorySize = 0;
    uint32_t mPushConstantsSize = 0;
    Workgroup mWorkgroup;

    // Create util functions
    void createShaderModule();
    void createPipeline();

    // Parameters
    void freeParameters();
    void createParameters();
    void updateParameters();
};
} // End namespace kp

View file

@ -0,0 +1,39 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#pragma once
#include <vulkan/vulkan.hpp>
// Typedefs to simplify interaction with core types
namespace kp {
// Compute-dispatch dimensions (x, y, z) passed to vkCmdDispatch.
using Workgroup = std::array<uint32_t, 3>;
// Convenience alias for a list of float constants (spec/push constants).
using Constants = std::vector<float>;
}
// Must be after vulkan is included
#ifndef KOMPUTE_VK_API_VERSION
#ifndef KOMPUTE_VK_API_MAJOR_VERSION
#define KOMPUTE_VK_API_MAJOR_VERSION 1
#endif // KOMPUTE_VK_API_MAJOR_VERSION
#ifndef KOMPUTE_VK_API_MINOR_VERSION
#define KOMPUTE_VK_API_MINOR_VERSION 2
#endif // KOMPUTE_VK_API_MINOR_VERSION
// Default Vulkan API version targeted by Kompute (patch fixed at 0).
#define KOMPUTE_VK_API_VERSION                                                 \
    VK_MAKE_VERSION(                                                           \
      KOMPUTE_VK_API_MAJOR_VERSION, KOMPUTE_VK_API_MINOR_VERSION, 0)
#endif // KOMPUTE_VK_API_VERSION
#if defined(KOMPUTE_BUILD_PYTHON)
#include <pybind11/pybind11.h>
namespace py = pybind11;
// from python/src/main.cpp
extern py::object kp_trace, kp_debug, kp_info, kp_warning, kp_error;
#endif

View file

@ -0,0 +1,21 @@
#pragma once
#include "Algorithm.hpp"
#include "Core.hpp"
#include "Manager.hpp"
#include "Sequence.hpp"
#include "Tensor.hpp"
#include "operations/OpAlgoDispatch.hpp"
#include "operations/OpBase.hpp"
#include "operations/OpMemoryBarrier.hpp"
#include "operations/OpMult.hpp"
#include "operations/OpTensorCopy.hpp"
#include "operations/OpTensorSyncDevice.hpp"
#include "operations/OpTensorSyncLocal.hpp"
#include "operations/OpBufferSyncDevice.hpp"
#include "operations/OpBufferSyncLocal.hpp"
// Will be built by CMake and placed inside the build directory
#include "ShaderLogisticRegression.hpp"
#include "ShaderOpMult.hpp"

View file

@ -0,0 +1,267 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#pragma once
#include <set>
#include <unordered_map>
#include "kompute/Core.hpp"
#include "kompute/Sequence.hpp"
#include "logger/Logger.hpp"
#define KP_DEFAULT_SESSION "DEFAULT"
namespace kp {
/**
Base orchestrator which creates and manages device and child components
*/
class Manager
{
  public:
    /**
    Base constructor.
    */
    Manager();

    /**
     * Manager destructor which would ensure all owned resources are destroyed
     * unless explicitly stated that resources should not be destroyed or freed.
     */
    ~Manager();

    // True once a vk::Device has been created via initializeDevice().
    bool hasDevice() const {
        return this->mDevice.get();
    }

    /**
     * Initialize a device.
     *
     * @param physicalDeviceIndex The index of the physical device to use
     * @param familyQueueIndices (Optional) List of queue indices to add for
     * explicit allocation
     * @param desiredExtensions The desired extensions to load from
     * physicalDevice
     */
    void initializeDevice(uint32_t physicalDeviceIndex,
                          const std::vector<uint32_t>& familyQueueIndices = {},
                          const std::vector<std::string>& desiredExtensions = {});

    /**
     * Create a managed sequence that will be destroyed by this manager
     * if it hasn't been destroyed by its reference count going to zero.
     *
     * @param queueIndex The queue to use from the available queues
     * @param totalTimestamps The maximum number of timestamps to allocate.
     * If zero (default), disables latching of timestamps.
     * @returns Shared pointer with initialised sequence
     */
    std::shared_ptr<Sequence> sequence(uint32_t queueIndex = 0,
                                       uint32_t totalTimestamps = 0);

    /**
     * Create a managed tensor that will be destroyed by this manager
     * if it hasn't been destroyed by its reference count going to zero.
     *
     * @param data The data to initialize the tensor with
     * @param primaryMemory Externally-allocated device memory backing the
     * primary buffer (not owned by the tensor)
     * @param primaryBuffer Externally-created primary buffer (not owned)
     * @param stagingMemory Externally-allocated staging memory (not owned)
     * @param stagingBuffer Externally-created staging buffer (not owned)
     * @param tensorType The type of tensor to initialize
     * @returns Shared pointer with initialised tensor
     */
    template<typename T>
    std::shared_ptr<TensorT<T>> tensorT(
      const std::vector<T>& data,
      vk::DeviceMemory *primaryMemory,
      vk::Buffer *primaryBuffer,
      vk::DeviceMemory *stagingMemory,
      vk::Buffer *stagingBuffer,
      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
    {
        KP_LOG_DEBUG("Kompute Manager tensor creation triggered");
        std::shared_ptr<TensorT<T>> tensor{ new kp::TensorT<T>(
          this->mPhysicalDevice, this->mDevice, data, primaryMemory, primaryBuffer, stagingMemory, stagingBuffer, tensorType) };
        // Tracked via weak_ptr so the manager can destroy stragglers without
        // extending their lifetime.
        if (this->mManageResources) {
            this->mManagedTensors.push_back(tensor);
        }
        return tensor;
    }

    // Type-erased variant of tensorT(): wraps a raw data pointer described by
    // an element count, byte size, runtime data type and buffer offset.
    std::shared_ptr<Tensor> tensor(
      void* data,
      uint32_t elementTotalCount,
      uint64_t memorySize,
      const Tensor::TensorDataTypes& dataType,
      vk::DeviceMemory *primaryMemory,
      vk::Buffer *primaryBuffer,
      vk::DeviceMemory *stagingMemory,
      vk::Buffer *stagingBuffer,
      vk::DeviceSize offset,
      Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice)
    {
        std::shared_ptr<Tensor> tensor{ new kp::Tensor(this->mPhysicalDevice,
                                                       this->mDevice,
                                                       data,
                                                       elementTotalCount,
                                                       memorySize,
                                                       dataType,
                                                       primaryMemory,
                                                       primaryBuffer,
                                                       stagingMemory,
                                                       stagingBuffer,
                                                       offset,
                                                       tensorType) };
        if (this->mManageResources) {
            this->mManagedTensors.push_back(tensor);
        }
        return tensor;
    }

    /**
     * Default non-template function that can be used to create algorithm
     * objects which provides default types to the push and spec constants as
     * floats.
     *
     * @param pool Descriptor pool the algorithm allocates its sets from
     * @param tensors (optional) The tensors to initialise the algorithm with
     * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
     * @param workgroup (optional) kp::Workgroup for algorithm to use, and
     * defaults to (tensor[0].size(), 1, 1)
     * @param specializationConstants (optional) float vector to use for
     * specialization constants, and defaults to an empty constant
     * @param pushConstants (optional) float vector to use for push constants,
     * and defaults to an empty constant
     * @returns Shared pointer with initialised algorithm
     */
    std::shared_ptr<Algorithm> algorithm(
      vk::DescriptorPool *pool,
      const std::vector<std::shared_ptr<Tensor>>& tensors = {},
      const std::vector<uint32_t>& spirv = {},
      const Workgroup& workgroup = {},
      const std::vector<float>& specializationConstants = {},
      const std::vector<float>& pushConstants = {})
    {
        // Delegates to the templated overload with S = P = float.
        return this->algorithm<>(
          pool, tensors, spirv, workgroup, specializationConstants, pushConstants);
    }

    /**
     * Create a managed algorithm that will be destroyed by this manager
     * if it hasn't been destroyed by its reference count going to zero.
     *
     * @param pool Descriptor pool the algorithm allocates its sets from
     * @param tensors (optional) The tensors to initialise the algorithm with
     * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch
     * @param workgroup (optional) kp::Workgroup for algorithm to use, and
     * defaults to (tensor[0].size(), 1, 1)
     * @param specializationConstants (optional) templatable vector parameter to
     * use for specialization constants, and defaults to an empty constant
     * @param pushConstants (optional) templatable vector parameter to use for
     * push constants, and defaults to an empty constant
     * @returns Shared pointer with initialised algorithm
     */
    template<typename S = float, typename P = float>
    std::shared_ptr<Algorithm> algorithm(
      vk::DescriptorPool *pool,
      const std::vector<std::shared_ptr<Tensor>>& tensors,
      const std::vector<uint32_t>& spirv,
      const Workgroup& workgroup,
      const std::vector<S>& specializationConstants,
      const std::vector<P>& pushConstants)
    {
        KP_LOG_DEBUG("Kompute Manager algorithm creation triggered");
        std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
          this->mDevice,
          pool,
          tensors,
          spirv,
          workgroup,
          specializationConstants,
          pushConstants) };
        if (this->mManageResources) {
            this->mManagedAlgorithms.push_back(algorithm);
        }
        return algorithm;
    }

    /**
     * Destroy the GPU resources and all managed resources by manager.
     **/
    void destroy();

    /**
     * Run a pseudo-garbage collection to release all the managed resources
     * that have been already freed due to these reaching to zero ref count.
     **/
    void clear();

    /**
     * Information about the current device.
     *
     * @return vk::PhysicalDeviceProperties containing information about the
     *device
     **/
    vk::PhysicalDeviceProperties getDeviceProperties() const;

    /**
     * List the devices available in the current vulkan instance.
     *
     * @return vector of physical devices containing their respective properties
     **/
    std::vector<vk::PhysicalDevice> listDevices() const;

    /**
     * The current Vulkan instance.
     *
     * @return a shared pointer to the current Vulkan instance held by this
     *object
     **/
    std::shared_ptr<vk::Instance> getVkInstance() const;

    // Accessors for the logical and physical device shared pointers.
    std::shared_ptr<vk::Device> device() const { return mDevice; }
    std::shared_ptr<vk::PhysicalDevice> physicalDevice() const { return mPhysicalDevice; }

  private:
    // -------------- OPTIONALLY OWNED RESOURCES
    // mFree* flags record whether this manager created (and must destroy)
    // the corresponding Vulkan object.
    std::shared_ptr<vk::Instance> mInstance = nullptr;
    bool mFreeInstance = false;
    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
    std::shared_ptr<vk::Device> mDevice = nullptr;
    bool mFreeDevice = false;

    // -------------- ALWAYS OWNED RESOURCES
    // weak_ptr tracking: resources are released when their last external
    // shared_ptr drops, or explicitly on destroy()/clear().
    std::vector<std::weak_ptr<Tensor>> mManagedTensors;
    std::vector<std::weak_ptr<Sequence>> mManagedSequences;
    std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;

    std::vector<uint32_t> mComputeQueueFamilyIndices;
    std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;

    bool mManageResources = false;

#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
    vk::DebugReportCallbackEXT mDebugReportCallback;
    vk::DispatchLoaderDynamic mDebugDispatcher;
#endif

    // Create functions
    void createInstance();
    void createDevice(const std::vector<uint32_t>& familyQueueIndices = {},
                      uint32_t physicalDeviceIndex = 0,
                      const std::vector<std::string>& desiredExtensions = {});
};
} // End namespace kp

View file

@ -0,0 +1,313 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#pragma once
#include "kompute/Core.hpp"
#include "kompute/operations/OpAlgoDispatch.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Container of operations that can be sent to GPU as batch
*/
class Sequence : public std::enable_shared_from_this<Sequence>
{
  public:
    /**
     * Main constructor for sequence which requires core vulkan components to
     * generate all dependent resources.
     *
     * @param physicalDevice Vulkan physical device
     * @param device Vulkan logical device
     * @param computeQueue Vulkan compute queue
     * @param queueIndex Vulkan compute queue index in device
     * @param totalTimestamps Maximum number of timestamps to allocate
     */
    Sequence(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
             std::shared_ptr<vk::Device> device,
             std::shared_ptr<vk::Queue> computeQueue,
             uint32_t queueIndex,
             uint32_t totalTimestamps = 0);
    /**
     * Destructor for sequence which is responsible for cleaning all subsequent
     * owned operations.
     */
    ~Sequence();

    /**
     * Record function for operation to be added to the GPU queue in batch. This
     * template requires classes to be derived from the OpBase class. This
     * function also requires the Sequence to be recording, otherwise it will
     * not be able to add the operation.
     *
     * @param op Object derived from kp::BaseOp that will be recorded by the
     * sequence which will be used when the operation is evaluated.
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    std::shared_ptr<Sequence> record(std::shared_ptr<OpBase> op);

    /**
     * Record function for operation to be added to the GPU queue in batch. This
     * template requires classes to be derived from the OpBase class. This
     * function also requires the Sequence to be recording, otherwise it will
     * not be able to add the operation.
     *
     * @param tensors Vector of tensors to use for the operation
     * @param TArgs Template parameters that are used to initialise operation
     * which allows for extensible configurations on initialisation.
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> record(
      std::vector<std::shared_ptr<Tensor>> tensors,
      TArgs&&... params)
    {
        // Construct the op of type T in place and defer to the non-template
        // record() overload.
        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
        return this->record(op);
    }
    /**
     * Record function for operation to be added to the GPU queue in batch. This
     * template requires classes to be derived from the OpBase class. This
     * function also requires the Sequence to be recording, otherwise it will
     * not be able to add the operation.
     *
     * @param algorithm Algorithm to use for the record often used for OpAlgo
     * operations
     * @param TArgs Template parameters that are used to initialise operation
     * which allows for extensible configurations on initialisation.
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> record(std::shared_ptr<Algorithm> algorithm,
                                     TArgs&&... params)
    {
        std::shared_ptr<T> op{ new T(algorithm,
                                     std::forward<TArgs>(params)...) };
        return this->record(op);
    }

    /**
     * Eval sends all the recorded and stored operations in the vector of
     * operations into the gpu as a submit job synchronously (with a barrier).
     *
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    std::shared_ptr<Sequence> eval();

    /**
     * Resets all the recorded and stored operations, records the operation
     * provided and submits into the gpu as a submit job synchronously (with a
     * barrier).
     *
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    std::shared_ptr<Sequence> eval(std::shared_ptr<OpBase> op);

    /**
     * Eval sends all the recorded and stored operations in the vector of
     * operations into the gpu as a submit job with a barrier.
     *
     * @param tensors Vector of tensors to use for the operation
     * @param TArgs Template parameters that are used to initialise operation
     * which allows for extensible configurations on initialisation.
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
                                   TArgs&&... params)
    {
        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
        return this->eval(op);
    }

    // Buffer-based eval overload: constructs an op of type T from raw
    // primary/staging buffers and a byte size (T's constructor must accept
    // (primaryBuffer, stagingBuffer, size, ...)), then evaluates it.
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> eval(vk::Buffer *primaryBuffer,
                                   vk::Buffer *stagingBuffer,
                                   vk::DeviceSize size,
                                   TArgs&&... params)
    {
        std::shared_ptr<T> op{ new T(primaryBuffer, stagingBuffer, size, std::forward<TArgs>(params)...) };
        return this->eval(op);
    }

    /**
     * Eval sends all the recorded and stored operations in the vector of
     * operations into the gpu as a submit job with a barrier.
     *
     * @param algorithm Algorithm to use for the record often used for OpAlgo
     * operations
     * @param TArgs Template parameters that are used to initialise operation
     * which allows for extensible configurations on initialisation.
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> eval(std::shared_ptr<Algorithm> algorithm,
                                   TArgs&&... params)
    {
        std::shared_ptr<T> op{ new T(algorithm,
                                     std::forward<TArgs>(params)...) };
        return this->eval(op);
    }

    /**
     * Eval Async sends all the recorded and stored operations in the vector of
     * operations into the gpu as a submit job without a barrier. EvalAwait()
     * must ALWAYS be called after to ensure the sequence is terminated
     * correctly.
     *
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    std::shared_ptr<Sequence> evalAsync();
    /**
     * Clears current operations to record provided one in the vector of
     * operations into the gpu as a submit job without a barrier. EvalAwait()
     * must ALWAYS be called after to ensure the sequence is terminated
     * correctly.
     *
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<OpBase> op);
    /**
     * Constructs an op of type T from the given tensors and submits it
     * asynchronously (no barrier). EvalAwait() must be called afterwards.
     *
     * @param tensors Vector of tensors to use for the operation
     * @param TArgs Template parameters that are used to initialise operation
     * which allows for extensible configurations on initialisation.
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> evalAsync(
      std::vector<std::shared_ptr<Tensor>> tensors,
      TArgs&&... params)
    {
        std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
        return this->evalAsync(op);
    }
    /**
     * Constructs an op of type T from the given algorithm and submits it
     * asynchronously (no barrier). EvalAwait() must be called afterwards.
     *
     * @param algorithm Algorithm to use for the record often used for OpAlgo
     * operations
     * @param TArgs Template parameters that are used to initialise operation
     * which allows for extensible configurations on initialisation.
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    template<typename T, typename... TArgs>
    std::shared_ptr<Sequence> evalAsync(std::shared_ptr<Algorithm> algorithm,
                                        TArgs&&... params)
    {
        std::shared_ptr<T> op{ new T(algorithm,
                                     std::forward<TArgs>(params)...) };
        return this->evalAsync(op);
    }

    /**
     * Eval Await waits for the fence to finish processing and then once it
     * finishes, it runs the postEval of all operations.
     *
     * @param waitFor Number of milliseconds to wait before timing out.
     * @return shared_ptr<Sequence> of the Sequence class itself
     */
    std::shared_ptr<Sequence> evalAwait(uint64_t waitFor = UINT64_MAX);

    /**
     * Clear function clears all operations currently recorded and starts
     * recording again.
     */
    void clear();

    /**
     * Return the timestamps that were latched at the beginning and
     * after each operation during the last eval() call.
     */
    std::vector<std::uint64_t> getTimestamps();

    /**
     * Begins recording commands for commands to be submitted into the command
     * buffer.
     */
    void begin();

    /**
     * Ends the recording and stops recording commands when the record command
     * is sent.
     */
    void end();

    /**
     * Returns true if the sequence is currently in recording activated.
     *
     * @return Boolean stating if recording ongoing.
     */
    bool isRecording() const;

    /**
     * Returns true if the sequence has been initialised, and it's based on the
     * GPU resources being referenced.
     *
     * @return Boolean stating if is initialized
     */
    bool isInit() const;

    /**
     * Clears command buffer and triggers re-record of all the current
     * operations saved, which is useful if the underlying kp::Tensors or
     * kp::Algorithms are modified and need to be re-recorded.
     */
    void rerecord();

    /**
     * Returns true if the sequence is currently running - mostly used for async
     * workloads.
     *
     * @return Boolean stating if currently running.
     */
    bool isRunning() const;

    /**
     * Destroys and frees the GPU resources which include the buffer and memory
     * and sets the sequence as init=False.
     */
    void destroy();

  private:
    // -------------- NEVER OWNED RESOURCES
    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice = nullptr;
    std::shared_ptr<vk::Device> mDevice = nullptr;
    std::shared_ptr<vk::Queue> mComputeQueue = nullptr;
    uint32_t mQueueIndex = -1;

    // -------------- OPTIONALLY OWNED RESOURCES
    // mFree* flags record whether this sequence created (and must destroy)
    // the pool/buffer.
    std::shared_ptr<vk::CommandPool> mCommandPool = nullptr;
    bool mFreeCommandPool = false;
    std::shared_ptr<vk::CommandBuffer> mCommandBuffer = nullptr;
    bool mFreeCommandBuffer = false;

    // -------------- ALWAYS OWNED RESOURCES
    vk::Fence mFence;
    std::vector<std::shared_ptr<OpBase>> mOperations{};
    std::shared_ptr<vk::QueryPool> timestampQueryPool = nullptr;

    // State
    bool mRecording = false;
    bool mIsRunning = false;

    // Create functions
    void createCommandPool();
    void createCommandBuffer();
    void createTimestampQueryPool(uint32_t totalTimestamps);
};
} // End namespace kp

View file

@ -0,0 +1,306 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#pragma once
#include "kompute/Core.hpp"
#include "logger/Logger.hpp"
#include <memory>
#include <string>
namespace kp {
/**
* Structured data used in GPU operations.
*
* Tensors are the base building block in Kompute to perform operations across
* GPUs. Each tensor would have a respective Vulkan memory and buffer, which
* would be used to store their respective data. The tensors can be used for GPU
* data storage or transfer.
*/
class Tensor
{
  public:
    /**
     * Type for tensors created: Device allows memory to be transferred from
     * staging buffers. Staging are host memory visible. Storage are device
     * visible but are not set up to transfer or receive data (only for shader
     * storage).
     */
    enum class TensorTypes
    {
        eDevice = 0,  ///< Type is device memory, source and destination
        eHost = 1,    ///< Type is host memory, source and destination
        eStorage = 2, ///< Type is Device memory (only)
    };

    /// Element type of the raw data held by the tensor.
    enum class TensorDataTypes
    {
        eBool = 0,
        eInt = 1,
        eUnsignedInt = 2,
        eFloat = 3,
        eDouble = 4,
    };

    /// Human-readable name for a data type (used in logs and error messages).
    static std::string toString(TensorDataTypes dt);
    /// Human-readable name for a tensor type (used in logs and error messages).
    static std::string toString(TensorTypes dt);

    /**
     * Constructor with data provided which would be used to create the
     * respective vulkan buffer and memory.
     *
     * @param physicalDevice The physical device to use to fetch properties
     * @param device The device to use to create the buffer and memory from
     * @param data Non-zero-sized vector of data that will be used by the
     * tensor
     * @param elementTotalCount Total number of elements contained in data
     * @param memorySize Total size of the data region in bytes
     * @param dataType Element type of the data (TensorDataTypes)
     * @param primaryMemory Externally managed device memory backing the
     * primary buffer (never owned by this tensor)
     * @param primaryBuffer Externally managed primary buffer (never owned)
     * @param stagingMemory Externally managed memory backing the staging
     * buffer (never owned)
     * @param stagingBuffer Externally managed staging buffer (never owned)
     * @param offset Offset into the provided memory/buffers at which this
     * tensor's data region begins
     * @param tensorType Type for the tensor which is of type TensorTypes
     */
    Tensor(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
           std::shared_ptr<vk::Device> device,
           void* data,
           uint32_t elementTotalCount,
           uint32_t memorySize,
           const TensorDataTypes& dataType,
           vk::DeviceMemory *primaryMemory,
           vk::Buffer *primaryBuffer,
           vk::DeviceMemory *stagingMemory,
           vk::Buffer *stagingBuffer,
           vk::DeviceSize offset,
           const TensorTypes& tensorType = TensorTypes::eDevice);

    /**
     * Destructor which is in charge of freeing vulkan resources unless they
     * have been provided externally.
     */
    virtual ~Tensor();

    /**
     * Function to trigger reinitialisation of the tensor buffer and memory with
     * new data as well as new potential device type.
     *
     * @param data Pointer to data to use to initialise the tensor from
     * @param elementTotalCount Total number of elements contained in data
     * @param memorySize Total size of the data region in bytes
     * @param primaryMemory Externally managed primary memory (never owned)
     * @param primaryBuffer Externally managed primary buffer (never owned)
     * @param stagingMemory Externally managed staging memory (never owned)
     * @param stagingBuffer Externally managed staging buffer (never owned)
     * @param offset Offset into the provided memory/buffers for this tensor
     */
    void rebuild(void* data,
                 uint32_t elementTotalCount,
                 uint64_t memorySize,
                 vk::DeviceMemory *primaryMemory,
                 vk::Buffer *primaryBuffer,
                 vk::DeviceMemory *stagingMemory,
                 vk::Buffer *stagingBuffer,
                 vk::DeviceSize offset);

    /**
     * Destroys and frees the GPU resources which include the buffer and memory.
     */
    void destroy();

    /**
     * Check whether tensor is initialized based on the created gpu resources.
     *
     * @returns Boolean stating whether tensor is initialized
     */
    bool isInit();

    /**
     * Retrieve the tensor type of the Tensor
     *
     * @return Tensor type of tensor
     */
    TensorTypes tensorType();

    /**
     * Records a copy from the memory of the tensor provided to the current
     * tensor. This is intended to pass memory into a processing, to perform
     * a staging buffer transfer, or to gather output (between others).
     *
     * @param commandBuffer Vulkan Command Buffer to record the commands into
     * @param copyFromTensor Tensor to copy the data from
     */
    void recordCopyFrom(const vk::CommandBuffer& commandBuffer,
                        std::shared_ptr<Tensor> copyFromTensor);

    /**
     * Records a copy from the internal staging memory to the device memory
     * using an optional barrier to wait for the operation. This function would
     * only be relevant for kp::Tensors of type eDevice.
     *
     * @param commandBuffer Vulkan Command Buffer to record the commands into
     */
    void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer);

    /**
     * Records a copy from the internal device memory to the staging memory
     * using an optional barrier to wait for the operation. This function would
     * only be relevant for kp::Tensors of type eDevice.
     *
     * @param commandBuffer Vulkan Command Buffer to record the commands into
     */
    void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer);

    /**
     * Records the buffer memory barrier into the primary buffer and command
     * buffer which ensures that relevant data transfers are carried out
     * correctly.
     *
     * @param commandBuffer Vulkan Command Buffer to record the commands into
     * @param srcAccessMask Access flags for source access mask
     * @param dstAccessMask Access flags for destination access mask
     * @param srcStageMask Pipeline stage flags for source stage mask
     * @param dstStageMask Pipeline stage flags for destination stage mask
     */
    void recordPrimaryBufferMemoryBarrier(
      const vk::CommandBuffer& commandBuffer,
      vk::AccessFlagBits srcAccessMask,
      vk::AccessFlagBits dstAccessMask,
      vk::PipelineStageFlagBits srcStageMask,
      vk::PipelineStageFlagBits dstStageMask);

    /**
     * Records the buffer memory barrier into the staging buffer and command
     * buffer which ensures that relevant data transfers are carried out
     * correctly.
     *
     * @param commandBuffer Vulkan Command Buffer to record the commands into
     * @param srcAccessMask Access flags for source access mask
     * @param dstAccessMask Access flags for destination access mask
     * @param srcStageMask Pipeline stage flags for source stage mask
     * @param dstStageMask Pipeline stage flags for destination stage mask
     */
    void recordStagingBufferMemoryBarrier(
      const vk::CommandBuffer& commandBuffer,
      vk::AccessFlagBits srcAccessMask,
      vk::AccessFlagBits dstAccessMask,
      vk::PipelineStageFlagBits srcStageMask,
      vk::PipelineStageFlagBits dstStageMask);

    /**
     * Constructs a vulkan descriptor buffer info which can be used to specify
     * and reference the underlying buffer component of the tensor without
     * exposing it.
     *
     * @return Descriptor buffer info with own buffer
     */
    vk::DescriptorBufferInfo constructDescriptorBufferInfo();

    /**
     * Returns the size/magnitude of the Tensor, which will be the total number
     * of elements across all dimensions
     *
     * @return Unsigned integer representing the total number of elements
     */
    uint32_t size();

    /**
     * Returns the total memory size of the data contained by the Tensor object
     *
     * @return Unsigned integer representing the memory of the tensor in bytes.
     */
    uint64_t memorySize();

    /**
     * Retrieve the data type of the tensor (host, device, storage)
     *
     * @return Data type of tensor of type kp::Tensor::TensorDataTypes
     */
    TensorDataTypes dataType();

    /**
     * Retrieve the raw data via the pointer to the memory that contains the raw
     * memory of this current tensor. This pointer becomes null when the
     * Tensor is destroyed.
     *
     * @return Pointer to raw memory containing raw bytes data of Tensor.
     */
    void* rawData();

    /**
     * Sets / resets the data of the tensor which is directly done on the GPU
     * host visible memory available by the tensor.
     */
    void setRawData(const void* data);

    /**
     * Template to return the pointer data converted by specific type, which
     * would be any of the supported types including float, double, int32,
     * uint32 and bool.
     *
     * @return Pointer to raw memory containing raw bytes data of Tensor.
     */
    template<typename T>
    T* data()
    {
        return (T*)this->mRawData;
    }

    /**
     * Template to get the data of the current tensor as a vector of specific
     * type, which would be any of the supported types including float, double,
     * int32, uint32 and bool.
     *
     * @return Vector of type provided by template.
     */
    template<typename T>
    std::vector<T> vector()
    {
        return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
    }

  protected:
    // -------------- ALWAYS OWNED RESOURCES
    TensorTypes mTensorType;       // Memory placement strategy (device/host/storage)
    TensorDataTypes mDataType;     // Element type of the stored data
    uint32_t mSize = 0;            // Number of elements (not bytes)
    uint64_t mMemorySize = 0;      // Size of the data region in bytes
    vk::DeviceSize mOffset = 0;    // Offset into the shared backing buffers
    void* mRawData = nullptr;      // Host-visible mapped data pointer

  private:
    // -------------- NEVER OWNED RESOURCES
    std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
    std::shared_ptr<vk::Device> mDevice;
    vk::Buffer *mPrimaryBuffer = nullptr;
    vk::Buffer *mStagingBuffer = nullptr;
    vk::DeviceMemory *mPrimaryMemory = nullptr;
    vk::DeviceMemory *mStagingMemory = nullptr;

    // Stores the externally managed memory/buffer handles and offset.
    void setGPUResources(vk::DeviceMemory *primaryMemory,
                         vk::Buffer *primaryBuffer,
                         vk::DeviceMemory *stagingMemory,
                         vk::Buffer *stagingBuffer,
                         vk::DeviceSize offset);
    // Records a vkCmdCopyBuffer between the two buffers with the given region.
    void recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
                          vk::Buffer *bufferFrom,
                          vk::Buffer *bufferTo,
                          vk::DeviceSize bufferSize,
                          vk::BufferCopy copyRegion);
    // Shared implementation behind the primary/staging barrier helpers.
    void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
                                   const vk::Buffer& buffer,
                                   vk::AccessFlagBits srcAccessMask,
                                   vk::AccessFlagBits dstAccessMask,
                                   vk::PipelineStageFlagBits srcStageMask,
                                   vk::PipelineStageFlagBits dstStageMask);

    // Private util functions: usage/property flags derived from mTensorType.
    vk::BufferUsageFlags getPrimaryBufferUsageFlags();
    vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
    vk::BufferUsageFlags getStagingBufferUsageFlags();
    vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
};
// Thin typed wrapper over Tensor; dataType() maps the template parameter T to
// the matching TensorDataTypes value (specialisations live in the .cpp file).
template<typename T>
class TensorT : public Tensor
{
  public:
    ~TensorT() { KP_LOG_DEBUG("Kompute TensorT destructor"); }

    // Returns the TensorDataTypes enum value corresponding to T.
    TensorDataTypes dataType();
};
} // End namespace kp

View file

@ -0,0 +1,197 @@
#pragma once

// Numeric log levels compared against KOMPUTE_OPT_ACTIVE_LOG_LEVEL below.
#define KOMPUTE_LOG_LEVEL_TRACE 0
#define KOMPUTE_LOG_LEVEL_DEBUG 1
#define KOMPUTE_LOG_LEVEL_INFO 2
#define KOMPUTE_LOG_LEVEL_WARN 3
#define KOMPUTE_LOG_LEVEL_ERROR 4
#define KOMPUTE_LOG_LEVEL_CRITICAL 5
#define KOMPUTE_LOG_LEVEL_OFF 6

// Logging is disabled entirely.
#if KOMPUTE_OPT_LOG_LEVEL_DISABLED
#define KP_LOG_TRACE(...)
#define KP_LOG_DEBUG(...)
#define KP_LOG_INFO(...)
#define KP_LOG_WARN(...)
#define KP_LOG_ERROR(...)
#else

#if !KOMPUTE_OPT_USE_SPDLOG
#if VK_USE_PLATFORM_ANDROID_KHR
#include <android/log.h>
#include <fmt/core.h>
static const char* KOMPUTE_LOG_TAG = "KomputeLog";
#else
#if KOMPUTE_BUILD_PYTHON
#include <pybind11/pybind11.h>
namespace py = pybind11;
// from python/src/main.cpp
extern py::object kp_trace, kp_debug, kp_info, kp_warning, kp_error;
#else
#include <fmt/core.h>
#endif // KOMPUTE_BUILD_PYTHON
#endif // VK_USE_PLATFORM_ANDROID_KHR
#else
#include <spdlog/spdlog.h>
#endif // !KOMPUTE_OPT_USE_SPDLOG
#include <set>
#include <string>
#include <vector>

namespace logger {
// Setup the logger, note the loglevel can not be set below the CMake log level
// (To change this use -DKOMPUTE_OPT_LOG_LEVEL=...)
void
setupLogger();

// Logging is enabled, but we do not use Spdlog. So we use fmt in case nothing
// else is defined, overriding logging.
#if !KOMPUTE_OPT_USE_SPDLOG

#ifndef KP_LOG_TRACE
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_TRACE
#if VK_USE_PLATFORM_ANDROID_KHR
#define KP_LOG_TRACE(...)                                                      \
    ((void)__android_log_write(                                                \
      ANDROID_LOG_VERBOSE, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
#else
#if KOMPUTE_BUILD_PYTHON
// Fixed: this previously defined KP_LOG_DEBUG (copy-paste), which left
// KP_LOG_TRACE undefined in Python builds.
#define KP_LOG_TRACE(...) kp_trace(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_TRACE(...)                                                      \
    fmt::print("[{} {}] [trace] [{}:{}] {}\n",                                 \
               __DATE__,                                                       \
               __TIME__,                                                       \
               __FILE__,                                                       \
               __LINE__,                                                       \
               fmt::format(__VA_ARGS__))
#endif // KOMPUTE_BUILD_PYTHON
#endif // VK_USE_PLATFORM_ANDROID_KHR
#else
#define KP_LOG_TRACE(...)
#endif
#endif // !KP_LOG_TRACE

#ifndef KP_LOG_DEBUG
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_DEBUG
#if VK_USE_PLATFORM_ANDROID_KHR
#define KP_LOG_DEBUG(...)                                                      \
    ((void)__android_log_write(                                                \
      ANDROID_LOG_DEBUG, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
#else
#if KOMPUTE_BUILD_PYTHON
#define KP_LOG_DEBUG(...) kp_debug(fmt::format(__VA_ARGS__))
#else
#ifdef __FILE_NAME__ // gcc 12 provides only file name without path
#define KP_LOG_DEBUG(...)                                                      \
    fmt::print("[{} {}] [debug] [{}:{}] {}\n",                                 \
               __DATE__,                                                       \
               __TIME__,                                                       \
               __FILE_NAME__,                                                  \
               __LINE__,                                                       \
               fmt::format(__VA_ARGS__))
#else
#define KP_LOG_DEBUG(...)                                                      \
    fmt::print("[{} {}] [debug] [{}:{}] {}\n",                                 \
               __DATE__,                                                       \
               __TIME__,                                                       \
               __FILE__,                                                       \
               __LINE__,                                                       \
               fmt::format(__VA_ARGS__))
#endif // __FILE_NAME__
#endif // KOMPUTE_BUILD_PYTHON
#endif // VK_USE_PLATFORM_ANDROID_KHR
#else
#define KP_LOG_DEBUG(...)
#endif
#endif // !KP_LOG_DEBUG

#ifndef KP_LOG_INFO
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_INFO
#if VK_USE_PLATFORM_ANDROID_KHR
#define KP_LOG_INFO(...)                                                       \
    ((void)__android_log_write(                                                \
      ANDROID_LOG_INFO, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
#else
#if KOMPUTE_BUILD_PYTHON
// Fixed: this previously defined KP_LOG_DEBUG (copy-paste), which left
// KP_LOG_INFO undefined in Python builds and redefined KP_LOG_DEBUG.
#define KP_LOG_INFO(...) kp_info(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_INFO(...)                                                       \
    fmt::print("[{} {}] [info] [{}:{}] {}\n",                                  \
               __DATE__,                                                       \
               __TIME__,                                                       \
               __FILE__,                                                       \
               __LINE__,                                                       \
               fmt::format(__VA_ARGS__))
#endif // KOMPUTE_BUILD_PYTHON
#endif // VK_USE_PLATFORM_ANDROID_KHR
#else
#define KP_LOG_INFO(...)
#endif
#endif // !KP_LOG_INFO

#ifndef KP_LOG_WARN
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_WARN
#if VK_USE_PLATFORM_ANDROID_KHR
#define KP_LOG_WARN(...)                                                       \
    ((void)__android_log_write(                                                \
      ANDROID_LOG_WARN, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
#else
#if KOMPUTE_BUILD_PYTHON
// Fixed: this previously defined KP_LOG_DEBUG (copy-paste), which left
// KP_LOG_WARN undefined in Python builds and redefined KP_LOG_DEBUG.
#define KP_LOG_WARN(...) kp_warning(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_WARN(...)                                                       \
    fmt::print("[{} {}] [warn] [{}:{}] {}\n",                                  \
               __DATE__,                                                       \
               __TIME__,                                                       \
               __FILE__,                                                       \
               __LINE__,                                                       \
               fmt::format(__VA_ARGS__))
#endif // KOMPUTE_BUILD_PYTHON
#endif // VK_USE_PLATFORM_ANDROID_KHR
#else
#define KP_LOG_WARN(...)
#endif
#endif // !KP_LOG_WARN

#ifndef KP_LOG_ERROR
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_ERROR
#if VK_USE_PLATFORM_ANDROID_KHR
#define KP_LOG_ERROR(...)                                                      \
    ((void)__android_log_write(                                                \
      ANDROID_LOG_ERROR, KOMPUTE_LOG_TAG, fmt::format(__VA_ARGS__).c_str()))
#else
#if KOMPUTE_BUILD_PYTHON
// Fixed: this previously defined KP_LOG_DEBUG (copy-paste), which left
// KP_LOG_ERROR undefined in Python builds and redefined KP_LOG_DEBUG.
#define KP_LOG_ERROR(...) kp_error(fmt::format(__VA_ARGS__))
#else
#define KP_LOG_ERROR(...)                                                      \
    fmt::print("[{} {}] [error] [{}:{}] {}\n",                                 \
               __DATE__,                                                       \
               __TIME__,                                                       \
               __FILE__,                                                       \
               __LINE__,                                                       \
               fmt::format(__VA_ARGS__))
#endif // KOMPUTE_BUILD_PYTHON
#endif // VK_USE_PLATFORM_ANDROID_KHR
#else
#define KP_LOG_ERROR(...)
#endif
#endif // !KP_LOG_ERROR

#else
#define KP_LOG_TRACE(...) SPDLOG_TRACE(__VA_ARGS__)
#define KP_LOG_DEBUG(...) SPDLOG_DEBUG(__VA_ARGS__)
#define KP_LOG_INFO(...) SPDLOG_INFO(__VA_ARGS__)
#define KP_LOG_WARN(...) SPDLOG_WARN(__VA_ARGS__)
#define KP_LOG_ERROR(...) SPDLOG_ERROR(__VA_ARGS__)

// Set the runtime log level (spdlog builds only).
void
setLogLevel(spdlog::level::level_enum level);

// Get the runtime log level derived from SPDLOG_ACTIVE_LEVEL.
spdlog::level::level_enum
getLogLevel();
#endif // !KOMPUTE_OPT_USE_SPDLOG

} // namespace logger

#endif // KOMPUTE_OPT_LOG_LEVEL_DISABLED

View file

@ -0,0 +1,86 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <cstdlib>
#include <cstring>
#include <memory>
#include <vector>

#include "kompute/Algorithm.hpp"
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that provides a general abstraction that simplifies the use of
* algorithm and parameter components which can be used with shaders.
* By default it enables the user to provide a dynamic number of tensors
* which are then passed as inputs.
*/
class OpAlgoDispatch : public OpBase
{
  public:
    /**
     * Constructor that keeps a reference to the algorithm to dispatch and
     * takes an owned copy of the push constants that will override the
     * algorithm's defaults when the operation is recorded.
     *
     * @param algorithm The algorithm object to use for dispatch
     * @param pushConstants The push constants to use for override
     */
    template<typename T = float>
    OpAlgoDispatch(const std::shared_ptr<kp::Algorithm>& algorithm,
                   const std::vector<T>& pushConstants = {})
      : mAlgorithm(algorithm)
    {
        KP_LOG_DEBUG("Kompute OpAlgoDispatch constructor");

        if (!pushConstants.empty()) {
            // Keep a raw owned copy; freed by the destructor defined in the
            // implementation file.
            const uint32_t elementSize = sizeof(T);
            const uint32_t elementCount =
              static_cast<uint32_t>(pushConstants.size());
            const uint32_t totalBytes = elementCount * elementSize;
            this->mPushConstantsData = malloc(totalBytes);
            memcpy(this->mPushConstantsData, pushConstants.data(), totalBytes);
            this->mPushConstantsDataTypeMemorySize = elementSize;
            this->mPushConstantsSize = elementCount;
        }
    }

    /**
     * Default destructor, which is in charge of destroying the algorithm
     * components but does not destroy the underlying tensors
     */
    virtual ~OpAlgoDispatch() override;

    /**
     * This records the commands that are to be sent to the GPU. This includes
     * the barriers that ensure the memory has been copied before going in and
     * out of the shader, as well as the dispatch operation that sends the
     * shader processing to the gpu. This function also records the GPU memory
     * copy of the output data for the staging buffer so it can be read by the
     * host.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any postEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

  private:
    // -------------- ALWAYS OWNED RESOURCES
    std::shared_ptr<Algorithm> mAlgorithm;      // Algorithm to dispatch
    void* mPushConstantsData = nullptr;         // Owned copy of push constants
    uint32_t mPushConstantsDataTypeMemorySize = 0; // sizeof one element
    uint32_t mPushConstantsSize = 0;            // Number of elements
};
} // End namespace kp

View file

@ -0,0 +1,62 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/Algorithm.hpp"
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
namespace kp {
/**
* Base Operation which provides the high level interface that Kompute
* operations implement in order to perform a set of actions in the GPU.
*
* Operations can perform actions on tensors, and optionally can also own an
* Algorithm with respective parameters. kp::Operations with kp::Algorithms
* would inherit from kp::OpBaseAlgo.
*/
// Pure-virtual interface implemented by every Kompute operation; the Sequence
// drives the record/preEval/postEval lifecycle declared here.
class OpBase
{
  public:
    /**
     * Default destructor for OpBase class. This OpBase destructor class should
     * always be called to destroy and free owned resources unless it is
     * intended to destroy the resources in the parent class.
     */
    virtual ~OpBase() { KP_LOG_DEBUG("Kompute OpBase destructor started"); }

    /**
     * The record function is intended to only send a record command or run
     * commands that are expected to record operations that are to be submitted
     * as a batch into the GPU.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void record(const vk::CommandBuffer& commandBuffer) = 0;

    /**
     * Pre eval is called before the Sequence has called eval and submitted the
     * commands to the GPU for processing, and can be used to perform any
     * per-eval setup steps required as the computation iteration begins. It's
     * worth noting that there are situations where eval can be called multiple
     * times, so the resources that are created should be idempotent in case
     * it's called multiple times in a row.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) = 0;

    /**
     * Post eval is called after the Sequence has called eval and submitted the
     * commands to the GPU for processing, and can be used to perform any
     * tear-down steps required as the computation iteration finishes. It's
     * worth noting that there are situations where eval can be called multiple
     * times, so the resources that are destroyed should not require a re-init
     * unless explicitly provided by the user.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) = 0;
};
} // End namespace kp

View file

@ -0,0 +1,50 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/operations/OpBase.hpp"
namespace kp {
// Operation that copies raw buffer contents from a staging buffer into a
// primary (device) buffer when recorded into a command buffer.
class OpBufferSyncDevice : public OpBase
{
  public:
    /**
     * Constructor storing the buffers to sync. The buffers are not owned by
     * this operation; the caller must keep them alive while the op is in use.
     *
     * @param primaryBuffer Destination buffer (device side)
     * @param stagingBuffer Source buffer (staging side)
     * @param size Number of bytes to copy
     */
    OpBufferSyncDevice(
      vk::Buffer *primaryBuffer,
      vk::Buffer *stagingBuffer,
      vk::DeviceSize size);

    /**
     * Default destructor. This class does not manage memory so it won't be
     * expecting the parent to perform a release.
     */
    ~OpBufferSyncDevice() override;

    /**
     * For device buffers, it records the copy command for the buffer to copy
     * the data from its staging to device memory.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any postEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

  private:
    // Never owned: raw buffer handles provided by the caller.
    vk::Buffer *mPrimaryBuffer;
    vk::Buffer *mStagingBuffer;
    vk::DeviceSize mSize; // Byte count of the copy region
};
} // End namespace kp

View file

@ -0,0 +1,50 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/operations/OpBase.hpp"
namespace kp {
// Operation that copies raw buffer contents from a primary (device) buffer
// back into a staging buffer so the host can read the results.
class OpBufferSyncLocal : public OpBase
{
  public:
    /**
     * Constructor storing the buffers to sync. The buffers are not owned by
     * this operation; the caller must keep them alive while the op is in use.
     *
     * @param primaryBuffer Source buffer (device side)
     * @param stagingBuffer Destination buffer (staging side)
     * @param size Number of bytes to copy
     */
    OpBufferSyncLocal(
      vk::Buffer *primaryBuffer,
      vk::Buffer *stagingBuffer,
      vk::DeviceSize size);

    /**
     * Default destructor. This class does not manage memory so it won't be
     * expecting the parent to perform a release.
     */
    ~OpBufferSyncLocal() override;

    /**
     * For device buffers, it records the copy command for the buffer to copy
     * the data from its device to staging memory. (Original comment said the
     * reverse direction; the "Local" sync mirrors OpTensorSyncLocal.)
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any postEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

  private:
    // Never owned: raw buffer handles provided by the caller.
    vk::Buffer *mPrimaryBuffer;
    vk::Buffer *mStagingBuffer;
    vk::DeviceSize mSize; // Byte count of the copy region
};
} // End namespace kp

View file

@ -0,0 +1,81 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/Algorithm.hpp"
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that provides a general abstraction that simplifies the use of
* algorithm and parameter components which can be used with shaders.
* It exposes the pipeline barrier functionality specifically for memory
* barriers that can be configured through the respective source and destination
* masks
*/
/**
 * Operation that provides a general abstraction that simplifies the use of
 * algorithm and parameter components which can be used with shaders.
 * It exposes the pipeline barrier functionality specifically for memory
 * barriers that can be configured through the respective source and destination
 * masks
 */
class OpMemoryBarrier : public OpBase
{
  public:
    /**
     * Constructor that stores tensors as well as memory barrier parameters to
     * be used to create a pipeline barrier on the respective primary or staging
     * tensor.
     *
     * @param tensors The tensors to apply the memory barriers on
     * @param srcAccessMask The vk::AccessFlagBits for the source access mask
     * @param dstAccessMask The vk::AccessFlagBits for the destination access
     * mask
     * @param srcStageMask The vk::PipelineStageFlagBits for the source stage
     * mask
     * @param dstStageMask The vk::PipelineStageFlagBits for the destination
     * stage mask
     * @param barrierOnPrimary Boolean to select primary or staging buffers on
     * tensors
     */
    OpMemoryBarrier(const std::vector<std::shared_ptr<Tensor>>& tensors,
                    const vk::AccessFlagBits& srcAccessMask,
                    const vk::AccessFlagBits& dstAccessMask,
                    const vk::PipelineStageFlagBits& srcStageMask,
                    const vk::PipelineStageFlagBits& dstStageMask,
                    bool barrierOnPrimary = true);

    /**
     * Default destructor, which is in charge of destroying the reference to the
     * tensors and all the relevant access / stage masks created
     */
    virtual ~OpMemoryBarrier() override;

    /**
     * This records the memory barrier with the access and stage masks provided
     * across all relevant tensors.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any postEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

  private:
    // Barrier configuration captured at construction time.
    const vk::AccessFlagBits mSrcAccessMask;
    const vk::AccessFlagBits mDstAccessMask;
    const vk::PipelineStageFlagBits mSrcStageMask;
    const vk::PipelineStageFlagBits mDstStageMask;
    const bool mBarrierOnPrimary; // true: primary buffers, false: staging
    const std::vector<std::shared_ptr<Tensor>> mTensors;
};
} // End namespace kp

View file

@ -0,0 +1,58 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <fstream>
#include "kompute/Core.hpp"
#include "ShaderOpMult.hpp"
#include "kompute/Algorithm.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpAlgoDispatch.hpp"
namespace kp {
/**
* Operation that performs multiplication on two tensors and outpus on third
* tensor.
*/
/**
 * Operation that multiplies two input tensors element-wise and writes the
 * result into a third tensor, using the bundled OpMult compute shader.
 */
class OpMult : public OpAlgoDispatch
{
  public:
    /**
     * Constructor that validates the tensor count and rebuilds the provided
     * algorithm with the OpMult shader.
     *
     * @param tensors Tensors that are to be used in this operation
     * @param algorithm An algorithm that will be overridden with the OpMult
     * shader data and the tensors provided which are expected to be 3
     */
    OpMult(std::vector<std::shared_ptr<Tensor>> tensors,
           std::shared_ptr<Algorithm> algorithm)
      : OpAlgoDispatch(algorithm)
    {
        KP_LOG_DEBUG("Kompute OpMult constructor with params");

        // Exactly two inputs and one output tensor are required.
        if (tensors.size() != 3) {
            throw std::runtime_error(
              "Kompute OpMult expected 3 tensors but got " +
              std::to_string(tensors.size()));
        }

        const std::vector<uint32_t> spirv(SHADEROPMULT_COMP_SPV.begin(),
                                          SHADEROPMULT_COMP_SPV.end());
        algorithm->rebuild<>(tensors, spirv);
    }

    /**
     * Default destructor, which is in charge of destroying the algorithm
     * components but does not destroy the underlying tensors
     */
    ~OpMult() override { KP_LOG_DEBUG("Kompute OpMult destructor started"); }
};
} // End namespace kp

View file

@ -0,0 +1,63 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that copies the data from the first tensor to the rest of the
* tensors provided, using a record command for all the vectors. This operation
* does not own/manage the memory of the tensors passed to it. The operation
* must only receive tensors of type
*/
/**
 * Operation that copies the data from the first tensor to the rest of the
 * tensors provided, using a record command for all the vectors. This operation
 * does not own/manage the memory of the tensors passed to it. The operation
 * must only receive tensors of compatible type and size.
 * NOTE(review): the original comment was truncated after "tensors of type" —
 * confirm the exact constraint against the implementation.
 */
class OpTensorCopy : public OpBase
{
  public:
    /**
     * Default constructor with parameters that provides the core vulkan
     * resources and the tensors that will be used in the operation.
     *
     * @param tensors Tensors that will be used to create in operation.
     */
    OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors);

    /**
     * Default destructor. This class does not manage memory so it won't be
     * expecting the parent to perform a release.
     */
    ~OpTensorCopy() override;

    /**
     * Records the copy commands from the first tensor into all the other
     * tensors provided. Also optionally records a barrier.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Copies the local vectors for all the tensors to sync the data with the
     * gpu.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

  private:
    // -------------- ALWAYS OWNED RESOURCES
    std::vector<std::shared_ptr<Tensor>> mTensors;
};
} // End namespace kp

View file

@ -0,0 +1,66 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that syncs tensor's device by mapping local data into the device
* memory. For TensorTypes::eDevice it will use a record operation for the
* memory to be syncd into GPU memory which means that the operation will be
* done in sync with GPU commands. For TensorTypes::eHost it will only map the
* data into host memory which will happen during preEval before the recorded
* commands are dispatched.
*/
/**
 * Operation that syncs tensor's device by mapping local data into the device
 * memory. For TensorTypes::eDevice it will use a record operation for the
 * memory to be syncd into GPU memory which means that the operation will be
 * done in sync with GPU commands. For TensorTypes::eHost it will only map the
 * data into host memory which will happen during preEval before the recorded
 * commands are dispatched.
 */
class OpTensorSyncDevice : public OpBase
{
  public:
    /**
     * Default constructor with parameters that provides the core vulkan
     * resources and the tensors that will be used in the operation. The
     * tensors provided cannot be of type TensorTypes::eStorage.
     *
     * @param tensors Tensors that will be used to create in operation.
     */
    OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);

    /**
     * Default destructor. This class does not manage memory so it won't be
     * expecting the parent to perform a release.
     */
    ~OpTensorSyncDevice() override;

    /**
     * For device tensors, it records the copy command for the tensor to copy
     * the data from its staging to device memory.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any postEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

  private:
    // -------------- ALWAYS OWNED RESOURCES
    std::vector<std::shared_ptr<Tensor>> mTensors;
    // NOTE(review): raw buffer handles below are not owned despite the header
    // above — confirm lifetime against the implementation file.
    vk::Buffer *mPrimaryBuffer;
    vk::Buffer *mStagingBuffer;
    vk::DeviceSize mSize;
};
} // End namespace kp

View file

@ -0,0 +1,66 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "kompute/Core.hpp"
#include "kompute/Tensor.hpp"
#include "kompute/operations/OpBase.hpp"
namespace kp {
/**
* Operation that syncs tensor's local memory by mapping device data into the
* local CPU memory. For TensorTypes::eDevice it will use a record operation
* for the memory to be syncd into GPU memory which means that the operation
* will be done in sync with GPU commands. For TensorTypes::eHost it will
* only map the data into host memory which will happen during preEval before
* the recorded commands are dispatched.
*/
/**
 * Operation that syncs tensor's local memory by mapping device data into the
 * local CPU memory. For TensorTypes::eDevice it will use a record operation
 * for the memory to be syncd into GPU memory which means that the operation
 * will be done in sync with GPU commands. For TensorTypes::eHost it will
 * only map the data into host memory which will happen during preEval before
 * the recorded commands are dispatched.
 */
class OpTensorSyncLocal : public OpBase
{
  public:
    /**
     * Default constructor with parameters that provides the core vulkan
     * resources and the tensors that will be used in the operation. The tensors
     * provided cannot be of type TensorTypes::eStorage.
     *
     * @param tensors Tensors that will be used to create in operation.
     */
    OpTensorSyncLocal(const std::vector<std::shared_ptr<Tensor>>& tensors);

    /**
     * Default destructor. This class does not manage memory so it won't be
     * expecting the parent to perform a release.
     */
    ~OpTensorSyncLocal() override;

    /**
     * For device tensors, it records the copy command for the tensor to copy
     * the data from its device to staging memory.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    void record(const vk::CommandBuffer& commandBuffer) override;

    /**
     * Does not perform any preEval commands.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;

    /**
     * For host tensors it performs the map command from the host memory into
     * local memory.
     *
     * @param commandBuffer The command buffer to record the command into.
     */
    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;

  private:
    // -------------- ALWAYS OWNED RESOURCES
    std::vector<std::shared_ptr<Tensor>> mTensors;
};
} // End namespace kp

View file

@ -0,0 +1,69 @@
cmake_minimum_required(VERSION 3.20)

# Build the logger as its own static/shared library target.
set(LOGGER_SOURCES Logger.cpp)
add_library(kp_logger ${LOGGER_SOURCES})

# Define log levels in code
add_compile_definitions(KOMPUTE_LOG_LEVEL_TRACE=0)
add_compile_definitions(KOMPUTE_LOG_LEVEL_DEBUG=1)
add_compile_definitions(KOMPUTE_LOG_LEVEL_INFO=2)
add_compile_definitions(KOMPUTE_LOG_LEVEL_WARN=3)
add_compile_definitions(KOMPUTE_LOG_LEVEL_ERROR=4)
add_compile_definitions(KOMPUTE_LOG_LEVEL_CRITICAL=5)
add_compile_definitions(KOMPUTE_LOG_LEVEL_OFF=6)

# Fail fast on option combinations the logger cannot support.
if(KOMPUTE_OPT_BUILD_PYTHON AND KOMPUTE_OPT_USE_SPDLOG)
    message(FATAL_ERROR "'KOMPUTE_OPT_BUILD_PYTHON' is incompatible with 'KOMPUTE_OPT_USE_SPDLOG'. To continue set either one option to 'OFF'.")
endif()
if(KOMPUTE_OPT_ANDROID_BUILD AND KOMPUTE_OPT_USE_SPDLOG)
    message(FATAL_ERROR "'KOMPUTE_OPT_ANDROID_BUILD' is incompatible with 'KOMPUTE_OPT_USE_SPDLOG'. To continue set either one option to 'OFF'.")
endif()

# Map the user-facing log level name onto the suffix used by the
# KOMPUTE_LOG_LEVEL_* / SPDLOG_LEVEL_* macro families.
if(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Trace")
    set(KOMPUTE_OPT_LOG_LEVEL TRACE)
    message(STATUS "Using log level Trace")
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Debug")
    set(KOMPUTE_OPT_LOG_LEVEL DEBUG)
    message(STATUS "Using log level Debug")
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Info")
    set(KOMPUTE_OPT_LOG_LEVEL INFO)
    message(STATUS "Using log level Info")
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Warn")
    set(KOMPUTE_OPT_LOG_LEVEL WARN)
    message(STATUS "Using log level Warn")
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Error")
    set(KOMPUTE_OPT_LOG_LEVEL ERROR)
    message(STATUS "Using log level Error")
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Critical")
    set(KOMPUTE_OPT_LOG_LEVEL CRITICAL)
    message(STATUS "Using log level Critical")
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Off")
    set(KOMPUTE_OPT_LOG_LEVEL OFF)
    message(STATUS "Using log level Off")
elseif(${KOMPUTE_OPT_LOG_LEVEL} STREQUAL "Default")
    # Generator expression: DEBUG in Debug builds, INFO otherwise.
    set(KOMPUTE_OPT_LOG_LEVEL $<IF:$<CONFIG:Debug>,DEBUG,INFO>)
    message(STATUS "Setting KOMPUTE_OPT_LOG_LEVEL to according to the build type")
else()
    message(FATAL_ERROR "Log level '${KOMPUTE_OPT_LOG_LEVEL}' unknown, use -DKOMPUTE_OPT_LOG_LEVEL={Trace, Debug, Info, Warn, Error, Critical, Off, Default} to set it to a correct value.")
endif()

# Always make sure we define the Kompute log level independent of the Spdlog log level
target_compile_definitions(kp_logger INTERFACE KOMPUTE_OPT_ACTIVE_LOG_LEVEL=KOMPUTE_LOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL})

# Link depending on how the logger should be setup
if(NOT KOMPUTE_OPT_LOG_LEVEL_DISABLED)
    if(KOMPUTE_OPT_USE_SPDLOG)
        target_link_libraries(kp_logger PUBLIC spdlog::spdlog)
        target_compile_definitions(spdlog INTERFACE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL})
        target_compile_definitions(kp_logger INTERFACE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL})
        message(STATUS "setting SPDLOG_ACTIVE_LEVEL to SPDLOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL}")
        if(KOMPUTE_OPT_SPDLOG_ASYNC_MODE)
            target_compile_definitions(kp_logger INTERFACE KOMPUTE_SPDLOG_ASYNC_LOGGING=1)
        endif()
    else()
        # Without spdlog the header-only logging macros format via fmt.
        target_link_libraries(kp_logger PUBLIC fmt::fmt)
    endif()
endif()

View file

@ -0,0 +1,101 @@
#include "kompute/logger/Logger.hpp"
#if !KOMPUTE_OPT_LOG_LEVEL_DISABLED
#if !KOMPUTE_OPT_USE_SPDLOG
#else
#include <cassert>
#include <iostream>
#include <memory>
#include <mutex>
#include <spdlog/async.h>
#include <spdlog/common.h>
#include <spdlog/logger.h>
#include <spdlog/sinks/stdout_color_sinks.h>
#include <spdlog/spdlog.h>
#include <string>
#endif // !KOMPUTE_OPT_USE_SPDLOG
namespace logger {
#if !KOMPUTE_OPT_USE_SPDLOG
// No-op variant: when spdlog is not used, logging goes through the fallback
// backend and requires no global setup.
void
setupLogger()
{
}
#else
constexpr int THREAD_QUEUE_LENGTH = 8192;
/**
 * Configures the process-wide spdlog default logger (colored stdout sink,
 * optional async logging). Safe to call from multiple threads; only the
 * first call performs the setup.
 *
 * Fix: the original flag+mutex pattern set `setup = true` and released the
 * mutex *before* configuring the logger, so a concurrent second caller could
 * return and log while setup was still in progress. std::call_once both
 * serializes the setup and blocks other callers until it has finished.
 */
void
setupLogger()
{
    static std::once_flag setupDone;
    std::call_once(setupDone, []() {
        spdlog::init_thread_pool(THREAD_QUEUE_LENGTH, 1);
        spdlog::sink_ptr console_sink =
          std::make_shared<spdlog::sinks::stdout_color_sink_mt>();
#if SPDLOG_ACTIVE_LEVEL < SPDLOG_LEVEL_INFO
        // Verbose (trace/debug) builds get a wider source-location column.
        console_sink->set_pattern("[%H:%M:%S %z] [%^%=9l%$] [%=21s] %v");
#else
        console_sink->set_pattern("[%H:%M:%S %z] [%^%=9l%$] [%=15s] %v");
#endif
        std::vector<spdlog::sink_ptr> sinks{ console_sink };
        // TODO: Add flag in compile flags
        std::shared_ptr<spdlog::logger> logger =
#if KOMPUTE_SPDLOG_ASYNC_LOGGING
          // Async logger: messages are queued and written by the thread pool;
          // the queue blocks (rather than drops) on overflow.
          std::make_shared<spdlog::async_logger>(
            "",
            sinks.begin(),
            sinks.end(),
            spdlog::thread_pool(),
            spdlog::async_overflow_policy::block);
#else
          std::make_shared<spdlog::logger>(
            "",
            sinks.begin(),
            sinks.end());
#endif
        // Runtime level mirrors the compile-time SPDLOG_ACTIVE_LEVEL.
        logger->set_level(getLogLevel());
        spdlog::set_default_logger(logger);
    });
}
// Map the compile-time SPDLOG_ACTIVE_LEVEL macro to the equivalent runtime
// spdlog level, so the default logger never filters less than what was
// compiled in. Resolved entirely by the preprocessor; unknown values fall
// back to "off".
spdlog::level::level_enum
getLogLevel()
{
#if SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_TRACE
    return spdlog::level::trace;
#elif SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_DEBUG
    return spdlog::level::debug;
#elif SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_INFO
    return spdlog::level::info;
#elif SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_WARN
    return spdlog::level::warn;
#elif SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_ERROR
    return spdlog::level::error;
#elif SPDLOG_ACTIVE_LEVEL == SPDLOG_LEVEL_CRITICAL
    return spdlog::level::critical;
#else
    return spdlog::level::off;
#endif
}
// Apply the requested runtime filter level to the process-wide default logger.
void
setLogLevel(const spdlog::level::level_enum level)
{
    const auto defaultLogger = spdlog::default_logger();
    defaultLogger->set_level(level);
}
#endif // !KOMPUTE_OPT_USE_SPDLOG
} // namespace logger
#endif

View file

@ -0,0 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# ######################
cmake_minimum_required(VERSION 3.20)
# All shader sources live under glsl/, which owns their compilation and install.
add_subdirectory(glsl)

View file

@ -0,0 +1,26 @@
# SPDX-License-Identifier: Apache-2.0
# ######################
cmake_minimum_required(VERSION 3.20)
# Check if build shaders from source is enabled
if(KOMPUTE_OPT_BUILD_SHADERS)
    # Compile the .comp sources to SPIR-V and wrap them in C++ headers.
    vulkan_compile_shader(INFILE ShaderOpMult.comp
        OUTFILE ShaderOpMult.hpp
        NAMESPACE "kp")
    vulkan_compile_shader(INFILE ShaderLogisticRegression.comp
        OUTFILE ShaderLogisticRegression.hpp
        NAMESPACE "kp")
else() # Else we will use our precompiled versions
    # Copy the checked-in .hpp.in payloads into the build tree so consumers see
    # the same header names either way.
    # NOTE(review): $<BUILD_INTERFACE:> inside custom-command OUTPUT paths is
    # unusual — confirm it expands as intended here rather than using the plain
    # ${CMAKE_CURRENT_BINARY_DIR} path.
    add_custom_command(OUTPUT $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderOpMult.hpp COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/ShaderOpMult.hpp.in $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderOpMult.hpp)
    add_custom_command(OUTPUT $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderLogisticRegression.hpp COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/ShaderLogisticRegression.hpp.in $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderLogisticRegression.hpp)
endif()
# Interface library so dependents pick up the generated headers by path only.
add_library(kp_shader INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/ShaderOpMult.hpp"
    "${CMAKE_CURRENT_BINARY_DIR}/ShaderLogisticRegression.hpp")
target_include_directories(kp_shader INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)
# Make sure we install shaders:
# NOTE(review): $<BUILD_INTERFACE:> expands to empty in install() context —
# verify these install rules actually install the files.
install(FILES $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderOpMult.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(FILES $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/ShaderLogisticRegression.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

View file

@ -0,0 +1,52 @@
#version 450

// Per-sample logistic-regression gradient pass: one invocation handles one
// training sample and writes that sample's weight/bias gradients and loss.

// Specialization constant: number of samples m, used to scale gradients so
// that summing the per-sample outputs yields the batch-averaged gradient.
layout (constant_id = 0) const float m = 0;

layout (local_size_x = 1) in;

// Inputs: features (xi, xj), labels (y), current weights (win) and bias (bin).
layout(set = 0, binding = 0) buffer bxi { float xi[]; };
layout(set = 0, binding = 1) buffer bxj { float xj[]; };
layout(set = 0, binding = 2) buffer by { float y[]; };
layout(set = 0, binding = 3) buffer bwin { float win[]; };
// Outputs: per-sample gradients (wouti, woutj, bout) and per-sample loss (lout).
layout(set = 0, binding = 4) buffer bwouti { float wouti[]; };
layout(set = 0, binding = 5) buffer bwoutj { float woutj[]; };
layout(set = 0, binding = 6) buffer bbin { float bin[]; };
layout(set = 0, binding = 7) buffer bbout { float bout[]; };
layout(set = 0, binding = 8) buffer blout { float lout[]; };

// Standard logistic sigmoid: 1 / (1 + e^-z).
float sigmoid(float z) {
    return 1.0 / (1.0 + exp(-z));
}

// Model prediction: y-hat = sigmoid(w . x + b).
float inference(vec2 x, vec2 w, float b) {
    // Compute the linear mapping function
    float z = dot(w, x) + b;
    // Calculate the y-hat with sigmoid
    float yHat = sigmoid(z);
    return yHat;
}

// Binary cross-entropy loss for one sample.
float calculateLoss(float yHat, float y) {
    return -(y * log(yHat) + (1.0 - y) * log(1.0 - yHat));
}

void main() {
    // One invocation per training sample.
    uint idx = gl_GlobalInvocationID.x;
    // Shared model parameters: weights win[0..1], bias bin[0].
    vec2 wCurr = vec2(win[0], win[1]);
    float bCurr = bin[0];
    // This sample's features and label.
    vec2 xCurr = vec2(xi[idx], xj[idx]);
    float yCurr = y[idx];
    float yHat = inference(xCurr, wCurr, bCurr);
    // Cross-entropy gradients w.r.t. z, w and b, scaled by 1/m.
    float dZ = yHat - yCurr;
    vec2 dW = (1. / m) * xCurr * dZ;
    float dB = (1. / m) * dZ;
    // Emit per-sample results; the host reduces them across samples.
    wouti[idx] = dW.x;
    woutj[idx] = dW.y;
    bout[idx] = dB;
    lout[idx] = calculateLoss(yHat, yCurr);
}

View file

@ -0,0 +1,310 @@
#pragma once
#include <array>
#include <cstdint>
namespace kp {
const std::array<uint32_t, 1204> SHADERLOGISTICREGRESSION_COMP_SPV = {
0x07230203, 0x00010000, 0x0008000a, 0x000000ae,
0x00000000, 0x00020011, 0x00000001, 0x0006000b,
0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e,
0x00000000, 0x0003000e, 0x00000000, 0x00000001,
0x0006000f, 0x00000005, 0x00000004, 0x6e69616d,
0x00000000, 0x00000041, 0x00060010, 0x00000004,
0x00000011, 0x00000001, 0x00000001, 0x00000001,
0x00030003, 0x00000002, 0x000001c2, 0x00040005,
0x00000004, 0x6e69616d, 0x00000000, 0x00050005,
0x0000000a, 0x6d676973, 0x2864696f, 0x003b3166,
0x00030005, 0x00000009, 0x0000007a, 0x00080005,
0x00000012, 0x65666e69, 0x636e6572, 0x66762865,
0x66763b32, 0x31663b32, 0x0000003b, 0x00030005,
0x0000000f, 0x00000078, 0x00030005, 0x00000010,
0x00000077, 0x00030005, 0x00000011, 0x00000062,
0x00080005, 0x00000017, 0x636c6163, 0x74616c75,
0x736f4c65, 0x31662873, 0x3b31663b, 0x00000000,
0x00040005, 0x00000015, 0x74614879, 0x00000000,
0x00030005, 0x00000016, 0x00000079, 0x00030005,
0x00000021, 0x0000007a, 0x00040005, 0x00000027,
0x74614879, 0x00000000, 0x00040005, 0x00000028,
0x61726170, 0x0000006d, 0x00030005, 0x0000003e,
0x00786469, 0x00080005, 0x00000041, 0x475f6c67,
0x61626f6c, 0x766e496c, 0x7461636f, 0x496e6f69,
0x00000044, 0x00040005, 0x00000046, 0x72754377,
0x00000072, 0x00040005, 0x00000048, 0x6e697762,
0x00000000, 0x00040006, 0x00000048, 0x00000000,
0x006e6977, 0x00030005, 0x0000004a, 0x00000000,
0x00040005, 0x00000054, 0x72754362, 0x00000072,
0x00040005, 0x00000056, 0x6e696262, 0x00000000,
0x00040006, 0x00000056, 0x00000000, 0x006e6962,
0x00030005, 0x00000058, 0x00000000, 0x00040005,
0x0000005b, 0x72754378, 0x00000072, 0x00030005,
0x0000005d, 0x00697862, 0x00040006, 0x0000005d,
0x00000000, 0x00006978, 0x00030005, 0x0000005f,
0x00000000, 0x00030005, 0x00000064, 0x006a7862,
0x00040006, 0x00000064, 0x00000000, 0x00006a78,
0x00030005, 0x00000066, 0x00000000, 0x00040005,
0x0000006b, 0x72754379, 0x00000072, 0x00030005,
0x0000006d, 0x00007962, 0x00040006, 0x0000006d,
0x00000000, 0x00000079, 0x00030005, 0x0000006f,
0x00000000, 0x00040005, 0x00000073, 0x74614879,
0x00000000, 0x00040005, 0x00000074, 0x61726170,
0x0000006d, 0x00040005, 0x00000076, 0x61726170,
0x0000006d, 0x00040005, 0x00000078, 0x61726170,
0x0000006d, 0x00030005, 0x0000007b, 0x00005a64,
0x00030005, 0x0000007f, 0x00005764, 0x00030005,
0x00000080, 0x0000006d, 0x00030005, 0x00000086,
0x00004264, 0x00040005, 0x0000008b, 0x756f7762,
0x00006974, 0x00050006, 0x0000008b, 0x00000000,
0x74756f77, 0x00000069, 0x00030005, 0x0000008d,
0x00000000, 0x00040005, 0x00000093, 0x756f7762,
0x00006a74, 0x00050006, 0x00000093, 0x00000000,
0x74756f77, 0x0000006a, 0x00030005, 0x00000095,
0x00000000, 0x00040005, 0x0000009c, 0x756f6262,
0x00000074, 0x00050006, 0x0000009c, 0x00000000,
0x74756f62, 0x00000000, 0x00030005, 0x0000009e,
0x00000000, 0x00040005, 0x000000a3, 0x756f6c62,
0x00000074, 0x00050006, 0x000000a3, 0x00000000,
0x74756f6c, 0x00000000, 0x00030005, 0x000000a5,
0x00000000, 0x00040005, 0x000000a7, 0x61726170,
0x0000006d, 0x00040005, 0x000000a9, 0x61726170,
0x0000006d, 0x00040047, 0x00000041, 0x0000000b,
0x0000001c, 0x00040047, 0x00000047, 0x00000006,
0x00000004, 0x00050048, 0x00000048, 0x00000000,
0x00000023, 0x00000000, 0x00030047, 0x00000048,
0x00000003, 0x00040047, 0x0000004a, 0x00000022,
0x00000000, 0x00040047, 0x0000004a, 0x00000021,
0x00000003, 0x00040047, 0x00000055, 0x00000006,
0x00000004, 0x00050048, 0x00000056, 0x00000000,
0x00000023, 0x00000000, 0x00030047, 0x00000056,
0x00000003, 0x00040047, 0x00000058, 0x00000022,
0x00000000, 0x00040047, 0x00000058, 0x00000021,
0x00000006, 0x00040047, 0x0000005c, 0x00000006,
0x00000004, 0x00050048, 0x0000005d, 0x00000000,
0x00000023, 0x00000000, 0x00030047, 0x0000005d,
0x00000003, 0x00040047, 0x0000005f, 0x00000022,
0x00000000, 0x00040047, 0x0000005f, 0x00000021,
0x00000000, 0x00040047, 0x00000063, 0x00000006,
0x00000004, 0x00050048, 0x00000064, 0x00000000,
0x00000023, 0x00000000, 0x00030047, 0x00000064,
0x00000003, 0x00040047, 0x00000066, 0x00000022,
0x00000000, 0x00040047, 0x00000066, 0x00000021,
0x00000001, 0x00040047, 0x0000006c, 0x00000006,
0x00000004, 0x00050048, 0x0000006d, 0x00000000,
0x00000023, 0x00000000, 0x00030047, 0x0000006d,
0x00000003, 0x00040047, 0x0000006f, 0x00000022,
0x00000000, 0x00040047, 0x0000006f, 0x00000021,
0x00000002, 0x00040047, 0x00000080, 0x00000001,
0x00000000, 0x00040047, 0x0000008a, 0x00000006,
0x00000004, 0x00050048, 0x0000008b, 0x00000000,
0x00000023, 0x00000000, 0x00030047, 0x0000008b,
0x00000003, 0x00040047, 0x0000008d, 0x00000022,
0x00000000, 0x00040047, 0x0000008d, 0x00000021,
0x00000004, 0x00040047, 0x00000092, 0x00000006,
0x00000004, 0x00050048, 0x00000093, 0x00000000,
0x00000023, 0x00000000, 0x00030047, 0x00000093,
0x00000003, 0x00040047, 0x00000095, 0x00000022,
0x00000000, 0x00040047, 0x00000095, 0x00000021,
0x00000005, 0x00040047, 0x0000009b, 0x00000006,
0x00000004, 0x00050048, 0x0000009c, 0x00000000,
0x00000023, 0x00000000, 0x00030047, 0x0000009c,
0x00000003, 0x00040047, 0x0000009e, 0x00000022,
0x00000000, 0x00040047, 0x0000009e, 0x00000021,
0x00000007, 0x00040047, 0x000000a2, 0x00000006,
0x00000004, 0x00050048, 0x000000a3, 0x00000000,
0x00000023, 0x00000000, 0x00030047, 0x000000a3,
0x00000003, 0x00040047, 0x000000a5, 0x00000022,
0x00000000, 0x00040047, 0x000000a5, 0x00000021,
0x00000008, 0x00040047, 0x000000ad, 0x0000000b,
0x00000019, 0x00020013, 0x00000002, 0x00030021,
0x00000003, 0x00000002, 0x00030016, 0x00000006,
0x00000020, 0x00040020, 0x00000007, 0x00000007,
0x00000006, 0x00040021, 0x00000008, 0x00000006,
0x00000007, 0x00040017, 0x0000000c, 0x00000006,
0x00000002, 0x00040020, 0x0000000d, 0x00000007,
0x0000000c, 0x00060021, 0x0000000e, 0x00000006,
0x0000000d, 0x0000000d, 0x00000007, 0x00050021,
0x00000014, 0x00000006, 0x00000007, 0x00000007,
0x0004002b, 0x00000006, 0x00000019, 0x3f800000,
0x00040015, 0x0000003c, 0x00000020, 0x00000000,
0x00040020, 0x0000003d, 0x00000007, 0x0000003c,
0x00040017, 0x0000003f, 0x0000003c, 0x00000003,
0x00040020, 0x00000040, 0x00000001, 0x0000003f,
0x0004003b, 0x00000040, 0x00000041, 0x00000001,
0x0004002b, 0x0000003c, 0x00000042, 0x00000000,
0x00040020, 0x00000043, 0x00000001, 0x0000003c,
0x0003001d, 0x00000047, 0x00000006, 0x0003001e,
0x00000048, 0x00000047, 0x00040020, 0x00000049,
0x00000002, 0x00000048, 0x0004003b, 0x00000049,
0x0000004a, 0x00000002, 0x00040015, 0x0000004b,
0x00000020, 0x00000001, 0x0004002b, 0x0000004b,
0x0000004c, 0x00000000, 0x00040020, 0x0000004d,
0x00000002, 0x00000006, 0x0004002b, 0x0000004b,
0x00000050, 0x00000001, 0x0003001d, 0x00000055,
0x00000006, 0x0003001e, 0x00000056, 0x00000055,
0x00040020, 0x00000057, 0x00000002, 0x00000056,
0x0004003b, 0x00000057, 0x00000058, 0x00000002,
0x0003001d, 0x0000005c, 0x00000006, 0x0003001e,
0x0000005d, 0x0000005c, 0x00040020, 0x0000005e,
0x00000002, 0x0000005d, 0x0004003b, 0x0000005e,
0x0000005f, 0x00000002, 0x0003001d, 0x00000063,
0x00000006, 0x0003001e, 0x00000064, 0x00000063,
0x00040020, 0x00000065, 0x00000002, 0x00000064,
0x0004003b, 0x00000065, 0x00000066, 0x00000002,
0x0003001d, 0x0000006c, 0x00000006, 0x0003001e,
0x0000006d, 0x0000006c, 0x00040020, 0x0000006e,
0x00000002, 0x0000006d, 0x0004003b, 0x0000006e,
0x0000006f, 0x00000002, 0x00040032, 0x00000006,
0x00000080, 0x00000000, 0x0003001d, 0x0000008a,
0x00000006, 0x0003001e, 0x0000008b, 0x0000008a,
0x00040020, 0x0000008c, 0x00000002, 0x0000008b,
0x0004003b, 0x0000008c, 0x0000008d, 0x00000002,
0x0003001d, 0x00000092, 0x00000006, 0x0003001e,
0x00000093, 0x00000092, 0x00040020, 0x00000094,
0x00000002, 0x00000093, 0x0004003b, 0x00000094,
0x00000095, 0x00000002, 0x0004002b, 0x0000003c,
0x00000097, 0x00000001, 0x0003001d, 0x0000009b,
0x00000006, 0x0003001e, 0x0000009c, 0x0000009b,
0x00040020, 0x0000009d, 0x00000002, 0x0000009c,
0x0004003b, 0x0000009d, 0x0000009e, 0x00000002,
0x0003001d, 0x000000a2, 0x00000006, 0x0003001e,
0x000000a3, 0x000000a2, 0x00040020, 0x000000a4,
0x00000002, 0x000000a3, 0x0004003b, 0x000000a4,
0x000000a5, 0x00000002, 0x0006002c, 0x0000003f,
0x000000ad, 0x00000097, 0x00000097, 0x00000097,
0x00050036, 0x00000002, 0x00000004, 0x00000000,
0x00000003, 0x000200f8, 0x00000005, 0x0004003b,
0x0000003d, 0x0000003e, 0x00000007, 0x0004003b,
0x0000000d, 0x00000046, 0x00000007, 0x0004003b,
0x00000007, 0x00000054, 0x00000007, 0x0004003b,
0x0000000d, 0x0000005b, 0x00000007, 0x0004003b,
0x00000007, 0x0000006b, 0x00000007, 0x0004003b,
0x00000007, 0x00000073, 0x00000007, 0x0004003b,
0x0000000d, 0x00000074, 0x00000007, 0x0004003b,
0x0000000d, 0x00000076, 0x00000007, 0x0004003b,
0x00000007, 0x00000078, 0x00000007, 0x0004003b,
0x00000007, 0x0000007b, 0x00000007, 0x0004003b,
0x0000000d, 0x0000007f, 0x00000007, 0x0004003b,
0x00000007, 0x00000086, 0x00000007, 0x0004003b,
0x00000007, 0x000000a7, 0x00000007, 0x0004003b,
0x00000007, 0x000000a9, 0x00000007, 0x00050041,
0x00000043, 0x00000044, 0x00000041, 0x00000042,
0x0004003d, 0x0000003c, 0x00000045, 0x00000044,
0x0003003e, 0x0000003e, 0x00000045, 0x00060041,
0x0000004d, 0x0000004e, 0x0000004a, 0x0000004c,
0x0000004c, 0x0004003d, 0x00000006, 0x0000004f,
0x0000004e, 0x00060041, 0x0000004d, 0x00000051,
0x0000004a, 0x0000004c, 0x00000050, 0x0004003d,
0x00000006, 0x00000052, 0x00000051, 0x00050050,
0x0000000c, 0x00000053, 0x0000004f, 0x00000052,
0x0003003e, 0x00000046, 0x00000053, 0x00060041,
0x0000004d, 0x00000059, 0x00000058, 0x0000004c,
0x0000004c, 0x0004003d, 0x00000006, 0x0000005a,
0x00000059, 0x0003003e, 0x00000054, 0x0000005a,
0x0004003d, 0x0000003c, 0x00000060, 0x0000003e,
0x00060041, 0x0000004d, 0x00000061, 0x0000005f,
0x0000004c, 0x00000060, 0x0004003d, 0x00000006,
0x00000062, 0x00000061, 0x0004003d, 0x0000003c,
0x00000067, 0x0000003e, 0x00060041, 0x0000004d,
0x00000068, 0x00000066, 0x0000004c, 0x00000067,
0x0004003d, 0x00000006, 0x00000069, 0x00000068,
0x00050050, 0x0000000c, 0x0000006a, 0x00000062,
0x00000069, 0x0003003e, 0x0000005b, 0x0000006a,
0x0004003d, 0x0000003c, 0x00000070, 0x0000003e,
0x00060041, 0x0000004d, 0x00000071, 0x0000006f,
0x0000004c, 0x00000070, 0x0004003d, 0x00000006,
0x00000072, 0x00000071, 0x0003003e, 0x0000006b,
0x00000072, 0x0004003d, 0x0000000c, 0x00000075,
0x0000005b, 0x0003003e, 0x00000074, 0x00000075,
0x0004003d, 0x0000000c, 0x00000077, 0x00000046,
0x0003003e, 0x00000076, 0x00000077, 0x0004003d,
0x00000006, 0x00000079, 0x00000054, 0x0003003e,
0x00000078, 0x00000079, 0x00070039, 0x00000006,
0x0000007a, 0x00000012, 0x00000074, 0x00000076,
0x00000078, 0x0003003e, 0x00000073, 0x0000007a,
0x0004003d, 0x00000006, 0x0000007c, 0x00000073,
0x0004003d, 0x00000006, 0x0000007d, 0x0000006b,
0x00050083, 0x00000006, 0x0000007e, 0x0000007c,
0x0000007d, 0x0003003e, 0x0000007b, 0x0000007e,
0x00050088, 0x00000006, 0x00000081, 0x00000019,
0x00000080, 0x0004003d, 0x0000000c, 0x00000082,
0x0000005b, 0x0005008e, 0x0000000c, 0x00000083,
0x00000082, 0x00000081, 0x0004003d, 0x00000006,
0x00000084, 0x0000007b, 0x0005008e, 0x0000000c,
0x00000085, 0x00000083, 0x00000084, 0x0003003e,
0x0000007f, 0x00000085, 0x00050088, 0x00000006,
0x00000087, 0x00000019, 0x00000080, 0x0004003d,
0x00000006, 0x00000088, 0x0000007b, 0x00050085,
0x00000006, 0x00000089, 0x00000087, 0x00000088,
0x0003003e, 0x00000086, 0x00000089, 0x0004003d,
0x0000003c, 0x0000008e, 0x0000003e, 0x00050041,
0x00000007, 0x0000008f, 0x0000007f, 0x00000042,
0x0004003d, 0x00000006, 0x00000090, 0x0000008f,
0x00060041, 0x0000004d, 0x00000091, 0x0000008d,
0x0000004c, 0x0000008e, 0x0003003e, 0x00000091,
0x00000090, 0x0004003d, 0x0000003c, 0x00000096,
0x0000003e, 0x00050041, 0x00000007, 0x00000098,
0x0000007f, 0x00000097, 0x0004003d, 0x00000006,
0x00000099, 0x00000098, 0x00060041, 0x0000004d,
0x0000009a, 0x00000095, 0x0000004c, 0x00000096,
0x0003003e, 0x0000009a, 0x00000099, 0x0004003d,
0x0000003c, 0x0000009f, 0x0000003e, 0x0004003d,
0x00000006, 0x000000a0, 0x00000086, 0x00060041,
0x0000004d, 0x000000a1, 0x0000009e, 0x0000004c,
0x0000009f, 0x0003003e, 0x000000a1, 0x000000a0,
0x0004003d, 0x0000003c, 0x000000a6, 0x0000003e,
0x0004003d, 0x00000006, 0x000000a8, 0x00000073,
0x0003003e, 0x000000a7, 0x000000a8, 0x0004003d,
0x00000006, 0x000000aa, 0x0000006b, 0x0003003e,
0x000000a9, 0x000000aa, 0x00060039, 0x00000006,
0x000000ab, 0x00000017, 0x000000a7, 0x000000a9,
0x00060041, 0x0000004d, 0x000000ac, 0x000000a5,
0x0000004c, 0x000000a6, 0x0003003e, 0x000000ac,
0x000000ab, 0x000100fd, 0x00010038, 0x00050036,
0x00000006, 0x0000000a, 0x00000000, 0x00000008,
0x00030037, 0x00000007, 0x00000009, 0x000200f8,
0x0000000b, 0x0004003d, 0x00000006, 0x0000001a,
0x00000009, 0x0004007f, 0x00000006, 0x0000001b,
0x0000001a, 0x0006000c, 0x00000006, 0x0000001c,
0x00000001, 0x0000001b, 0x0000001b, 0x00050081,
0x00000006, 0x0000001d, 0x00000019, 0x0000001c,
0x00050088, 0x00000006, 0x0000001e, 0x00000019,
0x0000001d, 0x000200fe, 0x0000001e, 0x00010038,
0x00050036, 0x00000006, 0x00000012, 0x00000000,
0x0000000e, 0x00030037, 0x0000000d, 0x0000000f,
0x00030037, 0x0000000d, 0x00000010, 0x00030037,
0x00000007, 0x00000011, 0x000200f8, 0x00000013,
0x0004003b, 0x00000007, 0x00000021, 0x00000007,
0x0004003b, 0x00000007, 0x00000027, 0x00000007,
0x0004003b, 0x00000007, 0x00000028, 0x00000007,
0x0004003d, 0x0000000c, 0x00000022, 0x00000010,
0x0004003d, 0x0000000c, 0x00000023, 0x0000000f,
0x00050094, 0x00000006, 0x00000024, 0x00000022,
0x00000023, 0x0004003d, 0x00000006, 0x00000025,
0x00000011, 0x00050081, 0x00000006, 0x00000026,
0x00000024, 0x00000025, 0x0003003e, 0x00000021,
0x00000026, 0x0004003d, 0x00000006, 0x00000029,
0x00000021, 0x0003003e, 0x00000028, 0x00000029,
0x00050039, 0x00000006, 0x0000002a, 0x0000000a,
0x00000028, 0x0003003e, 0x00000027, 0x0000002a,
0x0004003d, 0x00000006, 0x0000002b, 0x00000027,
0x000200fe, 0x0000002b, 0x00010038, 0x00050036,
0x00000006, 0x00000017, 0x00000000, 0x00000014,
0x00030037, 0x00000007, 0x00000015, 0x00030037,
0x00000007, 0x00000016, 0x000200f8, 0x00000018,
0x0004003d, 0x00000006, 0x0000002e, 0x00000016,
0x0004003d, 0x00000006, 0x0000002f, 0x00000015,
0x0006000c, 0x00000006, 0x00000030, 0x00000001,
0x0000001c, 0x0000002f, 0x00050085, 0x00000006,
0x00000031, 0x0000002e, 0x00000030, 0x0004003d,
0x00000006, 0x00000032, 0x00000016, 0x00050083,
0x00000006, 0x00000033, 0x00000019, 0x00000032,
0x0004003d, 0x00000006, 0x00000034, 0x00000015,
0x00050083, 0x00000006, 0x00000035, 0x00000019,
0x00000034, 0x0006000c, 0x00000006, 0x00000036,
0x00000001, 0x0000001c, 0x00000035, 0x00050085,
0x00000006, 0x00000037, 0x00000033, 0x00000036,
0x00050081, 0x00000006, 0x00000038, 0x00000031,
0x00000037, 0x0004007f, 0x00000006, 0x00000039,
0x00000038, 0x000200fe, 0x00000039, 0x00010038 };
} // namespace kp

View file

@ -0,0 +1,28 @@
#version 450

// Element-wise multiply: valuesOutput[i] = valuesLhs[i] * valuesRhs[i],
// with one invocation per element.

layout(set = 0, binding = 0) buffer tensorLhs {
    float valuesLhs[ ];
};
layout(set = 0, binding = 1) buffer tensorRhs {
    float valuesRhs[ ];
};
layout(set = 0, binding = 2) buffer tensorOutput {
    float valuesOutput[ ];
};

// Tensor lengths as specialization constants.
// NOTE(review): currently unused — no bounds check is performed, so the
// dispatch size must exactly match the tensor length; confirm callers ensure this.
layout (constant_id = 0) const uint LEN_LHS = 0;
layout (constant_id = 1) const uint LEN_RHS = 0;
layout (constant_id = 2) const uint LEN_OUT = 0;

layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

void main()
{
    // Each invocation handles exactly one element.
    uint index = gl_GlobalInvocationID.x;
    valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
}

View file

@ -0,0 +1,101 @@
#pragma once
#include <array>
#include <cstdint>
namespace kp {
const std::array<uint32_t, 366> SHADEROPMULT_COMP_SPV = {
0x07230203, 0x00010000, 0x0008000a, 0x0000002e,
0x00000000, 0x00020011, 0x00000001, 0x0006000b,
0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e,
0x00000000, 0x0003000e, 0x00000000, 0x00000001,
0x0006000f, 0x00000005, 0x00000004, 0x6e69616d,
0x00000000, 0x0000000b, 0x00060010, 0x00000004,
0x00000011, 0x00000001, 0x00000001, 0x00000001,
0x00030003, 0x00000002, 0x000001c2, 0x00040005,
0x00000004, 0x6e69616d, 0x00000000, 0x00040005,
0x00000008, 0x65646e69, 0x00000078, 0x00080005,
0x0000000b, 0x475f6c67, 0x61626f6c, 0x766e496c,
0x7461636f, 0x496e6f69, 0x00000044, 0x00060005,
0x00000012, 0x736e6574, 0x754f726f, 0x74757074,
0x00000000, 0x00070006, 0x00000012, 0x00000000,
0x756c6176, 0x754f7365, 0x74757074, 0x00000000,
0x00030005, 0x00000014, 0x00000000, 0x00050005,
0x00000019, 0x736e6574, 0x684c726f, 0x00000073,
0x00060006, 0x00000019, 0x00000000, 0x756c6176,
0x684c7365, 0x00000073, 0x00030005, 0x0000001b,
0x00000000, 0x00050005, 0x00000021, 0x736e6574,
0x6852726f, 0x00000073, 0x00060006, 0x00000021,
0x00000000, 0x756c6176, 0x68527365, 0x00000073,
0x00030005, 0x00000023, 0x00000000, 0x00040005,
0x00000029, 0x5f4e454c, 0x0053484c, 0x00040005,
0x0000002a, 0x5f4e454c, 0x00534852, 0x00040005,
0x0000002b, 0x5f4e454c, 0x0054554f, 0x00040047,
0x0000000b, 0x0000000b, 0x0000001c, 0x00040047,
0x00000011, 0x00000006, 0x00000004, 0x00050048,
0x00000012, 0x00000000, 0x00000023, 0x00000000,
0x00030047, 0x00000012, 0x00000003, 0x00040047,
0x00000014, 0x00000022, 0x00000000, 0x00040047,
0x00000014, 0x00000021, 0x00000002, 0x00040047,
0x00000018, 0x00000006, 0x00000004, 0x00050048,
0x00000019, 0x00000000, 0x00000023, 0x00000000,
0x00030047, 0x00000019, 0x00000003, 0x00040047,
0x0000001b, 0x00000022, 0x00000000, 0x00040047,
0x0000001b, 0x00000021, 0x00000000, 0x00040047,
0x00000020, 0x00000006, 0x00000004, 0x00050048,
0x00000021, 0x00000000, 0x00000023, 0x00000000,
0x00030047, 0x00000021, 0x00000003, 0x00040047,
0x00000023, 0x00000022, 0x00000000, 0x00040047,
0x00000023, 0x00000021, 0x00000001, 0x00040047,
0x00000029, 0x00000001, 0x00000000, 0x00040047,
0x0000002a, 0x00000001, 0x00000001, 0x00040047,
0x0000002b, 0x00000001, 0x00000002, 0x00040047,
0x0000002d, 0x0000000b, 0x00000019, 0x00020013,
0x00000002, 0x00030021, 0x00000003, 0x00000002,
0x00040015, 0x00000006, 0x00000020, 0x00000000,
0x00040020, 0x00000007, 0x00000007, 0x00000006,
0x00040017, 0x00000009, 0x00000006, 0x00000003,
0x00040020, 0x0000000a, 0x00000001, 0x00000009,
0x0004003b, 0x0000000a, 0x0000000b, 0x00000001,
0x0004002b, 0x00000006, 0x0000000c, 0x00000000,
0x00040020, 0x0000000d, 0x00000001, 0x00000006,
0x00030016, 0x00000010, 0x00000020, 0x0003001d,
0x00000011, 0x00000010, 0x0003001e, 0x00000012,
0x00000011, 0x00040020, 0x00000013, 0x00000002,
0x00000012, 0x0004003b, 0x00000013, 0x00000014,
0x00000002, 0x00040015, 0x00000015, 0x00000020,
0x00000001, 0x0004002b, 0x00000015, 0x00000016,
0x00000000, 0x0003001d, 0x00000018, 0x00000010,
0x0003001e, 0x00000019, 0x00000018, 0x00040020,
0x0000001a, 0x00000002, 0x00000019, 0x0004003b,
0x0000001a, 0x0000001b, 0x00000002, 0x00040020,
0x0000001d, 0x00000002, 0x00000010, 0x0003001d,
0x00000020, 0x00000010, 0x0003001e, 0x00000021,
0x00000020, 0x00040020, 0x00000022, 0x00000002,
0x00000021, 0x0004003b, 0x00000022, 0x00000023,
0x00000002, 0x00040032, 0x00000006, 0x00000029,
0x00000000, 0x00040032, 0x00000006, 0x0000002a,
0x00000000, 0x00040032, 0x00000006, 0x0000002b,
0x00000000, 0x0004002b, 0x00000006, 0x0000002c,
0x00000001, 0x0006002c, 0x00000009, 0x0000002d,
0x0000002c, 0x0000002c, 0x0000002c, 0x00050036,
0x00000002, 0x00000004, 0x00000000, 0x00000003,
0x000200f8, 0x00000005, 0x0004003b, 0x00000007,
0x00000008, 0x00000007, 0x00050041, 0x0000000d,
0x0000000e, 0x0000000b, 0x0000000c, 0x0004003d,
0x00000006, 0x0000000f, 0x0000000e, 0x0003003e,
0x00000008, 0x0000000f, 0x0004003d, 0x00000006,
0x00000017, 0x00000008, 0x0004003d, 0x00000006,
0x0000001c, 0x00000008, 0x00060041, 0x0000001d,
0x0000001e, 0x0000001b, 0x00000016, 0x0000001c,
0x0004003d, 0x00000010, 0x0000001f, 0x0000001e,
0x0004003d, 0x00000006, 0x00000024, 0x00000008,
0x00060041, 0x0000001d, 0x00000025, 0x00000023,
0x00000016, 0x00000024, 0x0004003d, 0x00000010,
0x00000026, 0x00000025, 0x00050085, 0x00000010,
0x00000027, 0x0000001f, 0x00000026, 0x00060041,
0x0000001d, 0x00000028, 0x00000014, 0x00000016,
0x00000017, 0x0003003e, 0x00000028, 0x00000027,
0x000100fd, 0x00010038 };
} // namespace kp

View file

@ -0,0 +1,29 @@
// Copyright 2020 Google LLC
RWStructuredBuffer<uint> values : register(u0);
[[vk::constant_id(0)]] const uint BUFFER_ELEMENTS = 32;
// Iteratively computes the n-th Fibonacci number (fib(0) = 0, fib(1) = 1).
uint fibonacci(uint n) {
    // fib(0) and fib(1) are the arguments themselves.
    if (n <= 1) {
        return n;
    }
    // Walk the sequence upward: a = fib(i-1), b = fib(i), starting at i = 2.
    uint a = 1;
    uint b = 1;
    for (uint i = 2; i < n; ++i) {
        uint next = a + b;
        a = b;
        b = next;
    }
    return b;
}
// One thread per buffer element; replaces each value in-place with
// fibonacci(value).
[numthreads(1, 1, 1)]
void main(uint3 GlobalInvocationID : SV_DispatchThreadID)
{
    uint index = GlobalInvocationID.x;
    // Guard against dispatches larger than the buffer.
    if (index >= BUFFER_ELEMENTS)
        return;
    values[index] = fibonacci(values[index]);
}