Cleanup tweaks and DSC class. The file copy raid functionality is not protected by an named ifdef
This commit is contained in:
parent
b11224c5e1
commit
1a07f60451
3 changed files with 112 additions and 48 deletions
84
ggml-cuda.cu
84
ggml-cuda.cu
|
@ -379,13 +379,35 @@ struct ggml_backend_cuda_buffer_context {
|
||||||
int device;
|
int device;
|
||||||
void * dev_ptr = nullptr;
|
void * dev_ptr = nullptr;
|
||||||
std::string name;
|
std::string name;
|
||||||
|
#if defined(GGML_ENABLE_DIRECT_STORAGE_CUDA)
|
||||||
|
std::unique_ptr<InteropBuffer> direct_storage_buffer;
|
||||||
|
#endif
|
||||||
|
|
||||||
ggml_backend_cuda_buffer_context(int device, void * dev_ptr) :
|
ggml_backend_cuda_buffer_context(int device, void * dev_ptr) :
|
||||||
device(device), dev_ptr(dev_ptr),
|
device(device), dev_ptr(dev_ptr),
|
||||||
name(GGML_CUDA_NAME + std::to_string(device)) {
|
name(GGML_CUDA_NAME + std::to_string(device)) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(GGML_ENABLE_DIRECT_STORAGE_CUDA)
|
||||||
|
ggml_backend_cuda_buffer_context(int device, std::unique_ptr<InteropBuffer> && direct_storage_buffer_) :
|
||||||
|
device(device), dev_ptr(nullptr),
|
||||||
|
name(GGML_CUDA_NAME + std::to_string(device)),
|
||||||
|
direct_storage_buffer(std::move(direct_storage_buffer_))
|
||||||
|
{
|
||||||
|
dev_ptr = direct_storage_buffer->get_device_ptr();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
~ggml_backend_cuda_buffer_context() {
|
~ggml_backend_cuda_buffer_context() {
|
||||||
|
#if defined(GGML_ENABLE_DIRECT_STORAGE_CUDA)
|
||||||
|
if (direct_storage_buffer) {
|
||||||
|
direct_storage_buffer.reset();
|
||||||
|
dev_ptr = nullptr;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
CUDA_CHECK(cudaFree(dev_ptr));
|
CUDA_CHECK(cudaFree(dev_ptr));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -451,7 +473,8 @@ GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t
|
||||||
if (size & (1u << 31)) {
|
if (size & (1u << 31)) {
|
||||||
size &= ~(1u << 31);
|
size &= ~(1u << 31);
|
||||||
if (data == nullptr) {
|
if (data == nullptr) {
|
||||||
dsc->flush();
|
std::cout << "flush" << std::endl;
|
||||||
|
dsc->flush(true);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
struct Temp {
|
struct Temp {
|
||||||
|
@ -465,7 +488,8 @@ GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t
|
||||||
if (it == files.end()) {
|
if (it == files.end()) {
|
||||||
files[filename].handles.push_back(dsc->openFile(filename));
|
files[filename].handles.push_back(dsc->openFile(filename));
|
||||||
|
|
||||||
#if 0
|
#define COPY_RAID
|
||||||
|
#if defined(COPY_RAID)
|
||||||
// This is a hack to evaluate how fast data can be read from a 2nd disk.
|
// This is a hack to evaluate how fast data can be read from a 2nd disk.
|
||||||
std::filesystem::path p(filename);
|
std::filesystem::path p(filename);
|
||||||
std::filesystem::path p2("d:");
|
std::filesystem::path p2("d:");
|
||||||
|
@ -482,7 +506,34 @@ GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t
|
||||||
it = files.find(filename);
|
it = files.find(filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//dsc->loadFile(it->second.getFile(), t->weights_off, size, (char*)tensor->data + offset);
|
||||||
|
if (ctx->direct_storage_buffer) {
|
||||||
|
size_t tensor_offset = (char*)tensor->data - (char*)ctx->direct_storage_buffer->get_device_ptr();
|
||||||
|
#if defined(COPY_RAID)
|
||||||
|
size_t blocksize = 4 * 1024 * 1024;
|
||||||
|
for (size_t idx = 0; idx < size; idx += blocksize) {
|
||||||
|
size_t read_len = size - idx;
|
||||||
|
if (read_len > blocksize)
|
||||||
|
read_len = blocksize;
|
||||||
|
dsc->loadFile(it->second.getFile(), t->weights_off + idx, read_len, ctx->direct_storage_buffer.get(), offset + tensor_offset + idx);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
dsc->loadFile(it->second.getFile(), t->weights_off, size, ctx->direct_storage_buffer.get(), offset + tensor_offset);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
#if defined(COPY_RAID)
|
||||||
|
size_t blocksize = 2 * 1024 * 1024;
|
||||||
|
for (size_t idx = 0; idx < size; idx += blocksize) {
|
||||||
|
size_t read_len = size - idx;
|
||||||
|
if (read_len > blocksize)
|
||||||
|
read_len = blocksize;
|
||||||
|
dsc->loadFile(it->second.getFile(), t->weights_off + idx, read_len, (char*)tensor->data + offset + idx);
|
||||||
|
}
|
||||||
|
#else
|
||||||
dsc->loadFile(it->second.getFile(), t->weights_off, size, (char*)tensor->data + offset);
|
dsc->loadFile(it->second.getFile(), t->weights_off, size, (char*)tensor->data + offset);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
@ -561,15 +612,20 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_cuda_buffer_type_alloc_buffe
|
||||||
|
|
||||||
size = std::max(size, (size_t)1); // cudaMalloc returns null for size 0
|
size = std::max(size, (size_t)1); // cudaMalloc returns null for size 0
|
||||||
|
|
||||||
void * dev_ptr;
|
#if defined(GGML_ENABLE_DIRECT_STORAGE_CUDA)
|
||||||
|
if (size < 512 * 1024 * 1024) {
|
||||||
|
auto interop_buffer = dsc->create_interop_buffer(size);
|
||||||
|
ggml_backend_cuda_buffer_context* ctx = new ggml_backend_cuda_buffer_context(buft_ctx->device, std::move(interop_buffer));
|
||||||
|
return ggml_backend_buffer_init(buft, ggml_backend_cuda_buffer_interface, ctx, size);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
void* dev_ptr;
|
||||||
cudaError_t err = cudaMalloc(&dev_ptr, size);
|
cudaError_t err = cudaMalloc(&dev_ptr, size);
|
||||||
if (err != cudaSuccess) {
|
if (err != cudaSuccess) {
|
||||||
fprintf(stderr, "%s: allocating %.2f MiB on device %d: cudaMalloc failed: %s\n", __func__, size/1024.0/1024.0, buft_ctx->device, cudaGetErrorString(err));
|
fprintf(stderr, "%s: allocating %.2f MiB on device %d: cudaMalloc failed: %s\n", __func__, size / 1024.0 / 1024.0, buft_ctx->device, cudaGetErrorString(err));
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
ggml_backend_cuda_buffer_context* ctx = new ggml_backend_cuda_buffer_context(buft_ctx->device, dev_ptr);
|
||||||
ggml_backend_cuda_buffer_context * ctx = new ggml_backend_cuda_buffer_context(buft_ctx->device, dev_ptr);
|
|
||||||
|
|
||||||
return ggml_backend_buffer_init(buft, ggml_backend_cuda_buffer_interface, ctx, size);
|
return ggml_backend_buffer_init(buft, ggml_backend_cuda_buffer_interface, ctx, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -605,11 +661,21 @@ GGML_CALL static bool ggml_backend_cuda_buffer_type_supports_backend(ggml_backen
|
||||||
return buft_ctx->device == cuda_ctx->device;
|
return buft_ctx->device == cuda_ctx->device;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GGML_CALL static size_t ggml_backend_cuda_get_max_size(ggml_backend_buffer_type_t buft)
|
||||||
|
{
|
||||||
|
#if defined(GGML_ENABLE_DIRECT_STORAGE_CUDA)
|
||||||
|
//return 512 * 1024 * 1024; // dx interop limit
|
||||||
|
return SIZE_MAX;
|
||||||
|
#else
|
||||||
|
return SIZE_MAX;
|
||||||
|
#endif
|
||||||
|
}// allocation max size
|
||||||
|
|
||||||
static ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface = {
|
static ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface = {
|
||||||
/* .get_name = */ ggml_backend_cuda_buffer_type_name,
|
/* .get_name = */ ggml_backend_cuda_buffer_type_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment,
|
||||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
/* .get_max_size = */ ggml_backend_cuda_get_max_size, // defaults to SIZE_MAX
|
||||||
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
|
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
|
||||||
/* .supports_backend = */ ggml_backend_cuda_buffer_type_supports_backend,
|
/* .supports_backend = */ ggml_backend_cuda_buffer_type_supports_backend,
|
||||||
/* .is_host = */ NULL,
|
/* .is_host = */ NULL,
|
||||||
|
@ -774,7 +840,7 @@ GGML_CALL static const char * ggml_backend_cuda_split_buffer_get_name(ggml_backe
|
||||||
|
|
||||||
static bool ggml_backend_buffer_is_cuda_split(ggml_backend_buffer_t buffer) {
|
static bool ggml_backend_buffer_is_cuda_split(ggml_backend_buffer_t buffer) {
|
||||||
return buffer->iface.get_name == ggml_backend_cuda_split_buffer_get_name;
|
return buffer->iface.get_name == ggml_backend_cuda_split_buffer_get_name;
|
||||||
GGML_UNUSED(ggml_backend_buffer_is_cuda_split); // only used in debug builds currently, avoid unused function warning in release builds
|
GGML_UNUSED(&ggml_backend_buffer_is_cuda_split); // only used in debug builds currently, avoid unused function warning in release builds
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
GGML_CALL static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
||||||
|
|
|
@ -124,14 +124,16 @@ public:
|
||||||
D3D12_HEAP_PROPERTIES bufferHeapProps = {};
|
D3D12_HEAP_PROPERTIES bufferHeapProps = {};
|
||||||
bufferHeapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
|
bufferHeapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||||
|
|
||||||
|
#define USE_HEAP
|
||||||
|
|
||||||
|
#if defined(USE_HEAP)
|
||||||
D3D12_HEAP_DESC hd = {};
|
D3D12_HEAP_DESC hd = {};
|
||||||
hd.SizeInBytes = size;
|
hd.SizeInBytes = size;
|
||||||
hd.Properties = bufferHeapProps;
|
hd.Properties = bufferHeapProps;
|
||||||
hd.Flags = D3D12_HEAP_FLAG_SHARED;
|
hd.Flags = D3D12_HEAP_FLAG_SHARED;
|
||||||
hd.Alignment = 0;
|
hd.Alignment = 0;
|
||||||
d3d_device->CreateHeap(&hd, IID_PPV_ARGS(&m_d3d_heap));
|
d3d_device->CreateHeap(&hd, IID_PPV_ARGS(&m_d3d_heap));
|
||||||
|
#endif
|
||||||
|
|
||||||
D3D12_RESOURCE_DESC bufferDesc = {};
|
D3D12_RESOURCE_DESC bufferDesc = {};
|
||||||
bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||||
|
@ -143,16 +145,7 @@ public:
|
||||||
bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
||||||
bufferDesc.SampleDesc.Count = 1;
|
bufferDesc.SampleDesc.Count = 1;
|
||||||
|
|
||||||
//#define USE_BUFFER
|
#if defined(USE_HEAP)
|
||||||
#if defined(USE_BUFFER) //
|
|
||||||
winrt::check_hresult(d3d_device->CreateCommittedResource(
|
|
||||||
&bufferHeapProps,
|
|
||||||
D3D12_HEAP_FLAG_NONE | D3D12_HEAP_FLAG_SHARED,
|
|
||||||
&bufferDesc,
|
|
||||||
D3D12_RESOURCE_STATE_COMMON,
|
|
||||||
nullptr,
|
|
||||||
IID_PPV_ARGS(m_d3d_buffer.put())));
|
|
||||||
#else
|
|
||||||
winrt::check_hresult(d3d_device->CreatePlacedResource(
|
winrt::check_hresult(d3d_device->CreatePlacedResource(
|
||||||
m_d3d_heap.get(),
|
m_d3d_heap.get(),
|
||||||
0,
|
0,
|
||||||
|
@ -160,10 +153,17 @@ public:
|
||||||
D3D12_RESOURCE_STATE_COMMON,
|
D3D12_RESOURCE_STATE_COMMON,
|
||||||
nullptr,
|
nullptr,
|
||||||
IID_PPV_ARGS(m_d3d_buffer.put())));
|
IID_PPV_ARGS(m_d3d_buffer.put())));
|
||||||
|
#else
|
||||||
|
winrt::check_hresult(d3d_device->CreateCommittedResource(
|
||||||
|
&bufferHeapProps,
|
||||||
|
D3D12_HEAP_FLAG_NONE | D3D12_HEAP_FLAG_SHARED,
|
||||||
|
&bufferDesc,
|
||||||
|
D3D12_RESOURCE_STATE_COMMON,
|
||||||
|
nullptr,
|
||||||
|
IID_PPV_ARGS(m_d3d_buffer.put())));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
#if defined(DEBUG_READBACK)
|
||||||
// debug begin
|
|
||||||
bufferHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
|
bufferHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
|
||||||
winrt::check_hresult(d3d_device->CreateCommittedResource(
|
winrt::check_hresult(d3d_device->CreateCommittedResource(
|
||||||
&bufferHeapProps,
|
&bufferHeapProps,
|
||||||
|
@ -176,27 +176,26 @@ public:
|
||||||
m_host_buffer->Map(0, nullptr, &m_host_ptr);
|
m_host_buffer->Map(0, nullptr, &m_host_ptr);
|
||||||
d3d_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_cmdallocator));
|
d3d_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_cmdallocator));
|
||||||
d3d_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_cmdallocator.get(), nullptr, IID_PPV_ARGS(&m_cmdlist));
|
d3d_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_cmdallocator.get(), nullptr, IID_PPV_ARGS(&m_cmdlist));
|
||||||
#endif
|
|
||||||
|
|
||||||
D3D12_COMMAND_QUEUE_DESC qd = {};
|
D3D12_COMMAND_QUEUE_DESC qd = {};
|
||||||
qd.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
|
qd.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
|
||||||
d3d_device->CreateCommandQueue(&qd, IID_PPV_ARGS(&m_cmd_queue));
|
d3d_device->CreateCommandQueue(&qd, IID_PPV_ARGS(&m_cmd_queue));
|
||||||
// debug end
|
#endif
|
||||||
|
|
||||||
// create a shared handle to require to import the d3d buffer into CUDA
|
// create a shared handle to require to import the d3d buffer into CUDA
|
||||||
HANDLE sharedHandle;
|
HANDLE sharedHandle;
|
||||||
WindowsSecurityAttributes windowsSecurityAttributes;
|
WindowsSecurityAttributes windowsSecurityAttributes;
|
||||||
LPCWSTR name = NULL;
|
LPCWSTR name = NULL;
|
||||||
#if USE_BUFFER
|
#if defined(USE_HEAP)
|
||||||
d3d_device->CreateSharedHandle(m_d3d_buffer.get(), windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle);
|
|
||||||
|
|
||||||
cudaExternalMemoryHandleDesc externalMemoryHandleDesc = {};
|
|
||||||
externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Resource;
|
|
||||||
#else
|
|
||||||
d3d_device->CreateSharedHandle(m_d3d_heap.get(), windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle);
|
d3d_device->CreateSharedHandle(m_d3d_heap.get(), windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle);
|
||||||
|
|
||||||
cudaExternalMemoryHandleDesc externalMemoryHandleDesc = {};
|
cudaExternalMemoryHandleDesc externalMemoryHandleDesc = {};
|
||||||
externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Heap;
|
externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Heap;
|
||||||
|
#else
|
||||||
|
d3d_device->CreateSharedHandle(m_d3d_buffer.get(), windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle);
|
||||||
|
|
||||||
|
cudaExternalMemoryHandleDesc externalMemoryHandleDesc = {};
|
||||||
|
externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Resource;
|
||||||
#endif
|
#endif
|
||||||
externalMemoryHandleDesc.handle.win32.handle = sharedHandle;
|
externalMemoryHandleDesc.handle.win32.handle = sharedHandle;
|
||||||
externalMemoryHandleDesc.size = bufferDesc.Width;
|
externalMemoryHandleDesc.size = bufferDesc.Width;
|
||||||
|
@ -212,11 +211,6 @@ public:
|
||||||
externalMemoryBufferDesc.flags = 0;
|
externalMemoryBufferDesc.flags = 0;
|
||||||
|
|
||||||
result = cudaExternalMemoryGetMappedBuffer(&m_cuda_dev_ptr, m_externalMemory, &externalMemoryBufferDesc);
|
result = cudaExternalMemoryGetMappedBuffer(&m_cuda_dev_ptr, m_externalMemory, &externalMemoryBufferDesc);
|
||||||
result = cudaDeviceSynchronize();
|
|
||||||
|
|
||||||
auto err = cudaMemset(m_cuda_dev_ptr, 255, 512*1024*1024);
|
|
||||||
result = cudaDeviceSynchronize();
|
|
||||||
std::cout << "err: " << err << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
~InteropBufferImpl() {
|
~InteropBufferImpl() {
|
||||||
|
@ -235,19 +229,19 @@ public:
|
||||||
return m_d3d_buffer.get();
|
return m_d3d_buffer.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(DEBUG_READBACK)
|
||||||
void* get_host_ptr() const {
|
void* get_host_ptr() const {
|
||||||
#if 0
|
|
||||||
m_cmdlist->Reset(m_cmdallocator.get(), nullptr);
|
m_cmdlist->Reset(m_cmdallocator.get(), nullptr);
|
||||||
m_cmdlist->CopyResource(m_host_buffer.get(), m_d3d_buffer.get());
|
m_cmdlist->CopyResource(m_host_buffer.get(), m_d3d_buffer.get());
|
||||||
m_cmdlist->Close();
|
m_cmdlist->Close();
|
||||||
|
|
||||||
ID3D12CommandList *ptr = m_cmdlist.get();
|
ID3D12CommandList *ptr = m_cmdlist.get();
|
||||||
m_cmd_queue->ExecuteCommandLists(1, &ptr);
|
m_cmd_queue->ExecuteCommandLists(1, &ptr);
|
||||||
Sleep(2);
|
Sleep(2); // actually one would have to wait for an event here
|
||||||
#endif
|
|
||||||
|
|
||||||
return m_host_ptr;
|
return m_host_ptr;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
private:
|
private:
|
||||||
winrt::com_ptr<ID3D12CommandQueue> m_cmd_queue = {};
|
winrt::com_ptr<ID3D12CommandQueue> m_cmd_queue = {};
|
||||||
|
@ -257,11 +251,12 @@ private:
|
||||||
cudaExternalMemory_t m_externalMemory;
|
cudaExternalMemory_t m_externalMemory;
|
||||||
void* m_cuda_dev_ptr;
|
void* m_cuda_dev_ptr;
|
||||||
|
|
||||||
// debug
|
#if defined(DEBUG_READBACK)
|
||||||
winrt::com_ptr<ID3D12Resource> m_host_buffer = {};
|
winrt::com_ptr<ID3D12Resource> m_host_buffer = {};
|
||||||
winrt::com_ptr<ID3D12GraphicsCommandList> m_cmdlist = {};
|
winrt::com_ptr<ID3D12GraphicsCommandList> m_cmdlist = {};
|
||||||
winrt::com_ptr<ID3D12CommandAllocator> m_cmdallocator = {};
|
winrt::com_ptr<ID3D12CommandAllocator> m_cmdallocator = {};
|
||||||
void* m_host_ptr;
|
void* m_host_ptr;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
class DirectStorageCUDAImpl : public DirectStorageCUDA
|
class DirectStorageCUDAImpl : public DirectStorageCUDA
|
||||||
|
@ -450,6 +445,7 @@ private:
|
||||||
InteropBufferImpl* ibi = static_cast<InteropBufferImpl*>(interop_buffer);
|
InteropBufferImpl* ibi = static_cast<InteropBufferImpl*>(interop_buffer);
|
||||||
bool flushed;
|
bool flushed;
|
||||||
while (read_len) {
|
while (read_len) {
|
||||||
|
//std::cout << file.get() << std::endl;
|
||||||
size_t request_size = min(m_chunk_size, read_len);
|
size_t request_size = min(m_chunk_size, read_len);
|
||||||
|
|
||||||
DSTORAGE_REQUEST request = {};
|
DSTORAGE_REQUEST request = {};
|
||||||
|
@ -462,7 +458,6 @@ private:
|
||||||
request.Destination.Buffer.Resource = ibi->get_d3d_buffer();
|
request.Destination.Buffer.Resource = ibi->get_d3d_buffer();
|
||||||
request.Destination.Buffer.Offset = interop_buffer_offset;
|
request.Destination.Buffer.Offset = interop_buffer_offset;
|
||||||
request.Destination.Buffer.Size = request_size;
|
request.Destination.Buffer.Size = request_size;
|
||||||
//std::cout << read_start / (1024*1024) << " / " << interop_buffer_offset / (1024 * 1024) << "/" << request_size / (1024 * 1024) << std::endl;
|
|
||||||
|
|
||||||
m_d3d_storage_queue->EnqueueRequest(&request);
|
m_d3d_storage_queue->EnqueueRequest(&request);
|
||||||
|
|
||||||
|
@ -471,7 +466,7 @@ private:
|
||||||
read_start += request_size;
|
read_start += request_size;
|
||||||
|
|
||||||
m_enqueued = true;
|
m_enqueued = true;
|
||||||
//flush(true);
|
//flush(false); // flushing less often improves perf a little bit, but removes ability to track current load status
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,10 @@ class InteropBuffer {
|
||||||
public:
|
public:
|
||||||
virtual ~InteropBuffer() = 0;
|
virtual ~InteropBuffer() = 0;
|
||||||
virtual void* get_device_ptr() const = 0;
|
virtual void* get_device_ptr() const = 0;
|
||||||
|
|
||||||
|
#if defined(DEBUG_READBACK)
|
||||||
virtual void* get_host_ptr() const = 0;
|
virtual void* get_host_ptr() const = 0;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
class DirectStorageCUDA
|
class DirectStorageCUDA
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue