mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 05:56:36 +08:00
performance: use staging buffer when writing to an image
Related-To: NEO-12968. Also, don't import USM/mapped allocations for image operations. Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0f2f3c3764
commit
cf58be4142
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/source/utilities/heap_allocator.h"
|
||||
|
||||
@@ -46,10 +47,7 @@ int32_t StagingBufferManager::performChunkCopy(void *chunkDst, const void *chunk
|
||||
auto allocatedSize = size;
|
||||
auto [allocator, chunkBuffer] = requestStagingBuffer(allocatedSize, csr);
|
||||
auto ret = chunkCopyFunc(chunkDst, addrToPtr(chunkBuffer), chunkSrc, size);
|
||||
{
|
||||
auto lock = std::lock_guard<std::mutex>(mtx);
|
||||
trackers.push_back({allocator, chunkBuffer, allocatedSize, csr->peekTaskCount()});
|
||||
}
|
||||
trackChunk({allocator, chunkBuffer, allocatedSize, csr->peekTaskCount()});
|
||||
if (csr->isAnyDirectSubmissionEnabled()) {
|
||||
csr->flushTagUpdate();
|
||||
}
|
||||
@@ -104,10 +102,14 @@ std::pair<HeapAllocator *, uint64_t> StagingBufferManager::requestStagingBuffer(
|
||||
return {retriedAllocator, retriedChunkBuffer};
|
||||
}
|
||||
|
||||
StagingBuffer stagingBuffer{allocateStagingBuffer(), chunkSize};
|
||||
allocator = stagingBuffer.getAllocator();
|
||||
chunkBuffer = allocator->allocate(size);
|
||||
stagingBuffers.push_back(std::move(stagingBuffer));
|
||||
auto stagingBufferSize = alignUp(std::max(chunkSize, size), MemoryConstants::pageSize2M);
|
||||
auto usmHost = allocateStagingBuffer(stagingBufferSize);
|
||||
if (usmHost != nullptr) {
|
||||
StagingBuffer stagingBuffer{usmHost, stagingBufferSize};
|
||||
allocator = stagingBuffer.getAllocator();
|
||||
chunkBuffer = allocator->allocate(size);
|
||||
stagingBuffers.push_back(std::move(stagingBuffer));
|
||||
}
|
||||
return {allocator, chunkBuffer};
|
||||
}
|
||||
|
||||
@@ -129,13 +131,13 @@ std::pair<HeapAllocator *, uint64_t> StagingBufferManager::getExistingBuffer(siz
|
||||
return {allocator, buffer};
|
||||
}
|
||||
|
||||
void *StagingBufferManager::allocateStagingBuffer() {
|
||||
void *StagingBufferManager::allocateStagingBuffer(size_t size) {
|
||||
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 0u, rootDeviceIndices, deviceBitfields);
|
||||
auto hostPtr = svmAllocsManager->createHostUnifiedMemoryAllocation(chunkSize, unifiedMemoryProperties);
|
||||
auto hostPtr = svmAllocsManager->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties);
|
||||
return hostPtr;
|
||||
}
|
||||
|
||||
bool StagingBufferManager::isValidForCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies, uint32_t osContextId) const {
|
||||
bool StagingBufferManager::isValidForCopy(const Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies, uint32_t osContextId) const {
|
||||
auto stagingCopyEnabled = device.getProductHelper().isStagingBuffersEnabled();
|
||||
if (debugManager.flags.EnableCopyWithStagingBuffers.get() != -1) {
|
||||
stagingCopyEnabled = debugManager.flags.EnableCopyWithStagingBuffers.get();
|
||||
@@ -150,6 +152,15 @@ bool StagingBufferManager::isValidForCopy(Device &device, void *dstPtr, const vo
|
||||
return stagingCopyEnabled && hostToUsmCopy && !hasDependencies && (isUsedByOsContext || size <= chunkSize);
|
||||
}
|
||||
|
||||
bool StagingBufferManager::isValidForStagingWriteImage(const Device &device, size_t size) const {
|
||||
auto thresholdSizeForImages = 32 * MemoryConstants::megaByte;
|
||||
auto stagingCopyEnabled = false;
|
||||
if (debugManager.flags.EnableCopyWithStagingBuffers.get() != -1) {
|
||||
stagingCopyEnabled = debugManager.flags.EnableCopyWithStagingBuffers.get();
|
||||
}
|
||||
return stagingCopyEnabled && (0 < size && size <= thresholdSizeForImages);
|
||||
}
|
||||
|
||||
void StagingBufferManager::clearTrackedChunks(CommandStreamReceiver *csr) {
|
||||
for (auto iterator = trackers.begin(); iterator != trackers.end();) {
|
||||
if (csr->testTaskCountReady(csr->getTagAddress(), iterator->taskCountToWait)) {
|
||||
@@ -161,4 +172,9 @@ void StagingBufferManager::clearTrackedChunks(CommandStreamReceiver *csr) {
|
||||
}
|
||||
}
|
||||
|
||||
void StagingBufferManager::trackChunk(const StagingBufferTracker &tracker) {
|
||||
auto lock = std::lock_guard<std::mutex>(mtx);
|
||||
trackers.push_back(tracker);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -44,10 +44,10 @@ class StagingBuffer {
|
||||
};
|
||||
|
||||
struct StagingBufferTracker {
|
||||
HeapAllocator *allocator;
|
||||
uint64_t chunkAddress;
|
||||
size_t size;
|
||||
uint64_t taskCountToWait;
|
||||
HeapAllocator *allocator = nullptr;
|
||||
uint64_t chunkAddress = 0;
|
||||
size_t size = 0;
|
||||
uint64_t taskCountToWait = 0;
|
||||
};
|
||||
|
||||
class StagingBufferManager {
|
||||
@@ -59,13 +59,16 @@ class StagingBufferManager {
|
||||
StagingBufferManager &operator=(StagingBufferManager &&other) noexcept = delete;
|
||||
StagingBufferManager &operator=(const StagingBufferManager &other) = delete;
|
||||
|
||||
bool isValidForCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies, uint32_t osContextId) const;
|
||||
bool isValidForCopy(const Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies, uint32_t osContextId) const;
|
||||
bool isValidForStagingWriteImage(const Device &device, size_t size) const;
|
||||
|
||||
int32_t performCopy(void *dstPtr, const void *srcPtr, size_t size, ChunkCopyFunction &chunkCopyFunc, CommandStreamReceiver *csr);
|
||||
std::pair<HeapAllocator *, uint64_t> requestStagingBuffer(size_t &size, CommandStreamReceiver *csr);
|
||||
void trackChunk(const StagingBufferTracker &tracker);
|
||||
|
||||
private:
|
||||
std::pair<HeapAllocator *, uint64_t> requestStagingBuffer(size_t &size, CommandStreamReceiver *csr);
|
||||
std::pair<HeapAllocator *, uint64_t> getExistingBuffer(size_t &size);
|
||||
void *allocateStagingBuffer();
|
||||
void *allocateStagingBuffer(size_t size);
|
||||
void clearTrackedChunks(CommandStreamReceiver *csr);
|
||||
|
||||
int32_t performChunkCopy(void *chunkDst, const void *chunkSrc, size_t size, ChunkCopyFunction &chunkCopyFunc, CommandStreamReceiver *csr);
|
||||
|
||||
@@ -273,3 +273,25 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenDirectSubmissionEnabled
|
||||
svmAllocsManager->freeSVMAlloc(usmBuffer);
|
||||
delete[] nonUsmBuffer;
|
||||
}
|
||||
|
||||
// Exercises isValidForStagingWriteImage across transfer sizes and across
// values of the EnableCopyWithStagingBuffers debug flag.
HWTEST_F(StagingBufferManagerTest, givenStagingBufferManagerWhenIsValidForStagingWriteImageCalledThenReturnCorrectValue) {
    const auto validForWriteImage = [&](size_t transferSize) {
        return stagingBufferManager->isValidForStagingWriteImage(*pDevice, transferSize);
    };

    // NOTE(review): the positive case needs the flag to be enabled already,
    // presumably by the fixture - confirm in its SetUp.
    EXPECT_TRUE(validForWriteImage(MemoryConstants::pageSize2M));

    // Size boundaries: an empty transfer and an oversized one are both rejected.
    EXPECT_FALSE(validForWriteImage(0));
    EXPECT_FALSE(validForWriteImage(MemoryConstants::gigaByte));

    // Flag explicitly set to zero.
    debugManager.flags.EnableCopyWithStagingBuffers.set(0);
    EXPECT_FALSE(validForWriteImage(MemoryConstants::pageSize2M));

    // Flag set to -1.
    debugManager.flags.EnableCopyWithStagingBuffers.set(-1);
    EXPECT_FALSE(validForWriteImage(MemoryConstants::pageSize2M));
}
|
||||
|
||||
// With the mock memory manager's failure switches set, requestStagingBuffer
// is expected to hand back a zero chunk address.
HWTEST_F(StagingBufferManagerTest, givenFailedAllocationWhenRequestStagingBufferCalledThenReturnNullptr) {
    auto *mockMemoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
    mockMemoryManager->isMockHostMemoryManager = true;
    mockMemoryManager->forceFailureInPrimaryAllocation = true;

    // requestStagingBuffer takes the size by non-const reference, so it must be an lvalue.
    size_t requestedSize = MemoryConstants::pageSize2M;
    const auto allocatorAndChunk = stagingBufferManager->requestStagingBuffer(requestedSize, csr);

    // .second is the chunk address of the returned {allocator, address} pair.
    EXPECT_EQ(allocatorAndChunk.second, 0u);
}
|
||||
|
||||
Reference in New Issue
Block a user