performance: add infrastructure for staging with 3D images

Related-To: NEO-14026

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek 2025-03-31 12:32:11 +00:00 committed by Compute-Runtime-Automation
parent ed37a1e7ef
commit 3010af596e
6 changed files with 273 additions and 59 deletions

View File

@ -54,8 +54,10 @@ cl_int CommandQueue::enqueueStagingImageTransfer(cl_command_type commandType, Im
bool isSingleTransfer = false;
ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
auto isFirstTransfer = (globalOrigin[1] == origin[1]);
auto isLastTransfer = (globalOrigin[1] + globalRegion[1] == origin[1] + region[1]);
auto isFirstTransfer = (globalOrigin[1] == origin[1] && globalOrigin[2] == origin[2]);
auto isLastTransfer = (globalOrigin[1] + globalRegion[1] == origin[1] + region[1]) &&
(globalOrigin[2] + globalRegion[2] == origin[2] + region[2]);
isSingleTransfer = isFirstTransfer && isLastTransfer;
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
cl_int ret = 0;
@ -69,9 +71,10 @@ cl_int CommandQueue::enqueueStagingImageTransfer(cl_command_type commandType, Im
};
auto bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel;
auto dstSlicePitch = inputSlicePitch ? inputSlicePitch : globalRegion[1] * dstRowPitch;
auto stagingBufferManager = this->context->getStagingBufferManager();
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, dstRowPitch, bytesPerPixel, chunkWrite, &csr, isRead);
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, dstRowPitch, dstSlicePitch, bytesPerPixel, chunkWrite, &csr, isRead);
if (isRead && context->isProvidingPerformanceHints()) {
auto hostPtrSize = calculateHostPtrSizeForImage(globalRegion, inputRowPitch, inputSlicePitch, image);

View File

@ -1139,10 +1139,10 @@ HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledThenReturn
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithoutRowPitchThenReturnSuccess) {
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithoutRowPitchNorSlicePitchThenReturnSuccess) {
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
region[0] = MemoryConstants::megaByte / srcImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, srcImage, false, origin, region, 0u, MemoryConstants::megaByte, ptr, nullptr);
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, srcImage, false, origin, region, 0u, 0u, ptr, nullptr);
EXPECT_EQ(res, CL_SUCCESS);
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueReadImageCounter);
@ -1234,4 +1234,25 @@ HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithGpuHan
EXPECT_EQ(res, CL_OUT_OF_RESOURCES);
EXPECT_EQ(2ul, mockCommandQueueHw.enqueueReadImageCounter);
}
// Checks that a staging read of a 3D image region succeeds and is issued as two enqueueReadImage calls.
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledFor3DImageThenReturnSuccess) {
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
// Describe a 3D image of 4 x 4 pixels with 64 slices and no mip levels.
cl_image_desc imageDesc = {};
imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D;
imageDesc.num_mip_levels = 0;
imageDesc.image_width = 4;
imageDesc.image_height = 4;
imageDesc.image_depth = 64;
// Transfer a 2 x 2 x 4 sub-region starting at the image origin.
size_t origin[3] = {0, 0, 0};
size_t region[3] = {2, 2, 4};
auto image = std::unique_ptr<Image>(ImageHelper<Image3dDefaults>::create(context, &imageDesc));
// Host layout passed to the transfer: row pitch of 4 bytes, slice pitch of 1MB.
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, image.get(), false, origin, region, 4u, MemoryConstants::megaByte, ptr, nullptr);
EXPECT_EQ(res, CL_SUCCESS);
// (2, 2, 4) split into (2, 2, 2) * 2
EXPECT_EQ(2ul, mockCommandQueueHw.enqueueReadImageCounter);
// The CSR must not have been asked to create an allocation for the host surface.
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}

View File

@ -866,10 +866,10 @@ HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledThenRetu
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledWithoutRowPitchThenReturnSuccess) {
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledWithoutRowPitchNorSlicePitchThenReturnSuccess) {
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
region[0] = MemoryConstants::megaByte / dstImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, dstImage, false, origin, region, 0u, MemoryConstants::megaByte, ptr, nullptr);
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, dstImage, false, origin, region, 0u, 0u, ptr, nullptr);
EXPECT_EQ(res, CL_SUCCESS);
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueWriteImageCounter);
@ -962,4 +962,25 @@ HWTEST_F(WriteImageStagingBufferTest, givenIsValidForStagingTransferWhenUserPtrI
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(mockCommandQueueHw.isValidForStagingTransfer(buffer, mappedPtr, buffer->getSize(), CL_COMMAND_WRITE_IMAGE, false, false));
delete buffer;
}
// Checks that a staging write of a 3D image region succeeds and is issued as two enqueueWriteImage calls.
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledFor3DImageThenReturnSuccess) {
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
// Describe a 3D image of 4 x 4 pixels with 64 slices and no mip levels.
cl_image_desc imageDesc = {};
imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D;
imageDesc.num_mip_levels = 0;
imageDesc.image_width = 4;
imageDesc.image_height = 4;
imageDesc.image_depth = 64;
// Transfer a 2 x 2 x 4 sub-region starting at the image origin.
size_t origin[3] = {0, 0, 0};
size_t region[3] = {2, 2, 4};
auto image = std::unique_ptr<Image>(ImageHelper<Image3dDefaults>::create(context, &imageDesc));
// Host layout passed to the transfer: row pitch of 4 bytes, slice pitch of 1MB.
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, image.get(), false, origin, region, 4u, MemoryConstants::megaByte, ptr, nullptr);
EXPECT_EQ(res, CL_SUCCESS);
// (2, 2, 4) split into (2, 2, 2) * 2
EXPECT_EQ(2ul, mockCommandQueueHw.enqueueWriteImageCounter);
// The CSR must not have been asked to create an allocation for the host surface.
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}

View File

@ -122,56 +122,114 @@ StagingTransferStatus StagingBufferManager::performCopy(void *dstPtr, const void
return result;
}
/*
* This method orchestrates transfer operation for images with given origin and region.
* Transfer is split into chunks; each chunk represents a sub-region to transfer.
* Each chunk contains staging buffer which should be used instead of non-usm memory during transfers on GPU.
* Several rows are packed into single chunk unless size of single row exceeds maximum chunk size (2MB).
* Caller provides actual function to enqueue read/write operation for single chunk.
*/
StagingTransferStatus StagingBufferManager::performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t bytesPerPixel, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead) {
StagingQueue stagingQueue;
size_t origin[3] = {};
size_t region[3] = {};
origin[0] = globalOrigin[0];
origin[2] = globalOrigin[2];
region[0] = globalRegion[0];
region[2] = globalRegion[2];
/*
 * Returns the number of bytes spanned in user memory by the given region,
 * measured from the first byte of its first row to the last byte of its last row.
 * Full slice pitches are counted for every slice except the last one and full row
 * pitches for every row except the last one; the last row contributes only its
 * pixel data (region[0] * bytesPerPixel).
 */
size_t calculateSizeForRegion(size_t region[3], const ImageMetadata &imageMetadata) {
    const auto lastRowBytes = region[0] * imageMetadata.bytesPerPixel;
    const auto depth = region[2];
    const auto height = region[1];

    // More than one slice: leading slices and leading rows of the last slice are counted with their pitches.
    if (depth > 1) {
        const auto leadingSlicesBytes = (depth - 1) * imageMetadata.slicePitch;
        const auto leadingRowsBytes = (height - 1) * imageMetadata.rowPitch;
        return leadingSlicesBytes + leadingRowsBytes + lastRowBytes;
    }

    // Single slice with several rows: only row pitch matters.
    if (height > 1) {
        return (height - 1) * imageMetadata.rowPitch + lastRowBytes;
    }

    // Single row: just the pixel data.
    return lastRowBytes;
}
StagingTransferStatus StagingBufferManager::performImageSlicesTransfer(StagingQueue &stagingQueue, size_t &submittedChunks, const void *ptr, auto sliceOffset,
size_t baseRowOffset, size_t rowsToCopy, size_t origin[3], size_t region[3], ImageMetadata &imageMetadata,
ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead) {
auto rowPitch = imageMetadata.rowPitch;
auto rowsPerChunk = std::max<size_t>(1ul, chunkSize / rowPitch);
rowsPerChunk = std::min<size_t>(rowsPerChunk, globalRegion[1]);
auto numOfChunks = globalRegion[1] / rowsPerChunk;
auto remainder = globalRegion[1] % (rowsPerChunk * numOfChunks);
rowsPerChunk = std::min<size_t>(rowsPerChunk, rowsToCopy);
auto slicePitch = imageMetadata.slicePitch;
auto numOfChunksInYDim = rowsToCopy / rowsPerChunk;
auto remainder = rowsToCopy % (rowsPerChunk * numOfChunksInYDim);
StagingTransferStatus result{};
RowPitchData rowPitchData{region[0] * bytesPerPixel, rowPitch, rowsPerChunk};
for (auto i = 0u; i < numOfChunks; i++) {
origin[1] = globalOrigin[1] + i * rowsPerChunk;
// Split (X, Y, Z') region into several (X, Y', Z') chunks.
for (auto rowId = 0u; rowId < numOfChunksInYDim; rowId++) {
origin[1] = baseRowOffset + rowId * rowsPerChunk;
region[1] = rowsPerChunk;
auto size = region[1] * rowPitch;
auto chunkPtr = ptrOffset(ptr, i * rowsPerChunk * rowPitch);
UserData userData{chunkPtr, size, rowPitchData};
result = performChunkTransfer(i, isRead, userData, stagingQueue, csr, chunkTransferImageFunc, origin, region);
auto size = calculateSizeForRegion(region, imageMetadata);
auto chunkPtr = ptrOffset(ptr, sliceOffset * slicePitch + rowId * rowsPerChunk * rowPitch);
imageMetadata.rowsInChunk = rowsPerChunk;
UserData userData{chunkPtr, size, imageMetadata};
result = performChunkTransfer(submittedChunks++, isRead, userData, stagingQueue, csr, chunkTransferImageFunc, origin, region);
if (result.chunkCopyStatus != 0 || result.waitStatus == WaitStatus::gpuHang) {
return result;
}
}
if (remainder != 0) {
origin[1] = globalOrigin[1] + numOfChunks * rowsPerChunk;
origin[1] = baseRowOffset + numOfChunksInYDim * rowsPerChunk;
region[1] = remainder;
auto size = region[1] * rowPitch;
auto chunkPtr = ptrOffset(ptr, numOfChunks * rowsPerChunk * rowPitch);
rowPitchData.rowsInChunk = remainder;
UserData userData{chunkPtr, size, rowPitchData};
result = performChunkTransfer(numOfChunks, isRead, userData, stagingQueue, csr, chunkTransferImageFunc, origin, region);
auto size = calculateSizeForRegion(region, imageMetadata);
auto chunkPtr = ptrOffset(ptr, sliceOffset * slicePitch + numOfChunksInYDim * rowsPerChunk * rowPitch);
imageMetadata.rowsInChunk = remainder;
UserData userData{chunkPtr, size, imageMetadata};
result = performChunkTransfer(submittedChunks++, isRead, userData, stagingQueue, csr, chunkTransferImageFunc, origin, region);
if (result.chunkCopyStatus != 0 || result.waitStatus == WaitStatus::gpuHang) {
return result;
}
}
return result;
}
/*
* This method orchestrates transfer operation for images with given origin and region.
* Transfer is split into chunks; each chunk represents a sub-region to transfer.
* Each chunk contains a staging buffer which should be used instead of non-usm memory during transfers on GPU.
* Several slices and rows can be packed into a single chunk if the size of such a chunk does not exceed the maximum chunk size (2MB).
* Caller provides actual function to enqueue read/write operation for single chunk.
*/
StagingTransferStatus StagingBufferManager::performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t slicePitch, size_t bytesPerPixel, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead) {
StagingQueue stagingQueue;
size_t origin[3] = {};
size_t region[3] = {};
origin[0] = globalOrigin[0];
region[0] = globalRegion[0];
StagingTransferStatus result{};
size_t submittedChunks = 0;
// Calculate number of rows that can be packed into single chunk.
auto rowsPerChunk = std::max<size_t>(1ul, chunkSize / rowPitch);
rowsPerChunk = std::min<size_t>(rowsPerChunk, globalRegion[1]);
auto numOfChunksInYDim = globalRegion[1] / rowsPerChunk;
// If single chunk is enough to transfer whole slice, we can try to pack several slices into single chunk.
size_t slicesPerStep = 1;
if (numOfChunksInYDim == 1) {
slicesPerStep = std::max<size_t>(1ul, chunkSize / slicePitch);
slicesPerStep = std::min<size_t>(slicesPerStep, globalRegion[2]);
}
auto remainderSlices = globalRegion[2] % slicesPerStep;
ImageMetadata imageMetadata{bytesPerPixel, globalRegion[0] * bytesPerPixel, rowPitch, slicePitch};
// Split (X, Y, Z) region into several (X, Y, Z') chunks.
for (auto sliceId = 0u; sliceId < globalRegion[2] / slicesPerStep; sliceId++) {
auto sliceOffset = sliceId * slicesPerStep;
origin[2] = globalOrigin[2] + sliceOffset;
region[2] = slicesPerStep;
result = performImageSlicesTransfer(stagingQueue, submittedChunks, ptr, sliceOffset, globalOrigin[1], globalRegion[1], origin, region, imageMetadata, chunkTransferImageFunc, csr, isRead);
if (result.chunkCopyStatus != 0 || result.waitStatus == WaitStatus::gpuHang) {
return result;
}
}
result.waitStatus = drainAndReleaseStagingQueue(isRead, stagingQueue, numOfChunks + (remainder != 0 ? 1 : 0));
if (remainderSlices != 0) {
auto sliceOffset = globalRegion[2] - remainderSlices;
origin[2] = globalOrigin[2] + sliceOffset;
region[2] = remainderSlices;
result = performImageSlicesTransfer(stagingQueue, submittedChunks, ptr, sliceOffset, globalOrigin[1], globalRegion[1], origin, region, imageMetadata, chunkTransferImageFunc, csr, isRead);
if (result.chunkCopyStatus != 0 || result.waitStatus == WaitStatus::gpuHang) {
return result;
}
}
result.waitStatus = drainAndReleaseStagingQueue(isRead, stagingQueue, submittedChunks);
return result;
}
@ -219,10 +277,10 @@ WaitStatus StagingBufferManager::copyStagingToHost(const std::pair<UserData, Sta
tracker = transfer.second;
auto stagingBuffer = addrToPtr(tracker.chunkAddress);
auto userDst = const_cast<void *>(userData.ptr);
if (userData.rowPitchData.rowSize < userData.rowPitchData.rowPitch) {
for (auto rowId = 0u; rowId < userData.rowPitchData.rowsInChunk; rowId++) {
auto offset = rowId * userData.rowPitchData.rowPitch;
memcpy(ptrOffset(userDst, offset), ptrOffset(stagingBuffer, offset), userData.rowPitchData.rowSize);
if (userData.imageMetadata.rowSize < userData.imageMetadata.rowPitch) {
for (auto rowId = 0u; rowId < userData.imageMetadata.rowsInChunk; rowId++) {
auto offset = rowId * userData.imageMetadata.rowPitch;
memcpy(ptrOffset(userDst, offset), ptrOffset(stagingBuffer, offset), userData.imageMetadata.rowSize);
}
} else {
memcpy(userDst, stagingBuffer, userData.size);

View File

@ -59,16 +59,19 @@ struct StagingBufferTracker {
void freeChunk() const;
};
struct RowPitchData {
// Layout description of the user memory involved in a staged image transfer.
// Member order matters: the struct is filled with positional aggregate initialization.
struct ImageMetadata {
    // Size of a single pixel in bytes.
    size_t bytesPerPixel{0};
    // Bytes of pixel data in one row of the region (region width * bytesPerPixel).
    size_t rowSize{0};
    // Byte stride applied per row when addressing user memory.
    size_t rowPitch{0};
    // Byte stride applied per slice when addressing user memory.
    size_t slicePitch{0};
    // Number of rows in the chunk being transferred; assigned per chunk before submission.
    size_t rowsInChunk{0};
};
struct UserData {
const void *ptr = nullptr;
size_t size = 0;
RowPitchData rowPitchData{};
ImageMetadata imageMetadata{};
};
struct StagingTransferStatus {
@ -88,7 +91,7 @@ class StagingBufferManager : NEO::NonCopyableAndNonMovableClass {
bool isValidForStagingTransfer(const Device &device, const void *ptr, size_t size, bool hasDependencies);
StagingTransferStatus performCopy(void *dstPtr, const void *srcPtr, size_t size, ChunkCopyFunction &chunkCopyFunc, CommandStreamReceiver *csr);
StagingTransferStatus performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t bytesPerPixel, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead);
StagingTransferStatus performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t slicePitch, size_t bytesPerPixel, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead);
StagingTransferStatus performBufferTransfer(const void *ptr, size_t globalOffset, size_t globalSize, ChunkTransferBufferFunc &chunkTransferBufferFunc, CommandStreamReceiver *csr, bool isRead);
std::pair<HeapAllocator *, uint64_t> requestStagingBuffer(size_t &size);
@ -104,6 +107,9 @@ class StagingBufferManager : NEO::NonCopyableAndNonMovableClass {
template <class Func, class... Args>
StagingTransferStatus performChunkTransfer(size_t chunkTransferId, bool isRead, const UserData &userData, StagingQueue &currentStagingBuffers, CommandStreamReceiver *csr, Func &func, Args... args);
StagingTransferStatus performImageSlicesTransfer(StagingQueue &stagingQueue, size_t &submittedChunks, const void *ptr, auto sliceOffset,
size_t baseRowOffset, size_t rowsToCopy, size_t origin[3], size_t region[3], ImageMetadata &imageMetadata,
ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead);
WaitStatus copyStagingToHost(const std::pair<UserData, StagingBufferTracker> &transfer, StagingBufferTracker &tracker) const;
WaitStatus drainAndReleaseStagingQueue(bool isRead, const StagingQueue &stagingQueue, size_t numOfSubmittedTransfers) const;

View File

@ -134,7 +134,7 @@ class StagingBufferManagerFixture : public DeviceFixture {
return 0;
};
auto initialNumOfUsmAllocations = svmAllocsManager->svmAllocs.getNumAllocs();
auto ret = stagingBufferManager->performImageTransfer(hostPtr, globalOrigin, globalRegion, rowPitch, pixelElemSize, chunkTransfer, csr, isRead);
auto ret = stagingBufferManager->performImageTransfer(hostPtr, globalOrigin, globalRegion, rowPitch, rowPitch * globalRegion[1], pixelElemSize, chunkTransfer, csr, isRead);
auto newUsmAllocations = svmAllocsManager->svmAllocs.getNumAllocs() - initialNumOfUsmAllocations;
for (auto rowId = 0u; rowId < globalRegion[1]; rowId++) {
@ -147,7 +147,7 @@ class StagingBufferManagerFixture : public DeviceFixture {
EXPECT_EQ(expectedChunks, chunkCounter);
auto expectedNewUsmAllocations = 1u;
if (isRead) {
if (isRead && pixelElemSize * globalRegion[0] * globalRegion[1] > stagingBufferSize) {
expectedNewUsmAllocations = 2u;
}
EXPECT_EQ(expectedNewUsmAllocations, newUsmAllocations);
@ -183,6 +183,12 @@ class StagingBufferManagerFixture : public DeviceFixture {
delete[] nonUsmBuffer;
}
// Fills the first `size` elements of userData with the ascending sequence 0, 1, 2, ...
// so that every element holds its own index.
void fillUserData(unsigned int *userData, size_t size) {
    auto index = 0u;
    while (index < size) {
        userData[index] = index;
        ++index;
    }
}
constexpr static size_t stagingBufferSize = MemoryConstants::megaByte * 2;
constexpr static size_t pixelElemSize = 1u;
DebugManagerStateRestore restorer;
@ -548,22 +554,22 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformImageReadWithOrigi
TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformImageReadWithMultipleRowsPerChunkThenWholeRegionCovered) {
size_t expectedChunks = 4;
const size_t globalOrigin[3] = {0, 0, 0};
const size_t globalRegion[3] = {4, 8, 1};
imageTransferThroughStagingBuffers(true, MemoryConstants::megaByte, globalOrigin, globalRegion, expectedChunks);
const size_t globalRegion[3] = {1 * MemoryConstants::megaByte, 8, 1};
imageTransferThroughStagingBuffers(true, pixelElemSize * globalRegion[0], globalOrigin, globalRegion, expectedChunks);
}
TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformImageReadWithRemainderThenWholeRegionCovered) {
size_t expectedChunks = 4;
const size_t globalOrigin[3] = {0, 0, 0};
const size_t globalRegion[3] = {4, 7, 1};
imageTransferThroughStagingBuffers(true, MemoryConstants::megaByte, globalOrigin, globalRegion, expectedChunks);
const size_t globalRegion[3] = {1 * MemoryConstants::megaByte, 7, 1};
imageTransferThroughStagingBuffers(true, pixelElemSize * globalRegion[0], globalOrigin, globalRegion, expectedChunks);
}
TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformImageReadWithRemainderAndTransfersWithinLimitThenWholeRegionCovered) {
size_t expectedChunks = 2;
const size_t globalOrigin[3] = {0, 0, 0};
const size_t globalRegion[3] = {4, 3, 1};
imageTransferThroughStagingBuffers(true, MemoryConstants::megaByte, globalOrigin, globalRegion, expectedChunks);
const size_t globalRegion[3] = {1 * MemoryConstants::megaByte, 3, 1};
imageTransferThroughStagingBuffers(true, pixelElemSize * globalRegion[0], globalOrigin, globalRegion, expectedChunks);
}
HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringChunkReadFromImageThenReturnImmediatelyWithFailure) {
@ -579,7 +585,7 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringChunkReadF
};
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(csr);
ultCsr->waitForTaskCountReturnValue = WaitStatus::gpuHang;
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
EXPECT_EQ(0, ret.chunkCopyStatus);
EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus);
EXPECT_EQ(2u, chunkCounter);
@ -601,7 +607,7 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangAfterChunkReadFr
}
return 0;
};
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
EXPECT_EQ(0, ret.chunkCopyStatus);
EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus);
EXPECT_EQ(4u, chunkCounter);
@ -624,7 +630,7 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringRemainderC
}
return 0;
};
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
EXPECT_EQ(0, ret.chunkCopyStatus);
EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus);
EXPECT_EQ(remainderCounter - 1, chunkCounter);
@ -643,7 +649,7 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferWhenFailedChunkImageWriteThen
++chunkCounter;
return expectedErrorCode;
};
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, false);
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, false);
EXPECT_EQ(expectedErrorCode, ret.chunkCopyStatus);
EXPECT_EQ(WaitStatus::ready, ret.waitStatus);
EXPECT_EQ(1u, chunkCounter);
@ -666,13 +672,112 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferWhenFailedChunkImageWriteWith
}
return 0;
};
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, false);
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, false);
EXPECT_EQ(expectedErrorCode, ret.chunkCopyStatus);
EXPECT_EQ(WaitStatus::ready, ret.waitStatus);
EXPECT_EQ(remainderCounter, chunkCounter);
delete[] ptr;
}
// Parameters of a single 3D image staging transfer test case.
// Member order is relied on by the positional initializers of the test case table.
struct Image3DTestInfo {
// Number of chunk transfers the test expects to observe.
size_t expectedChunks;
// Slice pitch, in bytes, passed to performImageTransfer.
size_t slicePitch;
// Depth of the transferred region (globalRegion[2]).
size_t slices;
};
// Value-parameterized variant of StagingBufferManagerTest; each test instance receives one Image3DTestInfo.
class StagingBufferManager3DImageTest : public StagingBufferManagerTest,
public ::testing::WithParamInterface<Image3DTestInfo> {};
// Runs a 3D image write through the staging buffer manager and checks both the number of
// chunk transfers and the staging buffer contents seen by each chunk.
HWTEST_P(StagingBufferManager3DImageTest, givenStagingBufferWhenPerformImageTransferCalledWith3DImageThenSplitCorrectly) {
size_t expectedChunks = GetParam().expectedChunks;
// Region is 4 x 4 x slices; rowPitch doubles as the region width, so rows carry no padding
// (pixelElemSize is passed as bytes per pixel).
auto rowPitch = 4u;
auto rowsNum = 4u;
auto slicePitch = GetParam().slicePitch;
const size_t globalOrigin[3] = {0, 0, 0};
const size_t globalRegion[3] = {rowPitch, rowsNum, GetParam().slices};
// Host buffer of stagingBufferSize * expectedChunks bytes, expressed in unsigned int elements.
auto size = stagingBufferSize * expectedChunks / sizeof(unsigned int);
auto ptr = new unsigned int[size];
// Each element holds its own index, so different offsets hold distinguishable data.
fillUserData(ptr, size);
size_t chunkCounter = 0;
ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
// Verify that staging buffer contains correct data based on origin offset.
auto offset = origin[0] + origin[1] * rowPitch + origin[2] * slicePitch;
// offset is in bytes while ptr is an unsigned int pointer, hence the division.
auto userPtr = ptr + (offset / sizeof(uint32_t));
// Only the leading region[0] * region[1] * region[2] bytes of the chunk are compared.
EXPECT_EQ(0, memcmp(userPtr, stagingBuffer, region[0] * region[1] * region[2]));
++chunkCounter;
return 0;
};
// isRead == false: this is the write direction.
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, chunkWrite, csr, false);
EXPECT_EQ(0, ret.chunkCopyStatus);
EXPECT_EQ(WaitStatus::ready, ret.waitStatus);
EXPECT_EQ(expectedChunks, chunkCounter);
delete[] ptr;
}
// Test cases for StagingBufferManager3DImageTest, given as {expectedChunks, slicePitch, slices}.
Image3DTestInfo imageTestsInfo[] = {
{8u, StagingBufferManagerFixture::stagingBufferSize, 8}, // (4, 4, 8) split into (4, 4, 1) * 8
{4u, StagingBufferManagerFixture::stagingBufferSize / 2, 8}, // (4, 4, 8) split into (4, 4, 2) * 4
{5u, StagingBufferManagerFixture::stagingBufferSize / 2, 9}, // (4, 4, 9) split into (4, 4, 2) * 4 + (4, 4, 1)
};
// Instantiate the parameterized suite once per entry of imageTestsInfo.
INSTANTIATE_TEST_SUITE_P(
StagingBufferManagerTest_,
StagingBufferManager3DImageTest,
testing::ValuesIn(imageTestsInfo));
// Checks that a GPU hang reported while reading a 3D image stops the transfer before the
// final chunk (the slice remainder) is submitted.
HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringSliceRemainderChunkReadFromImageThenReturnImmediatelyWithFailure) {
// A complete transfer of this region would take 5 chunks; the last one covers the remainder slice.
auto expectedChunks = 5u;
size_t rowPitch = 4u;
auto rowsNum = 4u;
size_t slicePitch = MemoryConstants::megaByte;
const size_t globalOrigin[3] = {0, 0, 0};
const size_t globalRegion[3] = {rowPitch, rowsNum, 9};
// Host buffer of stagingBufferSize * expectedChunks bytes, expressed in unsigned int elements.
auto size = stagingBufferSize * expectedChunks / sizeof(unsigned int);
auto ptr = new unsigned int[size];
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(csr);
size_t chunkCounter = 0;
ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
++chunkCounter;
// From the last non-remainder chunk onwards, make the CSR report a GPU hang on wait.
if (chunkCounter == expectedChunks - 1) {
ultCsr->waitForTaskCountReturnValue = WaitStatus::gpuHang;
}
return 0;
};
// isRead == true: this is the read direction.
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, chunkWrite, csr, true);
EXPECT_EQ(0, ret.chunkCopyStatus);
EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus);
// The remainder chunk must not have been submitted after the hang was reported.
EXPECT_EQ(expectedChunks - 1, chunkCounter);
delete[] ptr;
}
// Checks that an error returned by the chunk callback for the final chunk (the slice remainder)
// of a 3D image write is propagated to the caller.
TEST_F(StagingBufferManagerTest, givenStagingBufferWhenFailedChunkImageWriteWithSliceRemainderThenReturnWithFailure) {
// The transfer of this region takes 5 chunks; the last one covers the remainder slice.
auto expectedChunks = 5u;
size_t rowPitch = 4u;
auto rowsNum = 4u;
size_t slicePitch = MemoryConstants::megaByte;
const size_t globalOrigin[3] = {0, 0, 0};
const size_t globalRegion[3] = {rowPitch, rowsNum, 9};
// Host buffer of stagingBufferSize * expectedChunks bytes, expressed in unsigned int elements.
auto size = stagingBufferSize * expectedChunks / sizeof(unsigned int);
auto ptr = new unsigned int[size];
size_t chunkCounter = 0;
constexpr int expectedErrorCode = 1;
ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
++chunkCounter;
// Fail only the last chunk.
if (chunkCounter == expectedChunks) {
return expectedErrorCode;
}
return 0;
};
// isRead == false: this is the write direction.
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, chunkWrite, csr, false);
EXPECT_EQ(expectedErrorCode, ret.chunkCopyStatus);
EXPECT_EQ(WaitStatus::ready, ret.waitStatus);
EXPECT_EQ(expectedChunks, chunkCounter);
delete[] ptr;
}
TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformBufferTransferThenCopyData) {
constexpr size_t numOfChunkCopies = 8;
constexpr size_t remainder = 1024;