performance: add infrastructure for staging with 3D images
Related-To: NEO-14026 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
parent
ed37a1e7ef
commit
3010af596e
|
@ -54,8 +54,10 @@ cl_int CommandQueue::enqueueStagingImageTransfer(cl_command_type commandType, Im
|
|||
|
||||
bool isSingleTransfer = false;
|
||||
ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
|
||||
auto isFirstTransfer = (globalOrigin[1] == origin[1]);
|
||||
auto isLastTransfer = (globalOrigin[1] + globalRegion[1] == origin[1] + region[1]);
|
||||
auto isFirstTransfer = (globalOrigin[1] == origin[1] && globalOrigin[2] == origin[2]);
|
||||
|
||||
auto isLastTransfer = (globalOrigin[1] + globalRegion[1] == origin[1] + region[1]) &&
|
||||
(globalOrigin[2] + globalRegion[2] == origin[2] + region[2]);
|
||||
isSingleTransfer = isFirstTransfer && isLastTransfer;
|
||||
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
|
||||
cl_int ret = 0;
|
||||
|
@ -69,9 +71,10 @@ cl_int CommandQueue::enqueueStagingImageTransfer(cl_command_type commandType, Im
|
|||
};
|
||||
auto bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
|
||||
auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel;
|
||||
auto dstSlicePitch = inputSlicePitch ? inputSlicePitch : globalRegion[1] * dstRowPitch;
|
||||
|
||||
auto stagingBufferManager = this->context->getStagingBufferManager();
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, dstRowPitch, bytesPerPixel, chunkWrite, &csr, isRead);
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, dstRowPitch, dstSlicePitch, bytesPerPixel, chunkWrite, &csr, isRead);
|
||||
|
||||
if (isRead && context->isProvidingPerformanceHints()) {
|
||||
auto hostPtrSize = calculateHostPtrSizeForImage(globalRegion, inputRowPitch, inputSlicePitch, image);
|
||||
|
|
|
@ -1139,10 +1139,10 @@ HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledThenReturn
|
|||
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||
}
|
||||
|
||||
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithoutRowPitchThenReturnSuccess) {
|
||||
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithoutRowPitchNorSlicePitchThenReturnSuccess) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
region[0] = MemoryConstants::megaByte / srcImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
|
||||
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, srcImage, false, origin, region, 0u, MemoryConstants::megaByte, ptr, nullptr);
|
||||
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, srcImage, false, origin, region, 0u, 0u, ptr, nullptr);
|
||||
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueReadImageCounter);
|
||||
|
@ -1234,4 +1234,25 @@ HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithGpuHan
|
|||
|
||||
EXPECT_EQ(res, CL_OUT_OF_RESOURCES);
|
||||
EXPECT_EQ(2ul, mockCommandQueueHw.enqueueReadImageCounter);
|
||||
}
|
||||
|
||||
// Reads a (2, 2, 4) sub-region of a 4x4x64 3D image through staging buffers and verifies
// how many read-image enqueues the transfer was broken into.
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledFor3DImageThenReturnSuccess) {
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D;
    imageDesc.num_mip_levels = 0;
    imageDesc.image_width = 4;
    imageDesc.image_height = 4;
    imageDesc.image_depth = 64;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {2, 2, 4};
    auto image = std::unique_ptr<Image>(ImageHelper<Image3dDefaults>::create(context, &imageDesc));

    // Explicit pitches: 4 bytes per row, 1 MB per slice.
    auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, image.get(), false, origin, region, 4u, MemoryConstants::megaByte, ptr, nullptr);
    EXPECT_EQ(res, CL_SUCCESS);

    // (2, 2, 4) is split into (2, 2, 2) * 2
    EXPECT_EQ(2ul, mockCommandQueueHw.enqueueReadImageCounter);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // The host pointer must not have been wrapped in a host-surface allocation.
    EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
|
|
@ -866,10 +866,10 @@ HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledThenRetu
|
|||
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||
}
|
||||
|
||||
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledWithoutRowPitchThenReturnSuccess) {
|
||||
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledWithoutRowPitchNorSlicePitchThenReturnSuccess) {
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
|
||||
region[0] = MemoryConstants::megaByte / dstImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
|
||||
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, dstImage, false, origin, region, 0u, MemoryConstants::megaByte, ptr, nullptr);
|
||||
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, dstImage, false, origin, region, 0u, 0u, ptr, nullptr);
|
||||
|
||||
EXPECT_EQ(res, CL_SUCCESS);
|
||||
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueWriteImageCounter);
|
||||
|
@ -962,4 +962,25 @@ HWTEST_F(WriteImageStagingBufferTest, givenIsValidForStagingTransferWhenUserPtrI
|
|||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_FALSE(mockCommandQueueHw.isValidForStagingTransfer(buffer, mappedPtr, buffer->getSize(), CL_COMMAND_WRITE_IMAGE, false, false));
|
||||
delete buffer;
|
||||
}
|
||||
|
||||
// Writes a (2, 2, 4) sub-region of a 4x4x64 3D image through staging buffers and verifies
// how many write-image enqueues the transfer was broken into.
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledFor3DImageThenReturnSuccess) {
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D;
    imageDesc.num_mip_levels = 0;
    imageDesc.image_width = 4;
    imageDesc.image_height = 4;
    imageDesc.image_depth = 64;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {2, 2, 4};
    auto image = std::unique_ptr<Image>(ImageHelper<Image3dDefaults>::create(context, &imageDesc));

    // Explicit pitches: 4 bytes per row, 1 MB per slice.
    auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, image.get(), false, origin, region, 4u, MemoryConstants::megaByte, ptr, nullptr);
    EXPECT_EQ(res, CL_SUCCESS);

    // (2, 2, 4) is split into (2, 2, 2) * 2
    EXPECT_EQ(2ul, mockCommandQueueHw.enqueueWriteImageCounter);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // The host pointer must not have been wrapped in a host-surface allocation.
    EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
|
|
@ -122,56 +122,114 @@ StagingTransferStatus StagingBufferManager::performCopy(void *dstPtr, const void
|
|||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* This method orchestrates transfer operation for images with given origin and region.
|
||||
* Transfer is splitted into chunks, each chunk represents sub-region to transfer.
|
||||
* Each chunk contains staging buffer which should be used instead of non-usm memory during transfers on GPU.
|
||||
* Several rows are packed into single chunk unless size of single row exceeds maximum chunk size (2MB).
|
||||
* Caller provides actual function to enqueue read/write operation for single chunk.
|
||||
*/
|
||||
StagingTransferStatus StagingBufferManager::performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t bytesPerPixel, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead) {
|
||||
StagingQueue stagingQueue;
|
||||
size_t origin[3] = {};
|
||||
size_t region[3] = {};
|
||||
origin[0] = globalOrigin[0];
|
||||
origin[2] = globalOrigin[2];
|
||||
region[0] = globalRegion[0];
|
||||
region[2] = globalRegion[2];
|
||||
size_t calculateSizeForRegion(size_t region[3], const ImageMetadata &imageMetadata) {
|
||||
if (region[2] > 1) {
|
||||
return (region[2] - 1) * imageMetadata.slicePitch + (region[1] - 1) * imageMetadata.rowPitch + region[0] * imageMetadata.bytesPerPixel;
|
||||
} else if (region[1] > 1) {
|
||||
return (region[1] - 1) * imageMetadata.rowPitch + region[0] * imageMetadata.bytesPerPixel;
|
||||
}
|
||||
return region[0] * imageMetadata.bytesPerPixel;
|
||||
}
|
||||
|
||||
/*
 * Transfers the slices currently selected by origin[2]/region[2], splitting them along the Y axis
 * into chunks of at most (chunkSize / rowPitch) rows, but never fewer than one row per chunk.
 *
 * stagingQueue / submittedChunks - shared with the caller; submittedChunks is incremented once per submitted chunk.
 * ptr                            - base of the user memory; chunk addresses are derived from it.
 * sliceOffset                    - index of the first selected slice relative to the start of the transfer.
 * baseRowOffset / rowsToCopy     - first image row and number of rows to cover.
 * origin / region                - scratch arrays; index 1 is overwritten for every chunk.
 * imageMetadata                  - pitches of the user memory; rowsInChunk is overwritten for every chunk.
 *
 * Returns the status of the last submitted chunk, or a default-constructed status when rowsToCopy is 0.
 * Returns early when a chunk reports a non-zero copy status or a GPU hang.
 * NOTE(review): rowPitch is assumed to be non-zero (it is used as a divisor) - confirm at call sites.
 */
StagingTransferStatus StagingBufferManager::performImageSlicesTransfer(StagingQueue &stagingQueue, size_t &submittedChunks, const void *ptr, auto sliceOffset,
                                                                       size_t baseRowOffset, size_t rowsToCopy, size_t origin[3], size_t region[3], ImageMetadata &imageMetadata,
                                                                       ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead) {
    const auto rowPitch = imageMetadata.rowPitch;
    const auto slicePitch = imageMetadata.slicePitch;
    const auto rowsPerChunk = std::max<size_t>(1ul, chunkSize / rowPitch);
    StagingTransferStatus result{};

    // Split (X, Y, Z') region into several (X, Y', Z') chunks.
    // The last chunk carries the remaining rows when rowsToCopy is not a multiple of rowsPerChunk.
    for (size_t rowOffset = 0; rowOffset < rowsToCopy; rowOffset += rowsPerChunk) {
        origin[1] = baseRowOffset + rowOffset;
        region[1] = std::min<size_t>(rowsPerChunk, rowsToCopy - rowOffset);
        auto size = calculateSizeForRegion(region, imageMetadata);
        auto chunkPtr = ptrOffset(ptr, sliceOffset * slicePitch + rowOffset * rowPitch);

        imageMetadata.rowsInChunk = region[1];
        UserData userData{chunkPtr, size, imageMetadata};

        result = performChunkTransfer(submittedChunks++, isRead, userData, stagingQueue, csr, chunkTransferImageFunc, origin, region);
        if (result.chunkCopyStatus != 0 || result.waitStatus == WaitStatus::gpuHang) {
            return result;
        }
    }
    return result;
}
|
||||
|
||||
/*
 * This method orchestrates transfer operation for images with given origin and region.
 * Transfer is split into chunks, each chunk represents sub-region to transfer.
 * Each chunk contains staging buffer which should be used instead of non-usm memory during transfers on GPU.
 * Several slices and rows can be packed into single chunk if size of such chunk does not exceed maximum chunk size (2MB).
 * Caller provides actual function to enqueue read/write operation for single chunk.
 */
|
||||
// ptr                      - user (non-usm) memory that is read from or written to.
// globalOrigin/globalRegion - 3-element origin and size of the image region to transfer.
// rowPitch / slicePitch    - pitches of the user memory, in bytes.
// bytesPerPixel            - size of one image element, in bytes.
// chunkTransferImageFunc   - invoked once per chunk with the chunk's origin and region.
// Returns the first failing chunk status, otherwise the status after draining the staging queue.
// NOTE(review): rowPitch and slicePitch are assumed to be non-zero (both are used as divisors) - confirm at call sites.
StagingTransferStatus StagingBufferManager::performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t slicePitch, size_t bytesPerPixel, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead) {
    StagingQueue stagingQueue;
    size_t origin[3] = {};
    size_t region[3] = {};
    origin[0] = globalOrigin[0];
    region[0] = globalRegion[0];
    StagingTransferStatus result{};
    size_t submittedChunks = 0;

    const size_t rowsToCopy = globalRegion[1];
    const size_t slicesToCopy = globalRegion[2];

    // An empty region submits nothing. Handling it here also keeps the divisions below
    // (by rowsPerChunk and slicesPerStep) away from zero.
    if (rowsToCopy == 0 || slicesToCopy == 0) {
        result.waitStatus = drainAndReleaseStagingQueue(isRead, stagingQueue, submittedChunks);
        return result;
    }

    // Calculate number of rows that can be packed into single chunk.
    auto rowsPerChunk = std::max<size_t>(1ul, chunkSize / rowPitch);
    rowsPerChunk = std::min<size_t>(rowsPerChunk, rowsToCopy);
    const auto numOfChunksInYDim = rowsToCopy / rowsPerChunk;

    // If single chunk is enough to transfer whole slice, we can try to pack several slices into single chunk.
    size_t slicesPerStep = 1;
    if (numOfChunksInYDim == 1) {
        slicesPerStep = std::max<size_t>(1ul, chunkSize / slicePitch);
        slicesPerStep = std::min<size_t>(slicesPerStep, slicesToCopy);
    }

    ImageMetadata imageMetadata{bytesPerPixel, globalRegion[0] * bytesPerPixel, rowPitch, slicePitch};

    // Split (X, Y, Z) region into several (X, Y, Z') chunks.
    // The last step carries the remaining slices when slicesToCopy is not a multiple of slicesPerStep.
    for (size_t sliceOffset = 0; sliceOffset < slicesToCopy; sliceOffset += slicesPerStep) {
        origin[2] = globalOrigin[2] + sliceOffset;
        region[2] = std::min<size_t>(slicesPerStep, slicesToCopy - sliceOffset);
        result = performImageSlicesTransfer(stagingQueue, submittedChunks, ptr, sliceOffset, globalOrigin[1], rowsToCopy, origin, region, imageMetadata, chunkTransferImageFunc, csr, isRead);
        if (result.chunkCopyStatus != 0 || result.waitStatus == WaitStatus::gpuHang) {
            return result;
        }
    }

    result.waitStatus = drainAndReleaseStagingQueue(isRead, stagingQueue, submittedChunks);
    return result;
}
|
||||
|
||||
|
@ -219,10 +277,10 @@ WaitStatus StagingBufferManager::copyStagingToHost(const std::pair<UserData, Sta
|
|||
tracker = transfer.second;
|
||||
auto stagingBuffer = addrToPtr(tracker.chunkAddress);
|
||||
auto userDst = const_cast<void *>(userData.ptr);
|
||||
if (userData.rowPitchData.rowSize < userData.rowPitchData.rowPitch) {
|
||||
for (auto rowId = 0u; rowId < userData.rowPitchData.rowsInChunk; rowId++) {
|
||||
auto offset = rowId * userData.rowPitchData.rowPitch;
|
||||
memcpy(ptrOffset(userDst, offset), ptrOffset(stagingBuffer, offset), userData.rowPitchData.rowSize);
|
||||
if (userData.imageMetadata.rowSize < userData.imageMetadata.rowPitch) {
|
||||
for (auto rowId = 0u; rowId < userData.imageMetadata.rowsInChunk; rowId++) {
|
||||
auto offset = rowId * userData.imageMetadata.rowPitch;
|
||||
memcpy(ptrOffset(userDst, offset), ptrOffset(stagingBuffer, offset), userData.imageMetadata.rowSize);
|
||||
}
|
||||
} else {
|
||||
memcpy(userDst, stagingBuffer, userData.size);
|
||||
|
|
|
@ -59,16 +59,19 @@ struct StagingBufferTracker {
|
|||
void freeChunk() const;
|
||||
};
|
||||
|
||||
struct RowPitchData {
|
||||
// Describes how the user memory of an image transfer is laid out.
// Member order matters: the struct is aggregate-initialised positionally.
struct ImageMetadata {
    size_t bytesPerPixel = 0; // size of one image element, in bytes
    size_t rowSize = 0;       // bytes of pixel data in one row of the region (width * bytesPerPixel)
    size_t rowPitch = 0;      // distance in bytes between the starts of consecutive rows
    size_t slicePitch = 0;    // distance in bytes between the starts of consecutive slices

    size_t rowsInChunk = 0; // rows carried by the chunk currently being transferred; updated per chunk
};
|
||||
|
||||
struct UserData {
|
||||
const void *ptr = nullptr;
|
||||
size_t size = 0;
|
||||
RowPitchData rowPitchData{};
|
||||
ImageMetadata imageMetadata{};
|
||||
};
|
||||
|
||||
struct StagingTransferStatus {
|
||||
|
@ -88,7 +91,7 @@ class StagingBufferManager : NEO::NonCopyableAndNonMovableClass {
|
|||
bool isValidForStagingTransfer(const Device &device, const void *ptr, size_t size, bool hasDependencies);
|
||||
|
||||
StagingTransferStatus performCopy(void *dstPtr, const void *srcPtr, size_t size, ChunkCopyFunction &chunkCopyFunc, CommandStreamReceiver *csr);
|
||||
StagingTransferStatus performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t bytesPerPixel, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead);
|
||||
StagingTransferStatus performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t slicePitch, size_t bytesPerPixel, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead);
|
||||
StagingTransferStatus performBufferTransfer(const void *ptr, size_t globalOffset, size_t globalSize, ChunkTransferBufferFunc &chunkTransferBufferFunc, CommandStreamReceiver *csr, bool isRead);
|
||||
|
||||
std::pair<HeapAllocator *, uint64_t> requestStagingBuffer(size_t &size);
|
||||
|
@ -104,6 +107,9 @@ class StagingBufferManager : NEO::NonCopyableAndNonMovableClass {
|
|||
|
||||
template <class Func, class... Args>
|
||||
StagingTransferStatus performChunkTransfer(size_t chunkTransferId, bool isRead, const UserData &userData, StagingQueue ¤tStagingBuffers, CommandStreamReceiver *csr, Func &func, Args... args);
|
||||
// Transfers the slices selected by origin[2]/region[2], splitting them along Y into chunks.
// submittedChunks is incremented for every submitted chunk; origin[1], region[1] and
// imageMetadata.rowsInChunk are overwritten per chunk.
// NOTE(review): the `auto` parameter makes this an abbreviated function template; a plain size_t
// would do, but it has to be changed together with the out-of-class definition.
StagingTransferStatus performImageSlicesTransfer(StagingQueue &stagingQueue, size_t &submittedChunks, const void *ptr, auto sliceOffset,
                                                 size_t baseRowOffset, size_t rowsToCopy, size_t origin[3], size_t region[3], ImageMetadata &imageMetadata,
                                                 ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead);
|
||||
|
||||
WaitStatus copyStagingToHost(const std::pair<UserData, StagingBufferTracker> &transfer, StagingBufferTracker &tracker) const;
|
||||
WaitStatus drainAndReleaseStagingQueue(bool isRead, const StagingQueue &stagingQueue, size_t numOfSubmittedTransfers) const;
|
||||
|
|
|
@ -134,7 +134,7 @@ class StagingBufferManagerFixture : public DeviceFixture {
|
|||
return 0;
|
||||
};
|
||||
auto initialNumOfUsmAllocations = svmAllocsManager->svmAllocs.getNumAllocs();
|
||||
auto ret = stagingBufferManager->performImageTransfer(hostPtr, globalOrigin, globalRegion, rowPitch, pixelElemSize, chunkTransfer, csr, isRead);
|
||||
auto ret = stagingBufferManager->performImageTransfer(hostPtr, globalOrigin, globalRegion, rowPitch, rowPitch * globalRegion[1], pixelElemSize, chunkTransfer, csr, isRead);
|
||||
auto newUsmAllocations = svmAllocsManager->svmAllocs.getNumAllocs() - initialNumOfUsmAllocations;
|
||||
|
||||
for (auto rowId = 0u; rowId < globalRegion[1]; rowId++) {
|
||||
|
@ -147,7 +147,7 @@ class StagingBufferManagerFixture : public DeviceFixture {
|
|||
EXPECT_EQ(expectedChunks, chunkCounter);
|
||||
|
||||
auto expectedNewUsmAllocations = 1u;
|
||||
if (isRead) {
|
||||
if (isRead && pixelElemSize * globalRegion[0] * globalRegion[1] > stagingBufferSize) {
|
||||
expectedNewUsmAllocations = 2u;
|
||||
}
|
||||
EXPECT_EQ(expectedNewUsmAllocations, newUsmAllocations);
|
||||
|
@ -183,6 +183,12 @@ class StagingBufferManagerFixture : public DeviceFixture {
|
|||
delete[] nonUsmBuffer;
|
||||
}
|
||||
|
||||
// Fills userData[0 .. size) with the ascending pattern 0, 1, 2, ...
// The index is a size_t so the loop terminates for every size; the stored value wraps at
// UINT_MAX exactly as an unsigned counter would.
void fillUserData(unsigned int *userData, size_t size) {
    for (size_t i = 0; i < size; i++) {
        userData[i] = static_cast<unsigned int>(i);
    }
}
|
||||
|
||||
constexpr static size_t stagingBufferSize = MemoryConstants::megaByte * 2;
|
||||
constexpr static size_t pixelElemSize = 1u;
|
||||
DebugManagerStateRestore restorer;
|
||||
|
@ -548,22 +554,22 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformImageReadWithOrigi
|
|||
// 8 rows of 1 MB each with a 2 MB staging buffer: two rows per chunk, 4 chunks expected.
TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformImageReadWithMultipleRowsPerChunkThenWholeRegionCovered) {
    size_t expectedChunks = 4;
    const size_t globalOrigin[3] = {0, 0, 0};
    const size_t globalRegion[3] = {1 * MemoryConstants::megaByte, 8, 1};
    imageTransferThroughStagingBuffers(true, pixelElemSize * globalRegion[0], globalOrigin, globalRegion, expectedChunks);
}
|
||||
|
||||
// 7 rows of 1 MB each with a 2 MB staging buffer: three two-row chunks plus a one-row remainder.
TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformImageReadWithRemainderThenWholeRegionCovered) {
    size_t expectedChunks = 4;
    const size_t globalOrigin[3] = {0, 0, 0};
    const size_t globalRegion[3] = {1 * MemoryConstants::megaByte, 7, 1};
    imageTransferThroughStagingBuffers(true, pixelElemSize * globalRegion[0], globalOrigin, globalRegion, expectedChunks);
}
|
||||
|
||||
// 3 rows of 1 MB each with a 2 MB staging buffer: one two-row chunk plus a one-row remainder.
TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformImageReadWithRemainderAndTransfersWithinLimitThenWholeRegionCovered) {
    size_t expectedChunks = 2;
    const size_t globalOrigin[3] = {0, 0, 0};
    const size_t globalRegion[3] = {1 * MemoryConstants::megaByte, 3, 1};
    imageTransferThroughStagingBuffers(true, pixelElemSize * globalRegion[0], globalOrigin, globalRegion, expectedChunks);
}
|
||||
|
||||
HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringChunkReadFromImageThenReturnImmediatelyWithFailure) {
|
||||
|
@ -579,7 +585,7 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringChunkReadF
|
|||
};
|
||||
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(csr);
|
||||
ultCsr->waitForTaskCountReturnValue = WaitStatus::gpuHang;
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
|
||||
EXPECT_EQ(0, ret.chunkCopyStatus);
|
||||
EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus);
|
||||
EXPECT_EQ(2u, chunkCounter);
|
||||
|
@ -601,7 +607,7 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangAfterChunkReadFr
|
|||
}
|
||||
return 0;
|
||||
};
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
|
||||
EXPECT_EQ(0, ret.chunkCopyStatus);
|
||||
EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus);
|
||||
EXPECT_EQ(4u, chunkCounter);
|
||||
|
@ -624,7 +630,7 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringRemainderC
|
|||
}
|
||||
return 0;
|
||||
};
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true);
|
||||
EXPECT_EQ(0, ret.chunkCopyStatus);
|
||||
EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus);
|
||||
EXPECT_EQ(remainderCounter - 1, chunkCounter);
|
||||
|
@ -643,7 +649,7 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferWhenFailedChunkImageWriteThen
|
|||
++chunkCounter;
|
||||
return expectedErrorCode;
|
||||
};
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, false);
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, false);
|
||||
EXPECT_EQ(expectedErrorCode, ret.chunkCopyStatus);
|
||||
EXPECT_EQ(WaitStatus::ready, ret.waitStatus);
|
||||
EXPECT_EQ(1u, chunkCounter);
|
||||
|
@ -666,13 +672,112 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferWhenFailedChunkImageWriteWith
|
|||
}
|
||||
return 0;
|
||||
};
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, false);
|
||||
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, false);
|
||||
EXPECT_EQ(expectedErrorCode, ret.chunkCopyStatus);
|
||||
EXPECT_EQ(WaitStatus::ready, ret.waitStatus);
|
||||
EXPECT_EQ(remainderCounter, chunkCounter);
|
||||
delete[] ptr;
|
||||
}
|
||||
|
||||
// Parameters of one 3D image split scenario.
// Member order matters: instances are brace-initialised positionally.
struct Image3DTestInfo {
    size_t expectedChunks; // number of chunk callbacks the transfer must produce
    size_t slicePitch;     // slice pitch of the user memory, in bytes
    size_t slices;         // depth (Z size) of the transferred region
};
|
||||
class StagingBufferManager3DImageTest : public StagingBufferManagerTest,
|
||||
public ::testing::WithParamInterface<Image3DTestInfo> {};
|
||||
|
||||
// Writes a (4, 4, slices) region with a 4-byte row pitch and the parameterised slice pitch,
// then checks the number of chunks and the content handed to every chunk callback.
HWTEST_P(StagingBufferManager3DImageTest, givenStagingBufferWhenPerformImageTransferCalledWith3DImageThenSplitCorrectly) {
    size_t expectedChunks = GetParam().expectedChunks;
    auto rowPitch = 4u;
    auto rowsNum = 4u;
    auto slicePitch = GetParam().slicePitch;
    const size_t globalOrigin[3] = {0, 0, 0};
    const size_t globalRegion[3] = {rowPitch, rowsNum, GetParam().slices};
    // One staging buffer worth of user memory per expected chunk, filled with an ascending pattern.
    auto size = stagingBufferSize * expectedChunks / sizeof(unsigned int);
    auto ptr = new unsigned int[size];
    fillUserData(ptr, size);

    size_t chunkCounter = 0;
    ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
        // Verify that staging buffer contains correct data based on origin offset.
        auto offset = origin[0] + origin[1] * rowPitch + origin[2] * slicePitch;
        auto userPtr = ptr + (offset / sizeof(uint32_t));
        EXPECT_EQ(0, memcmp(userPtr, stagingBuffer, region[0] * region[1] * region[2]));

        ++chunkCounter;
        return 0;
    };
    auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, chunkWrite, csr, false);
    EXPECT_EQ(0, ret.chunkCopyStatus);
    EXPECT_EQ(WaitStatus::ready, ret.waitStatus);
    EXPECT_EQ(expectedChunks, chunkCounter);
    delete[] ptr;
}
|
||||
|
||||
Image3DTestInfo imageTestsInfo[] = {
|
||||
{8u, StagingBufferManagerFixture::stagingBufferSize, 8}, // (4, 4, 8) split into (4, 4, 1) * 8
|
||||
{4u, StagingBufferManagerFixture::stagingBufferSize / 2, 8}, // (4, 4, 8) split into (4, 4, 2) * 4
|
||||
{5u, StagingBufferManagerFixture::stagingBufferSize / 2, 9}, // (4, 4, 9) split into (4, 4, 2) * 4 + (4, 4, 1)
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
StagingBufferManagerTest_,
|
||||
StagingBufferManager3DImageTest,
|
||||
testing::ValuesIn(imageTestsInfo));
|
||||
|
||||
// Reads a (4, 4, 9) region with a 1 MB slice pitch. The callback arms a GPU hang on the CSR
// while the 4th chunk is enqueued; the transfer must report the hang without enqueuing the 5th chunk.
HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringSliceRemainderChunkReadFromImageThenReturnImmediatelyWithFailure) {
    auto expectedChunks = 5u;
    size_t rowPitch = 4u;
    auto rowsNum = 4u;
    size_t slicePitch = MemoryConstants::megaByte;
    const size_t globalOrigin[3] = {0, 0, 0};
    const size_t globalRegion[3] = {rowPitch, rowsNum, 9};
    auto size = stagingBufferSize * expectedChunks / sizeof(unsigned int);
    auto ptr = new unsigned int[size];

    auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(csr);
    size_t chunkCounter = 0;
    ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
        ++chunkCounter;
        if (chunkCounter == expectedChunks - 1) {
            // From now on every wait on the CSR reports a hang.
            ultCsr->waitForTaskCountReturnValue = WaitStatus::gpuHang;
        }
        return 0;
    };
    auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, chunkWrite, csr, true);
    EXPECT_EQ(0, ret.chunkCopyStatus);
    EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus);
    EXPECT_EQ(expectedChunks - 1, chunkCounter);
    delete[] ptr;
}
|
||||
|
||||
// Writes a (4, 4, 9) region with a 1 MB slice pitch. The callback fails on the 5th chunk;
// the transfer must surface that error code after exactly 5 callbacks.
TEST_F(StagingBufferManagerTest, givenStagingBufferWhenFailedChunkImageWriteWithSliceRemainderThenReturnWithFailure) {
    auto expectedChunks = 5u;
    size_t rowPitch = 4u;
    auto rowsNum = 4u;
    size_t slicePitch = MemoryConstants::megaByte;
    const size_t globalOrigin[3] = {0, 0, 0};
    const size_t globalRegion[3] = {rowPitch, rowsNum, 9};
    auto size = stagingBufferSize * expectedChunks / sizeof(unsigned int);
    auto ptr = new unsigned int[size];

    size_t chunkCounter = 0;
    constexpr int expectedErrorCode = 1;
    ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
        ++chunkCounter;
        if (chunkCounter == expectedChunks) {
            return expectedErrorCode;
        }
        return 0;
    };
    auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, chunkWrite, csr, false);
    EXPECT_EQ(expectedErrorCode, ret.chunkCopyStatus);
    EXPECT_EQ(WaitStatus::ready, ret.waitStatus);
    EXPECT_EQ(expectedChunks, chunkCounter);
    delete[] ptr;
}
|
||||
|
||||
TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformBufferTransferThenCopyData) {
|
||||
constexpr size_t numOfChunkCopies = 8;
|
||||
constexpr size_t remainder = 1024;
|
||||
|
|
Loading…
Reference in New Issue