performance: add infrastructure for staging with 3D images

Related-To: NEO-14026

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-03-31 12:32:11 +00:00
committed by Compute-Runtime-Automation
parent ed37a1e7ef
commit 3010af596e
6 changed files with 273 additions and 59 deletions

View File

@@ -54,8 +54,10 @@ cl_int CommandQueue::enqueueStagingImageTransfer(cl_command_type commandType, Im
bool isSingleTransfer = false;
ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t {
auto isFirstTransfer = (globalOrigin[1] == origin[1]);
auto isLastTransfer = (globalOrigin[1] + globalRegion[1] == origin[1] + region[1]);
auto isFirstTransfer = (globalOrigin[1] == origin[1] && globalOrigin[2] == origin[2]);
auto isLastTransfer = (globalOrigin[1] + globalRegion[1] == origin[1] + region[1]) &&
(globalOrigin[2] + globalRegion[2] == origin[2] + region[2]);
isSingleTransfer = isFirstTransfer && isLastTransfer;
cl_event *outEvent = assignEventForStaging(event, &profilingEvent, isFirstTransfer, isLastTransfer);
cl_int ret = 0;
@@ -69,9 +71,10 @@ cl_int CommandQueue::enqueueStagingImageTransfer(cl_command_type commandType, Im
};
auto bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel;
auto dstSlicePitch = inputSlicePitch ? inputSlicePitch : globalRegion[1] * dstRowPitch;
auto stagingBufferManager = this->context->getStagingBufferManager();
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, dstRowPitch, bytesPerPixel, chunkWrite, &csr, isRead);
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, dstRowPitch, dstSlicePitch, bytesPerPixel, chunkWrite, &csr, isRead);
if (isRead && context->isProvidingPerformanceHints()) {
auto hostPtrSize = calculateHostPtrSizeForImage(globalRegion, inputRowPitch, inputSlicePitch, image);

View File

@@ -1139,10 +1139,10 @@ HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledThenReturn
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithoutRowPitchThenReturnSuccess) {
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithoutRowPitchNorSlicePitchThenReturnSuccess) {
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
region[0] = MemoryConstants::megaByte / srcImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, srcImage, false, origin, region, 0u, MemoryConstants::megaByte, ptr, nullptr);
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, srcImage, false, origin, region, 0u, 0u, ptr, nullptr);
EXPECT_EQ(res, CL_SUCCESS);
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueReadImageCounter);
@@ -1234,4 +1234,25 @@ HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledWithGpuHan
EXPECT_EQ(res, CL_OUT_OF_RESOURCES);
EXPECT_EQ(2ul, mockCommandQueueHw.enqueueReadImageCounter);
}
// Staging read of a sub-region of a 3D image: the transfer must succeed, be
// issued as two chunked read enqueues, and never create a host-surface allocation.
HWTEST_F(ReadImageStagingBufferTest, whenEnqueueStagingReadImageCalledFor3DImageThenReturnSuccess) {
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);

    // 4 x 4 x 64 3D image without mip levels.
    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D;
    imageDesc.num_mip_levels = 0;
    imageDesc.image_width = 4;
    imageDesc.image_height = 4;
    imageDesc.image_depth = 64;
    std::unique_ptr<Image> image3d{ImageHelper<Image3dDefaults>::create(context, &imageDesc)};

    // Read a 2 x 2 x 4 block starting at the image origin.
    size_t readOrigin[3] = {0, 0, 0};
    size_t readRegion[3] = {2, 2, 4};
    const auto status = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, image3d.get(), false,
                                                                       readOrigin, readRegion,
                                                                       4u, MemoryConstants::megaByte,
                                                                       ptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);

    // The (2, 2, 4) region is split into two (2, 2, 2) chunks, hence two read enqueues.
    EXPECT_EQ(2ul, mockCommandQueueHw.enqueueReadImageCounter);

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, ultCsr.createAllocationForHostSurfaceCalled);
}

View File

@@ -866,10 +866,10 @@ HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledThenRetu
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
}
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledWithoutRowPitchThenReturnSuccess) {
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledWithoutRowPitchNorSlicePitchThenReturnSuccess) {
MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);
region[0] = MemoryConstants::megaByte / dstImage->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes;
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, dstImage, false, origin, region, 0u, MemoryConstants::megaByte, ptr, nullptr);
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, dstImage, false, origin, region, 0u, 0u, ptr, nullptr);
EXPECT_EQ(res, CL_SUCCESS);
EXPECT_EQ(4ul, mockCommandQueueHw.enqueueWriteImageCounter);
@@ -962,4 +962,25 @@ HWTEST_F(WriteImageStagingBufferTest, givenIsValidForStagingTransferWhenUserPtrI
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(mockCommandQueueHw.isValidForStagingTransfer(buffer, mappedPtr, buffer->getSize(), CL_COMMAND_WRITE_IMAGE, false, false));
delete buffer;
}
// Staging write into a sub-region of a 3D image: the transfer must succeed, be
// issued as two chunked write enqueues, and never create a host-surface allocation.
HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledFor3DImageThenReturnSuccess) {
    MockCommandQueueHw<FamilyType> mockCommandQueueHw(context, device.get(), &props);

    // 4 x 4 x 64 3D image without mip levels.
    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D;
    imageDesc.num_mip_levels = 0;
    imageDesc.image_width = 4;
    imageDesc.image_height = 4;
    imageDesc.image_depth = 64;
    std::unique_ptr<Image> image3d{ImageHelper<Image3dDefaults>::create(context, &imageDesc)};

    // Write a 2 x 2 x 4 block starting at the image origin.
    size_t writeOrigin[3] = {0, 0, 0};
    size_t writeRegion[3] = {2, 2, 4};
    const auto status = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, image3d.get(), false,
                                                                       writeOrigin, writeRegion,
                                                                       4u, MemoryConstants::megaByte,
                                                                       ptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);

    // The (2, 2, 4) region is split into two (2, 2, 2) chunks, hence two write enqueues.
    EXPECT_EQ(2ul, mockCommandQueueHw.enqueueWriteImageCounter);

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(0u, ultCsr.createAllocationForHostSurfaceCalled);
}