feature: force stateless for copy buffer to image

Related-to: NEO-6075

Signed-off-by: Damian Tomczak <damian.tomczak@intel.com>
This commit is contained in:
Damian Tomczak
2025-07-12 00:41:33 +00:00
committed by Compute-Runtime-Automation
parent c7519acaac
commit 938d28ee5d
5 changed files with 113 additions and 12 deletions

View File

@@ -25,8 +25,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferToImage(
const cl_event *eventWaitList,
cl_event *event) {
const bool useStateless = forceStateless(srcBuffer->getSize());
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(useStateless, this->heaplessModeEnabled);
const bool isStateless = isForceStateless || forceStateless(srcBuffer->getSize());
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, this->heaplessModeEnabled);
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
this->getClDevice());

View File

@@ -134,9 +134,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImageImpl(
dc.bcsSplit = bcsSplit;
dc.direction = csrSelectionArgs.direction;
const bool useStateless = forceStateless(dstImage->getSize());
const bool isStateless = isForceStateless || forceStateless(dstImage->getSize());
const bool useHeapless = getHeaplessModeEnabled();
auto eBuiltInOps = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(useStateless, useHeapless);
auto eBuiltInOps = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, useHeapless);
MultiDispatchInfo dispatchInfo(dc);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);

View File

@@ -178,6 +178,7 @@ HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueWriteImageCallWhenBuiltin
debugManager.flags.EnableCopyWithStagingBuffers.set(0);
bool heaplessAllowed = UnitTestHelper<FamilyType>::isHeaplessAllowed();
const bool useStateless = pDevice->getCompilerProductHelper().isForceToStatelessRequired();
for (auto useHeapless : {false, heaplessAllowed}) {
if (useHeapless && !heaplessAllowed) {
@@ -185,7 +186,7 @@ HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueWriteImageCallWhenBuiltin
}
reinterpret_cast<MockCommandQueueHw<FamilyType> *>(pCmdQ)->heaplessModeEnabled = useHeapless;
setUpImpl(EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(false, useHeapless));
setUpImpl(EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(useStateless, useHeapless));
std::unique_ptr<Image> dstImage(ImageHelperUlt<ImageUseHostPtr<Image2dDefaults>>::create(context));

View File

@@ -18,6 +18,7 @@
#include "opencl/test/unit_test/fixtures/one_mip_level_image_fixture.h"
#include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_builder.h"
#include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h"
#include "opencl/test/unit_test/mocks/mock_cl_execution_environment.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
@@ -214,15 +215,23 @@ typedef EnqueueCopyBufferToImageMipMapTest MipMapCopyBufferToImageTest;
HWTEST_P(MipMapCopyBufferToImageTest, GivenImageWithMipLevelNonZeroWhenCopyBufferToImageIsCalledThenProperMipLevelIsSet) {
auto imageType = (cl_mem_object_type)GetParam();
auto builtIns = new MockBuiltins();
auto builtInType = EBuiltInOps::copyBufferToImage3d;
auto &compilerProductHelper = pDevice->getCompilerProductHelper();
if (compilerProductHelper.isForceToStatelessRequired()) {
builtInType = EBuiltInOps::copyBufferToImage3dStateless;
}
MockRootDeviceEnvironment::resetBuiltins(pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()].get(), builtIns);
auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(
adjustBuiltInType(pCmdQ->getHeaplessModeEnabled(), EBuiltInOps::copyBufferToImage3d),
adjustBuiltInType(pCmdQ->getHeaplessModeEnabled(), builtInType),
pCmdQ->getClDevice());
// substitute original builder with mock builder
auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder(
rootDeviceIndex,
adjustBuiltInType(pCmdQ->getHeaplessModeEnabled(), EBuiltInOps::copyBufferToImage3d),
adjustBuiltInType(pCmdQ->getHeaplessModeEnabled(), builtInType),
std::unique_ptr<NEO::BuiltinDispatchInfoBuilder>(new MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder)));
cl_int retVal = CL_SUCCESS;
@@ -274,7 +283,7 @@ HWTEST_P(MipMapCopyBufferToImageTest, GivenImageWithMipLevelNonZeroWhenCopyBuffe
EXPECT_EQ(CL_SUCCESS, retVal);
auto &mockBuilder = static_cast<MockBuiltinDispatchInfoBuilder &>(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(adjustBuiltInType(pCmdQ->getHeaplessModeEnabled(), EBuiltInOps::copyBufferToImage3d),
auto &mockBuilder = static_cast<MockBuiltinDispatchInfoBuilder &>(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(adjustBuiltInType(pCmdQ->getHeaplessModeEnabled(), builtInType),
pCmdQ->getClDevice()));
auto params = mockBuilder.getBuiltinOpParams();
@@ -283,7 +292,7 @@ HWTEST_P(MipMapCopyBufferToImageTest, GivenImageWithMipLevelNonZeroWhenCopyBuffe
// restore original builder and retrieve mock builder
auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder(
rootDeviceIndex,
adjustBuiltInType(pCmdQ->getHeaplessModeEnabled(), EBuiltInOps::copyBufferToImage3d),
adjustBuiltInType(pCmdQ->getHeaplessModeEnabled(), builtInType),
std::move(oldBuilder));
EXPECT_NE(nullptr, newBuilder);
}
@@ -359,7 +368,7 @@ HWTEST_F(EnqueueCopyBufferToImageStatelessTest, givenGpuHangAndBlockingCallAndBi
using EnqueueCopyBufferToImageStatefulTest = EnqueueCopyBufferToImageHw;
HWTEST_F(EnqueueCopyBufferToImageStatefulTest, givenBigBufferWhenCopyingBufferToImageStatefulThenSuccessIsReturned) {
HWTEST2_F(EnqueueCopyBufferToImageStatefulTest, givenBigBufferWhenCopyingBufferToImageStatefulThenSuccessIsReturned, IsStatefulBufferPreferredForProduct) {
auto cmdQ = std::make_unique<CommandQueueStateful<FamilyType>>(context.get(), device.get());
if (cmdQ->getHeaplessModeEnabled()) {
GTEST_SKIP();
@@ -397,3 +406,47 @@ HWTEST_F(OneMipLevelCopyBufferToImageImageTests, GivenNotMippedImageWhenCopyingB
EXPECT_TRUE(builtinOpsParamsCaptured);
EXPECT_EQ(0u, usedBuiltinOpsParams.dstMipLevel);
}
// Checks that enqueueCopyBufferToImage reaches the stateless variant of the
// copyBufferToImage3d builtin when the source buffer reports a 4 GB size, even
// though the queue's isForceStateless flag has been cleared.
HWTEST_F(EnqueueCopyBufferToImageTest, given4gbBufferAndIsForceStatelessIsFalseWhenEnqueueCopyBufferToImageCallThenStatelessIsUsed) {
    // MockBuffer variant that only overrides the reported size.
    struct FourGbMockBuffer : MockBuffer {
        size_t getSize() const override { return static_cast<size_t>(4ull * MemoryConstants::gigaByte); }
    };

    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
    if (is32bit) {
        GTEST_SKIP();
    }

    // Clear the explicit force flag so that only the buffer size can select stateless.
    static_cast<MockCommandQueueHw<FamilyType> *>(pCmdQ)->isForceStateless = false;

    // Builtin id of the stateless flavour (first argument true) for the queue's heapless mode.
    const bool expectStateless = true;
    EBuiltInOps::Type statelessBuiltIn = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(expectStateless, pCmdQ->getHeaplessModeEnabled());

    auto *mockBuiltins = new MockBuiltins();
    auto &&neoDevice = pCmdQ->getDevice();
    auto *rootDeviceEnvironment = neoDevice.getExecutionEnvironment()->rootDeviceEnvironments[neoDevice.getRootDeviceIndex()].get();
    MockRootDeviceEnvironment::resetBuiltins(rootDeviceEnvironment, mockBuiltins);

    // Install a MockBuilder in place of the original builder; keep the original for restoration.
    std::unique_ptr<NEO::BuiltinDispatchInfoBuilder> replacementBuilder(new MockBuilder(*mockBuiltins, pCmdQ->getClDevice()));
    auto previousBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder(
        rootDeviceIndex,
        statelessBuiltIn,
        std::move(replacementBuilder));

    FourGbMockBuffer hugeSrcBuffer;

    auto &installedBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(statelessBuiltIn, *pClDevice);
    auto *mockBuilder = static_cast<MockBuilder *>(&installedBuilder);

    // The mock must be untouched before the enqueue and used afterwards.
    EXPECT_FALSE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled);
    EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, &hugeSrcBuffer, dstImage);
    EXPECT_TRUE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled);

    // Put the original builder back; the call hands back the mock that was installed above.
    auto removedBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder(
        rootDeviceIndex,
        statelessBuiltIn,
        std::move(previousBuilder));
    EXPECT_EQ(mockBuilder, removedBuilder.get());
}

View File

@@ -22,9 +22,11 @@
#include "opencl/test/unit_test/command_queue/enqueue_write_image_fixture.h"
#include "opencl/test/unit_test/fixtures/one_mip_level_image_fixture.h"
#include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h"
#include "opencl/test/unit_test/mocks/mock_builder.h"
#include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h"
#include "opencl/test/unit_test/mocks/mock_cl_execution_environment.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_image.h"
using namespace NEO;
@@ -267,7 +269,7 @@ HWTEST_F(EnqueueWriteImageTest, GivenImage1DarrayWhenWriteImageIsCalledThenRowPi
auto builtIns = new MockBuiltins();
MockRootDeviceEnvironment::resetBuiltins(pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()].get(), builtIns);
const bool useStateless = false;
const bool useStateless = pDevice->getCompilerProductHelper().isForceToStatelessRequired();
auto copyBuiltIn = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(useStateless, pCmdQ->getHeaplessModeEnabled());
auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(
@@ -420,7 +422,8 @@ HWTEST_P(MipMapWriteImageTest, GivenImageWithMipLevelNonZeroWhenReadImageIsCalle
auto builtIns = new MockBuiltins();
MockRootDeviceEnvironment::resetBuiltins(pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()].get(), builtIns);
EBuiltInOps::Type eBuiltInOp = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(false, pCmdQ->getHeaplessModeEnabled());
const bool useStateless = pDevice->getCompilerProductHelper().isForceToStatelessRequired();
EBuiltInOps::Type eBuiltInOp = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(useStateless, pCmdQ->getHeaplessModeEnabled());
auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(
eBuiltInOp,
@@ -999,4 +1002,48 @@ HWTEST_F(WriteImageStagingBufferTest, whenEnqueueStagingWriteImageCalledForMipMa
auto res = mockCommandQueueHw.enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, image.get(), false, origin, region, 4u, pitchSize, ptr, nullptr);
EXPECT_EQ(res, CL_SUCCESS);
}
// Checks that enqueueWriteImage reaches the stateless variant of the
// copyBufferToImage3d builtin when the destination image reports a 4 GB size,
// even though the queue's isForceStateless flag has been cleared.
HWTEST_F(EnqueueWriteImageTest, given4gbBufferAndIsForceStatelessIsFalseWhenEnqueueWriteImageCallThenStatelessIsUsed) {
    // MockImageBase variant that only overrides the reported size.
    struct FourGbMockImage : MockImageBase {
        size_t getSize() const override { return static_cast<size_t>(4ull * MemoryConstants::gigaByte); }
    };

    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
    if (is32bit) {
        GTEST_SKIP();
    }

    // Clear the explicit force flag so that only the image size can select stateless.
    static_cast<MockCommandQueueHw<FamilyType> *>(pCmdQ)->isForceStateless = false;

    // Builtin id of the stateless flavour (first argument true) for the queue's heapless mode.
    const bool expectStateless = true;
    EBuiltInOps::Type statelessBuiltIn = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(expectStateless, pCmdQ->getHeaplessModeEnabled());

    auto *mockBuiltins = new MockBuiltins();
    auto &&neoDevice = pCmdQ->getDevice();
    auto *rootDeviceEnvironment = neoDevice.getExecutionEnvironment()->rootDeviceEnvironments[neoDevice.getRootDeviceIndex()].get();
    MockRootDeviceEnvironment::resetBuiltins(rootDeviceEnvironment, mockBuiltins);

    // Install a MockBuilder in place of the original builder; keep the original for restoration.
    std::unique_ptr<NEO::BuiltinDispatchInfoBuilder> replacementBuilder(new MockBuilder(*mockBuiltins, pCmdQ->getClDevice()));
    auto previousBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder(
        rootDeviceIndex,
        statelessBuiltIn,
        std::move(replacementBuilder));

    FourGbMockImage hugeDstImage;

    auto &installedBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(statelessBuiltIn, *pClDevice);
    auto *mockBuilder = static_cast<MockBuilder *>(&installedBuilder);

    // The mock must be untouched before the enqueue and used afterwards.
    EXPECT_FALSE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled);
    EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, &hugeDstImage);
    EXPECT_TRUE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled);

    // Put the original builder back; the call hands back the mock that was installed above.
    auto removedBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder(
        rootDeviceIndex,
        statelessBuiltIn,
        std::move(previousBuilder));
    EXPECT_EQ(mockBuilder, removedBuilder.get());
}