fix: use stateless kernel when appending memory fill on XeHpc and later

Related-to: NEO-6075

Signed-off-by: Damian Tomczak <damian.tomczak@intel.com>
This commit is contained in:
Damian Tomczak
2024-09-19 10:38:52 +00:00
committed by Compute-Runtime-Automation
parent 672d8414f5
commit 5c9fc079b0
3 changed files with 32 additions and 1 deletions

View File

@@ -1943,7 +1943,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
bool isStateless = this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless;
bool isStateless = (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) || this->isStatelessBuiltinsEnabled();
const bool isHeapless = this->isHeaplessModeEnabled();
NEO::Device *neoDevice = device->getNEODevice();

View File

@@ -129,6 +129,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
::L0::Event *event,
CmdListKernelLaunchParams &launchParams) override {
kernelUsed = kernel;
usedKernelLaunchParams = launchParams;
if (launchParams.isKernelSplitOperation && (launchParams.numKernelsExecutedInSplitLaunch == 0)) {
firstKernelInSplitOperation = kernel;
@@ -168,6 +169,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
::L0::Kernel *firstKernelInSplitOperation = nullptr;
ze_event_handle_t appendEventMultipleKernelIndirectEventHandleValue = nullptr;
ze_event_handle_t appendEventKernelIndirectEventHandleValue = nullptr;
Kernel *kernelUsed;
};
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -994,6 +994,35 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryCopyWithOneReservedD
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
}
// Checks that, on platforms matched by IsAtLeastXeHpcCore, appendMemoryFill launches the
// stateless variant of the fillBufferMiddle builtin kernel.
HWTEST2_F(CommandListTest, givenStatelessWhenAppendMemoryFillIsCalledThenCorrectBuiltinIsUsed, IsAtLeastXeHpcCore) {
    // Precondition: abort the test early if the product does not force stateless compilation.
    auto &compilerProductHelper = device->getCompilerProductHelper();
    ASSERT_TRUE(compilerProductHelper.isForceToStatelessRequired());

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);

    constexpr size_t allocSize = 4096;
    constexpr size_t patternSize = 8;
    // Only the first four bytes are given; the rest of the pattern is zero-initialized.
    uint8_t pattern[patternSize] = {1, 2, 3, 4};

    void *dstBuffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    auto result = context->allocHostMem(&hostDesc, allocSize, allocSize, &dstBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Verify the fill itself succeeded: if it bailed out before launching a kernel,
    // kernelUsed would not have been recorded and the comparison below would be meaningless.
    // EXPECT (not ASSERT) so that dstBuffer is still released at the end of the test.
    const auto fillResult = commandList->appendMemoryFill(dstBuffer, pattern, patternSize, allocSize, nullptr, 0, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, fillResult);

    // Resolve the builtin expected for a stateless fill, honoring the list's heapless setting.
    bool isStateless = true;
    bool isHeapless = commandList->isHeaplessModeEnabled();
    auto builtin = BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferMiddle>(isStateless, isHeapless);

    // The WhiteBox command list stores the kernel handed to its launch override in kernelUsed.
    Kernel *expectedBuiltinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
    EXPECT_EQ(expectedBuiltinKernel, commandList->kernelUsed);

    context->freeMem(dstBuffer);
}
HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillInUsmHostThenBuiltinFlagAndDestinationAllocSystemIsSet, MatchAny) {
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);