diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 61b8499f59..acf1ea0bda 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -33,6 +33,7 @@ struct CmdListKernelLaunchParams { bool isKernelSplitOperation = false; bool isBuiltInKernel = false; bool isDestinationAllocationInSystemMemory = false; + bool isHostSignalScopeEvent = false; }; struct CmdListReturnPoint { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index ad08458295..d2f24a6e15 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -303,13 +303,14 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ return ret; } + CmdListKernelLaunchParams launchParams = {}; Event *event = nullptr; if (hEvent) { event = Event::fromHandle(hEvent); + launchParams.isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST); } appendEventForProfiling(event, true, false); - CmdListKernelLaunchParams launchParams = {}; launchParams.isIndirect = true; ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer, nullptr, launchParams); @@ -332,9 +333,14 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd return ret; } + CmdListKernelLaunchParams launchParams = {}; + launchParams.isIndirect = true; + launchParams.isPredicate = true; + Event *event = nullptr; if (hEvent) { event = Event::fromHandle(hEvent); + launchParams.isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST); } appendEventForProfiling(event, true, false); @@ -346,9 +352,6 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd for (uint32_t i = 0; i < numKernels; i++) { NEO::EncodeMathMMIO::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i); - CmdListKernelLaunchParams launchParams = {}; - launchParams.isIndirect = true; - launchParams.isPredicate = true; ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]), haveLaunchArguments ? &pLaunchArgumentsBuffer[i] : nullptr, nullptr, launchParams); @@ -1183,17 +1186,18 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, return ret; } + CmdListKernelLaunchParams launchParams = {}; + Event *signalEvent = nullptr; if (hSignalEvent) { signalEvent = Event::fromHandle(hSignalEvent); + launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST); } uint32_t kernelCounter = leftSize > 0 ? 1 : 0; kernelCounter += middleSizeBytes > 0 ? 1 : 0; kernelCounter += rightSize > 0 ? 1 : 0; - CmdListKernelLaunchParams launchParams = {}; - launchParams.isKernelSplitOperation = kernelCounter > 1; bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation; @@ -1544,9 +1548,12 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } + CmdListKernelLaunchParams launchParams = {}; + Event *signalEvent = nullptr; if (hSignalEvent) { signalEvent = Event::fromHandle(hSignalEvent); + launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST); } if (isCopyOnly()) { @@ -1596,7 +1603,6 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } } - CmdListKernelLaunchParams launchParams = {}; launchParams.isBuiltInKernel = true; launchParams.isDestinationAllocationInSystemMemory = hostPointerNeedsFlush; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index b5bdc4b529..46ddb105cc 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -167,7 +167,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K uint64_t eventAddress = 0; bool isTimestampEvent = false; bool l3FlushEnable = false; - bool isHostSignalScopeEvent = false; + bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent; if (event) { eventAlloc = &event->getAllocation(this->device); commandContainer.addToResidencyContainer(eventAlloc); diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp index 9efb9a3fad..d27366bd36 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp @@ -1364,5 +1364,171 @@ HWTEST2_F(CreateCommandListXeHpcTest, whenFlagDisabledAndCreateImmediateCommandL EXPECT_TRUE(static_cast(device)->allocationsForReuse->peekIsEmpty()); } +struct AppendKernelXeHpcTestInput { + DriverHandle *driver = nullptr; + L0::Context *context = nullptr; + L0::Device *device = nullptr; +}; + +template +struct CommandListAppendLaunchMultiKernelEventFixture : public LocalMemoryModuleFixture { + void setUp() { + DebugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync); + LocalMemoryModuleFixture::setUp(); + + input.driver = driverHandle.get(); + input.device = device; + input.context = context; + } + + template + void testHostSignalScopeDeviceMemoryAppendMultiKernelCopy(AppendKernelXeHpcTestInput &input) { + using FamilyType = typename NEO::GfxFamilyMapper::GfxFamily; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + + ze_result_t result = ZE_RESULT_SUCCESS; + + auto &hwInfo = *input.device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); + auto &hwConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); + + VariableBackup hwRevId{&hwInfo.platform.usRevId}; + hwRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); + + constexpr size_t size = 4096u; + constexpr size_t alignment = 4096u; + void *ptr = nullptr; + const void *srcPtr = reinterpret_cast(0x1234); + + ze_device_mem_alloc_desc_t deviceDesc = {}; + result = input.context->allocDeviceMem(input.device->toHandle(), + &deviceDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + auto eventPool = std::unique_ptr(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = 0; + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, input.device)); + + auto commandList = std::make_unique>>(); + result = commandList->initialize(input.device, NEO::EngineGroupType::Compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + constexpr size_t offset = 32; + void *copyPtr = reinterpret_cast(ptr) + offset; + result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList commands; + ASSERT_TRUE(CmdParse::parseCommandBuffer( + commands, + commandList->commandContainer.getCommandStream()->getCpuBase(), + commandList->commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(commands.begin(), commands.end()); + EXPECT_NE(0u, itorWalkers.size()); + for (const auto &it : itorWalkers) { + auto walkerCmd = genCmdCast(*it); + auto &postSyncData = walkerCmd->getPostSync(); + EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest()); + } + + result = input.context->freeMem(ptr); + ASSERT_EQ(result, ZE_RESULT_SUCCESS); + } + + template + void testHostSignalScopeHostMemoryAppendMultiKernelCopy(AppendKernelXeHpcTestInput &input) { + using FamilyType = typename NEO::GfxFamilyMapper::GfxFamily; + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + + ze_result_t result = ZE_RESULT_SUCCESS; + + auto &hwInfo = *input.device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); + auto &hwConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); + + VariableBackup hwRevId{&hwInfo.platform.usRevId}; + hwRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); + + constexpr size_t size = 4096u; + constexpr size_t alignment = 4096u; + void *ptr = nullptr; + const void *srcPtr = reinterpret_cast(0x1234); + + ze_host_mem_alloc_desc_t hostDesc = {}; + result = input.context->allocHostMem(&hostDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + auto eventPool = std::unique_ptr(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = 0; + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, input.device)); + + auto commandList = std::make_unique>>(); + result = commandList->initialize(input.device, NEO::EngineGroupType::Compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + constexpr size_t offset = 32; + void *copyPtr = reinterpret_cast(ptr) + offset; + result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList commands; + ASSERT_TRUE(CmdParse::parseCommandBuffer( + commands, + commandList->commandContainer.getCommandStream()->getCpuBase(), + commandList->commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(commands.begin(), commands.end()); + EXPECT_NE(0u, itorWalkers.size()); + for (const auto &it : itorWalkers) { + auto walkerCmd = genCmdCast(*it); + auto &postSyncData = walkerCmd->getPostSync(); + EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest()); + } + + result = input.context->freeMem(ptr); + ASSERT_EQ(result, ZE_RESULT_SUCCESS); + } + + AppendKernelXeHpcTestInput input; +}; + +using CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore = Test>; + +HWTEST2_F(CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore, + givenHwSupportsSystemFenceWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceNotUsed, IsXeHpcCore) { + testHostSignalScopeDeviceMemoryAppendMultiKernelCopy(input); +} + +HWTEST2_F(CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore, + givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) { + testHostSignalScopeHostMemoryAppendMultiKernelCopy(input); +} + +using CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore = Test>; + +HWTEST2_F(CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore, + givenHwSupportsSystemFenceWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceNotUsed, IsXeHpcCore) { + testHostSignalScopeDeviceMemoryAppendMultiKernelCopy(input); +} + +HWTEST2_F(CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore, + givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) { + testHostSignalScopeHostMemoryAppendMultiKernelCopy(input); +} + } // namespace ult } // namespace L0