diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index c5292c6536..ea503ff7b3 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -188,13 +188,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K auto maxWgCountPerTile = kernel->getMaxWgCountPerTile(this->engineGroupType); NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ - .eventAddress = 0, - .postSyncImmValue = static_cast(Event::STATE_SIGNALED), - .inOrderCounterValue = 0, - .inOrderIncrementGpuAddress = 0, - .inOrderIncrementValue = 0, .device = neoDevice, - .inOrderExecInfo = nullptr, .dispatchInterface = kernel, .surfaceStateHeap = ssh, .dynamicStateHeap = dsh, @@ -205,6 +199,22 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K .outImplicitArgsPtr = nullptr, .additionalCommands = &additionalCommands, .extendedArgs = nullptr, + .postSyncArgs = { + .eventAddress = 0, + .postSyncImmValue = static_cast(Event::STATE_SIGNALED), + .inOrderCounterValue = 0, + .inOrderIncrementGpuAddress = 0, + .inOrderIncrementValue = 0, + .device = neoDevice, + .inOrderExecInfo = nullptr, + .isTimestampEvent = false, + .isHostScopeSignalEvent = false, + .isKernelUsingSystemAllocation = false, + .dcFlushEnable = this->dcFlushSupport, + .interruptEvent = false, + .isFlushL3ForExternalAllocationRequired = false, + .isFlushL3ForHostUsmRequired = false, + }, .preemptionMode = commandListPreemptionMode, .requiredPartitionDim = launchParams.requiredPartitionDim, .requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder, @@ -215,22 +225,15 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K .defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent, .isIndirect = launchParams.isIndirect, .isPredicate = launchParams.isPredicate, - .isTimestampEvent = false, .requiresUncachedMocs = uncachedMocsKernel, .isInternal = internalUsage, .isCooperative = launchParams.isCooperative, - .isHostScopeSignalEvent = false, - .isKernelUsingSystemAllocation = false, .isKernelDispatchedFromImmediateCmdList = isImmediateType(), .isRcs = engineGroupType == NEO::EngineGroupType::renderCompute, - .dcFlushEnable = this->dcFlushSupport, .isHeaplessModeEnabled = this->heaplessModeEnabled, .isHeaplessStateInitEnabled = this->heaplessStateInitEnabled, - .interruptEvent = false, .immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled, .makeCommandView = false, - .isFlushL3AfterPostSyncForExternalAllocationRequired = false, - .isFlushL3AfterPostSyncForHostUsmRequired = false, }; NEO::EncodeDispatchKernel::encodeCommon(commandContainer, dispatchKernelArgs); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index f81bfa9653..88d5bf14df 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -366,13 +366,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K NEO::EncodeKernelArgsExt dispatchKernelArgsExt = {}; NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ - .eventAddress = eventAddress, - .postSyncImmValue = static_cast(Event::STATE_SIGNALED), - .inOrderCounterValue = inOrderCounterValue, - .inOrderIncrementGpuAddress = inOrderIncrementGpuAddress, - .inOrderIncrementValue = inOrderIncrementValue, .device = neoDevice, - .inOrderExecInfo = inOrderExecInfo, .dispatchInterface = kernel, .surfaceStateHeap = ssh, .dynamicStateHeap = dsh, @@ -383,6 +377,22 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K .outImplicitArgsPtr = nullptr, .additionalCommands = &additionalCommands, .extendedArgs = &dispatchKernelArgsExt, + .postSyncArgs = { + .eventAddress = eventAddress, + .postSyncImmValue = static_cast(Event::STATE_SIGNALED), + .inOrderCounterValue = inOrderCounterValue, + .inOrderIncrementGpuAddress = inOrderIncrementGpuAddress, + .inOrderIncrementValue = inOrderIncrementValue, + .device = neoDevice, + .inOrderExecInfo = inOrderExecInfo, + .isTimestampEvent = isTimestampEvent, + .isHostScopeSignalEvent = isHostSignalScopeEvent, + .isKernelUsingSystemAllocation = isKernelUsingSystemAllocation, + .dcFlushEnable = this->dcFlushSupport, + .interruptEvent = interruptEvent, + .isFlushL3ForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation, + .isFlushL3ForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation, + }, .preemptionMode = kernelPreemptionMode, .requiredPartitionDim = launchParams.requiredPartitionDim, .requiredDispatchWalkOrder = launchParams.requiredDispatchWalkOrder, @@ -393,22 +403,15 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K .defaultPipelinedThreadArbitrationPolicy = this->defaultPipelinedThreadArbitrationPolicy, .isIndirect = launchParams.isIndirect, .isPredicate = launchParams.isPredicate, - .isTimestampEvent = isTimestampEvent, .requiresUncachedMocs = uncachedMocsKernel, .isInternal = internalUsage, .isCooperative = launchParams.isCooperative, - .isHostScopeSignalEvent = isHostSignalScopeEvent, - .isKernelUsingSystemAllocation = isKernelUsingSystemAllocation, .isKernelDispatchedFromImmediateCmdList = isImmediateType(), .isRcs = engineGroupType == NEO::EngineGroupType::renderCompute, - .dcFlushEnable = this->dcFlushSupport, .isHeaplessModeEnabled = this->heaplessModeEnabled, .isHeaplessStateInitEnabled = this->heaplessStateInitEnabled, - .interruptEvent = interruptEvent, .immediateScratchAddressPatching = !this->scratchAddressPatchingEnabled, .makeCommandView = launchParams.makeKernelCommandView, - .isFlushL3AfterPostSyncForExternalAllocationRequired = isFlushL3AfterPostSync && isKernelUsingExternalAllocation, - .isFlushL3AfterPostSyncForHostUsmRequired = isFlushL3AfterPostSync && isKernelUsingSystemAllocation, }; setAdditionalDispatchKernelArgsFromLaunchParams(dispatchKernelArgs, launchParams); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index d1b5c577a6..48bd7829af 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -208,7 +208,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA const uint32_t threadGroupDimensions[3] = {1, 1, 1}; auto dispatchKernelArgs = CommandEncodeStatesFixture::createDefaultDispatchKernelArgs(device->getNEODevice(), kernel.get(), threadGroupDimensions, false); - dispatchKernelArgs.dcFlushEnable = commandList->getDcFlushRequired(true); + dispatchKernelArgs.postSyncArgs.dcFlushEnable = commandList->getDcFlushRequired(true); NEO::EncodeDispatchKernel::template encode(commandContainer, dispatchKernelArgs); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 165355bb8d..8819d4b366 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -918,7 +918,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA const uint32_t threadGroupDimensions[3] = {1, 1, 1}; auto dispatchKernelArgs = CommandEncodeStatesFixture::createDefaultDispatchKernelArgs(device->getNEODevice(), kernel.get(), threadGroupDimensions, false); - dispatchKernelArgs.dcFlushEnable = commandList->getDcFlushRequired(true); + dispatchKernelArgs.postSyncArgs.dcFlushEnable = commandList->getDcFlushRequired(true); EXPECT_THROW(NEO::EncodeDispatchKernel::template encode(commandContainer, dispatchKernelArgs), std::exception); } diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 7c5e4ab5b0..0bce113566 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -49,7 +49,7 @@ struct StateComputeModeProperties; struct ImplicitArgs; struct EncodeKernelArgsExt; -struct EncodeDispatchKernelArgs { +struct EncodePostSyncArgs { uint64_t eventAddress = 0; uint64_t postSyncImmValue = 0; uint64_t inOrderCounterValue = 0; @@ -57,6 +57,53 @@ struct EncodeDispatchKernelArgs { uint64_t inOrderIncrementValue = 0; Device *device = nullptr; NEO::InOrderExecInfo *inOrderExecInfo = nullptr; + bool isTimestampEvent = false; + bool isHostScopeSignalEvent = false; + bool isKernelUsingSystemAllocation = false; + bool dcFlushEnable = false; + bool interruptEvent = false; + bool isFlushL3ForExternalAllocationRequired = false; + bool isFlushL3ForHostUsmRequired = false; + + bool requiresSystemMemoryFence() const { + return (isHostScopeSignalEvent && isKernelUsingSystemAllocation && this->device->getProductHelper().isGlobalFenceInPostSyncRequired(this->device->getHardwareInfo())); + } +}; + +template +struct EncodePostSync { + static constexpr size_t timestampDestinationAddressAlignment = 16; + static constexpr size_t immWriteDestinationAddressAlignment = 8; + + template + static void encodeL3Flush(CommandType &cmd, const EncodePostSyncArgs &args); + + template + static void setupPostSyncForRegularEvent(CommandType &cmd, const EncodePostSyncArgs &args); + + template + static void setupPostSyncForInOrderExec(CommandType &cmd, const EncodePostSyncArgs &args); + + static uint32_t getPostSyncMocs(const RootDeviceEnvironment &rootDeviceEnvironment, const bool dcFlush); + + template + static auto &getPostSync(CommandType &cmd, size_t index); + + template + static void setPostSyncData(PostSyncT &postSyncData, const typename PostSyncT::OPERATION operation, const uint64_t gpuVa, const uint64_t immData, [[maybe_unused]] const uint32_t atomicOpcode, const uint32_t mocs, [[maybe_unused]] const bool interrupt, const bool requiresSystemMemoryFence); + + template + static void setPostSyncDataCommon(PostSyncT &postSyncData, const typename PostSyncT::OPERATION operation, const uint64_t gpuVa, const uint64_t immData); + + template + static void setCommandLevelInterrupt(CommandType &cmd, bool interrupt); + + template + static void adjustTimestampPacket(CommandType &cmd, const EncodePostSyncArgs &args); +}; + +struct EncodeDispatchKernelArgs { + Device *device = nullptr; DispatchKernelEncoderI *dispatchInterface = nullptr; IndirectHeap *surfaceStateHeap = nullptr; IndirectHeap *dynamicStateHeap = nullptr; @@ -67,6 +114,7 @@ struct EncodeDispatchKernelArgs { void *outImplicitArgsPtr = nullptr; std::list *additionalCommands = nullptr; EncodeKernelArgsExt *extendedArgs = nullptr; + NEO::EncodePostSyncArgs postSyncArgs{}; PreemptionMode preemptionMode = PreemptionMode::Initial; NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none; NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none; @@ -77,26 +125,17 @@ struct EncodeDispatchKernelArgs { int32_t defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent; bool isIndirect = false; bool isPredicate = false; - bool isTimestampEvent = false; bool requiresUncachedMocs = false; bool isInternal = false; bool isCooperative = false; - bool isHostScopeSignalEvent = false; - bool isKernelUsingSystemAllocation = false; bool isKernelDispatchedFromImmediateCmdList = false; bool isRcs = false; - bool dcFlushEnable = false; bool isHeaplessModeEnabled = false; bool isHeaplessStateInitEnabled = false; - bool interruptEvent = false; bool immediateScratchAddressPatching = false; bool makeCommandView = false; bool isFlushL3AfterPostSyncForExternalAllocationRequired = false; bool isFlushL3AfterPostSyncForHostUsmRequired = false; - - bool requiresSystemMemoryFence() const { - return (isHostScopeSignalEvent && isKernelUsingSystemAllocation && this->device->getProductHelper().isGlobalFenceInPostSyncRequired(this->device->getHardwareInfo())); - } }; struct EncodeStoreMMIOParams { @@ -709,58 +748,4 @@ struct EnodeUserInterrupt { static void encode(LinearStream &commandStream); }; -struct EncodePostSyncArgs { - uint64_t eventAddress = 0; - uint64_t postSyncImmValue = 0; - uint64_t inOrderCounterValue = 0; - uint64_t inOrderIncrementGpuAddress = 0; - uint64_t inOrderIncrementValue = 0; - Device *device = nullptr; - NEO::InOrderExecInfo *inOrderExecInfo = nullptr; - bool isTimestampEvent = false; - bool isHostScopeSignalEvent = false; - bool isKernelUsingSystemAllocation = false; - bool dcFlushEnable = false; - bool interruptEvent = false; - bool isFlushL3ForExternalAllocationRequired = false; - bool isFlushL3ForHostUsmRequired = false; - bool requiresSystemMemoryFence() const { - return (isHostScopeSignalEvent && isKernelUsingSystemAllocation && this->device->getProductHelper().isGlobalFenceInPostSyncRequired(this->device->getHardwareInfo())); - } -}; - -template -struct EncodePostSync { - static constexpr size_t timestampDestinationAddressAlignment = 16; - static constexpr size_t immWriteDestinationAddressAlignment = 8; - - static EncodePostSyncArgs createPostSyncArgs(const EncodeDispatchKernelArgs &args); - - template - static void encodeL3Flush(CommandType &cmd, const EncodePostSyncArgs &args); - - template - static void setupPostSyncForRegularEvent(CommandType &cmd, const EncodePostSyncArgs &args); - - template - static void setupPostSyncForInOrderExec(CommandType &cmd, const EncodePostSyncArgs &args); - - static uint32_t getPostSyncMocs(const RootDeviceEnvironment &rootDeviceEnvironment, const bool dcFlush); - - template - static auto &getPostSync(CommandType &cmd, size_t index); - - template - static void setPostSyncData(PostSyncT &postSyncData, const typename PostSyncT::OPERATION operation, const uint64_t gpuVa, const uint64_t immData, [[maybe_unused]] const uint32_t atomicOpcode, const uint32_t mocs, [[maybe_unused]] const bool interrupt, const bool requiresSystemMemoryFence); - - template - static void setPostSyncDataCommon(PostSyncT &postSyncData, const typename PostSyncT::OPERATION operation, const uint64_t gpuVa, const uint64_t immData); - - template - static void setCommandLevelInterrupt(CommandType &cmd, bool interrupt); - - template - static void adjustTimestampPacket(CommandType &cmd, const EncodePostSyncArgs &args); -}; - } // namespace NEO diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index f280beaf1a..b9179d8872 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -1173,23 +1173,4 @@ void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container container.getDevice()->getRootDeviceEnvironment()); } -template -EncodePostSyncArgs EncodePostSync::createPostSyncArgs(const EncodeDispatchKernelArgs &args) { - return EncodePostSyncArgs{ - .eventAddress = args.eventAddress, - .postSyncImmValue = args.postSyncImmValue, - .inOrderCounterValue = args.inOrderCounterValue, - .inOrderIncrementGpuAddress = args.inOrderIncrementGpuAddress, - .inOrderIncrementValue = args.inOrderIncrementValue, - .device = args.device, - .inOrderExecInfo = args.inOrderExecInfo, - .isTimestampEvent = args.isTimestampEvent, - .isHostScopeSignalEvent = args.isHostScopeSignalEvent, - .isKernelUsingSystemAllocation = args.isKernelUsingSystemAllocation, - .dcFlushEnable = args.dcFlushEnable, - .interruptEvent = args.interruptEvent, - .isFlushL3ForExternalAllocationRequired = args.isFlushL3AfterPostSyncForExternalAllocationRequired, - .isFlushL3ForHostUsmRequired = args.isFlushL3AfterPostSyncForHostUsmRequired}; -} - } // namespace NEO diff --git a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl index 5aef82b8b0..e170cc12e9 100644 --- a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl +++ b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl @@ -95,10 +95,6 @@ template template void EncodePostSync::encodeL3Flush(CommandType &cmd, const EncodePostSyncArgs &args) {} -template -template -void EncodePostSync::setCommandLevelInterrupt(CommandType &cmd, bool interrupt) {} - template template void EncodeDispatchKernel::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t threadGroupCount, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder) {} diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 479f7f3f5d..5ab806d8db 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -304,7 +304,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis args.requiresUncachedMocs) { PipeControlArgs syncArgs; - syncArgs.dcFlushEnable = args.dcFlushEnable; + syncArgs.dcFlushEnable = args.postSyncArgs.dcFlushEnable; MemorySynchronizationCommands::addSingleBarrier(*container.getCommandStream(), syncArgs); STATE_BASE_ADDRESS sbaCmd; auto gmmHelper = container.getDevice()->getGmmHelper(); @@ -370,11 +370,10 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis requiredWorkgroupOrder, rootDeviceEnvironment); - auto postSyncArgs = EncodePostSync::createPostSyncArgs(args); - if (args.inOrderExecInfo) { - EncodePostSync::setupPostSyncForInOrderExec(walkerCmd, postSyncArgs); - } else if (args.eventAddress) { - EncodePostSync::setupPostSyncForRegularEvent(walkerCmd, postSyncArgs); + if (args.postSyncArgs.inOrderExecInfo) { + EncodePostSync::setupPostSyncForInOrderExec(walkerCmd, args.postSyncArgs); + } else if (args.postSyncArgs.eventAddress) { + EncodePostSync::setupPostSyncForRegularEvent(walkerCmd, args.postSyncArgs); } else { EncodeDispatchKernel::forceComputeWalkerPostSyncFlushWithWrite(walkerCmd); } @@ -416,7 +415,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis .requiredDispatchWalkOrder = args.requiredDispatchWalkOrder, .localRegionSize = args.localRegionSize, .maxFrontEndThreads = args.device->getDeviceInfo().maxFrontEndThreads, - .requiredSystemFence = args.requiresSystemMemoryFence(), + .requiredSystemFence = args.postSyncArgs.requiresSystemMemoryFence(), .hasSample = kernelDescriptor.kernelAttributes.flags.hasSample, .l0DebuggerEnabled = args.device->getL0Debugger() != nullptr}; @@ -442,7 +441,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis args.maxWgCountPerTile, // maxWgCountPerTile !(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer !args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup - args.dcFlushEnable, // dcFlush + args.postSyncArgs.dcFlushEnable, // dcFlush EncodeDispatchKernel::singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile args.makeCommandView, // blockDispatchToCommandBuffer isRequiredDispatchWorkGroupOrder}; // isRequiredDispatchWorkGroupOrder diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 6ad19fd7a0..0ff08be6bf 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -230,7 +230,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis if (flush) { PipeControlArgs syncArgs; - syncArgs.dcFlushEnable = args.dcFlushEnable; + syncArgs.dcFlushEnable = args.postSyncArgs.dcFlushEnable; if (dirtyHeaps) { syncArgs.hdcPipelineFlush = true; } @@ -298,7 +298,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis .requiredDispatchWalkOrder = args.requiredDispatchWalkOrder, .localRegionSize = args.localRegionSize, .maxFrontEndThreads = args.device->getDeviceInfo().maxFrontEndThreads, - .requiredSystemFence = args.requiresSystemMemoryFence(), + .requiredSystemFence = args.postSyncArgs.requiresSystemMemoryFence(), .hasSample = false}; using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA; diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index 85369aadb1..ac4f2e7065 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -704,6 +704,59 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenSetu EXPECT_EQ(0u, mocs); } +HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenCallingAdjustTimestampPacketThenNothingHappen) { + using DefaultWalkerType = typename FamilyType::DefaultWalkerType; + MockExecutionEnvironment executionEnvironment{}; + DefaultWalkerType walkerCmd{}; + DefaultWalkerType walkerOnStart{}; + + EncodePostSyncArgs args = {.isTimestampEvent = true}; + EncodePostSync::template adjustTimestampPacket(walkerCmd, args); + EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(DefaultWalkerType))); // no change +} + +HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenCallingEncodeL3FlushThenNothingHappen) { + using DefaultWalkerType = typename FamilyType::DefaultWalkerType; + MockExecutionEnvironment executionEnvironment{}; + DefaultWalkerType walkerCmd{}; + DefaultWalkerType walkerOnStart{}; + + EncodePostSyncArgs args = {.isFlushL3ForExternalAllocationRequired = true}; + EncodePostSync::template encodeL3Flush(walkerCmd, args); + EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(DefaultWalkerType))); // no change + + args = {.isFlushL3ForHostUsmRequired = true}; + EncodePostSync::template encodeL3Flush(walkerCmd, args); + EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(DefaultWalkerType))); // no change +} + +HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenCallingSetupPostSyncForRegularEventThenNothingHappen) { + using DefaultWalkerType = typename FamilyType::DefaultWalkerType; + MockExecutionEnvironment executionEnvironment{}; + DefaultWalkerType walkerCmd{}; + DefaultWalkerType walkerOnStart{}; + + EncodePostSyncArgs args = {.eventAddress = 0x1234}; + EncodePostSync::template setupPostSyncForRegularEvent(walkerCmd, args); + EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(DefaultWalkerType))); // no change +} + +HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenCallingSetupPostSyncForInOrderExecThenNothingHappen) { + using DefaultWalkerType = typename FamilyType::DefaultWalkerType; + MockExecutionEnvironment executionEnvironment{}; + MockDevice mockDevice; + DefaultWalkerType walkerCmd{}; + DefaultWalkerType walkerOnStart{}; + + MockTagAllocator> deviceTagAllocator(0, mockDevice.getMemoryManager()); + auto deviceNode = deviceTagAllocator.getTag(); + + InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, mockDevice, 2, true, true); + EncodePostSyncArgs args = {.inOrderExecInfo = &inOrderExecInfo}; + EncodePostSync::template setupPostSyncForInOrderExec(walkerCmd, args); + EXPECT_EQ(0, memcmp(&walkerOnStart, &walkerCmd, sizeof(DefaultWalkerType))); // no change +} + HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncoderTests, givenAtLeastXeHpPlatformWhenSetupPostSyncMocsThenCorrect) { using DefaultWalkerType = typename FamilyType::DefaultWalkerType; diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index 495d5ff7f7..36c7458f21 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -712,7 +712,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispa bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); + dispatchArgs.postSyncArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp index a07569805f..5b6f589de2 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.cpp @@ -33,8 +33,8 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeAndPVCA bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.eventAddress = eventAddress; - dispatchArgs.isTimestampEvent = true; + dispatchArgs.postSyncArgs.eventAddress = eventAddress; + dispatchArgs.postSyncArgs.isTimestampEvent = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -58,8 +58,8 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenDebugVariableToForceL1FlushWh bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.eventAddress = eventAddress; - dispatchArgs.isTimestampEvent = true; + dispatchArgs.postSyncArgs.eventAddress = eventAddress; + dispatchArgs.postSyncArgs.isTimestampEvent = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -82,9 +82,9 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenEventAddressWhenEncodeThenMoc bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.eventAddress = eventAddress; - dispatchArgs.isTimestampEvent = true; - dispatchArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); + dispatchArgs.postSyncArgs.eventAddress = eventAddress; + dispatchArgs.postSyncArgs.isTimestampEvent = true; + dispatchArgs.postSyncArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp index f7185b5df1..5316d67184 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp @@ -126,8 +126,8 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenDebugVariableWhenPostSyncIsPr auto inOrderExecInfo = InOrderExecInfo::create(deviceTagAllocator.getTag(), nullptr, *pDevice, 1, false); - dispatchArgs.inOrderExecInfo = inOrderExecInfo.get(); - auto postSyncArgs = EncodePostSync::createPostSyncArgs(dispatchArgs); + dispatchArgs.postSyncArgs.inOrderExecInfo = inOrderExecInfo.get(); + auto &postSyncArgs = dispatchArgs.postSyncArgs; EncodePostSync::template setupPostSyncForInOrderExec(walkerCmd, postSyncArgs); auto &postSyncData = walkerCmd.getPostSync(); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 340d0f534f..abdfeb0b7b 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -395,8 +395,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAllocationWhenDi bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.eventAddress = eventAddress; - dispatchArgs.isTimestampEvent = true; + dispatchArgs.postSyncArgs.eventAddress = eventAddress; + dispatchArgs.postSyncArgs.isTimestampEvent = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -418,9 +418,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAddressWhenEncod bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.eventAddress = eventAddress; - dispatchArgs.isTimestampEvent = true; - dispatchArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); + dispatchArgs.postSyncArgs.eventAddress = eventAddress; + dispatchArgs.postSyncArgs.isTimestampEvent = true; + dispatchArgs.postSyncArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -1208,7 +1208,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); dispatchArgs.isInternal = isInternal; dispatchArgs.partitionCount = 2; - dispatchArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); + dispatchArgs.postSyncArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, pDevice->getRootDeviceEnvironment()); EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -1244,7 +1244,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesDynamicImplicitScaling, givenImp args.emitPipeControlStall = true; args.partitionCount = dispatchArgs.partitionCount; args.emitSelfCleanup = true; - args.dcFlushEnable = dispatchArgs.dcFlushEnable; + args.dcFlushEnable = dispatchArgs.postSyncArgs.dcFlushEnable; auto cleanupSectionOffset = WalkerPartition::computeControlSectionOffset(args); uint64_t expectedCleanupGpuVa = cmdContainer->getCommandStream()->getGraphicsAllocation()->getGpuAddress() + @@ -1389,8 +1389,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenNonTimestampEventWhen bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.eventAddress = eventAddress; - dispatchArgs.isTimestampEvent = true; + dispatchArgs.postSyncArgs.eventAddress = eventAddress; + dispatchArgs.postSyncArgs.isTimestampEvent = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -1522,7 +1522,7 @@ struct CommandEncodeStatesImplicitScalingPrimaryBufferFixture : public CommandEn bool requiresUncachedMocs = false; uint64_t eventAddress = 0xFF112233000; EncodeDispatchKernelArgs dispatchArgs = BaseClass::createDefaultDispatchKernelArgs(BaseClass::pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.eventAddress = eventAddress; + dispatchArgs.postSyncArgs.eventAddress = eventAddress; dispatchArgs.partitionCount = 2; EncodeDispatchKernel::template encode(*BaseClass::cmdContainer.get(), dispatchArgs); diff --git a/shared/test/unit_test/fixtures/command_container_fixture.h b/shared/test/unit_test/fixtures/command_container_fixture.h index a2b16607ec..ad3b184a3a 100644 --- a/shared/test/unit_test/fixtures/command_container_fixture.h +++ b/shared/test/unit_test/fixtures/command_container_fixture.h @@ -35,13 +35,7 @@ class CommandEncodeStatesFixture : public DeviceFixture { static EncodeDispatchKernelArgs createDefaultDispatchKernelArgs(Device *device, DispatchKernelEncoderI *dispatchInterface, const void *threadGroupDimensions, bool requiresUncachedMocs) { EncodeDispatchKernelArgs args{ - .eventAddress = 0, - .postSyncImmValue = 0, - .inOrderCounterValue = 0, - .inOrderIncrementGpuAddress = 0, - .inOrderIncrementValue = 0, .device = device, - .inOrderExecInfo = nullptr, .dispatchInterface = dispatchInterface, .surfaceStateHeap = nullptr, .dynamicStateHeap = nullptr, @@ -52,6 +46,22 @@ class CommandEncodeStatesFixture : public DeviceFixture { .outImplicitArgsPtr = nullptr, .additionalCommands = nullptr, .extendedArgs = nullptr, + .postSyncArgs = { + .eventAddress = 0, + .postSyncImmValue = 0, + .inOrderCounterValue = 0, + .inOrderIncrementGpuAddress = 0, + .inOrderIncrementValue = 0, + .device = device, + .inOrderExecInfo = nullptr, + .isTimestampEvent = false, + .isHostScopeSignalEvent = false, + .isKernelUsingSystemAllocation = false, + .dcFlushEnable = false, + .interruptEvent = false, + .isFlushL3ForExternalAllocationRequired = false, + .isFlushL3ForHostUsmRequired = false, + }, .preemptionMode = PreemptionMode::Disabled, .requiredPartitionDim = NEO::RequiredPartitionDim::none, .requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none, @@ -62,22 +72,15 @@ class CommandEncodeStatesFixture : public DeviceFixture { .defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent, .isIndirect = false, .isPredicate = false, - .isTimestampEvent = false, .requiresUncachedMocs = requiresUncachedMocs, .isInternal = false, .isCooperative = false, - .isHostScopeSignalEvent = false, - .isKernelUsingSystemAllocation = false, .isKernelDispatchedFromImmediateCmdList = false, .isRcs = false, - .dcFlushEnable = false, .isHeaplessModeEnabled = false, .isHeaplessStateInitEnabled = false, - .interruptEvent = false, .immediateScratchAddressPatching = false, .makeCommandView = false, - .isFlushL3AfterPostSyncForExternalAllocationRequired = false, - .isFlushL3AfterPostSyncForHostUsmRequired = false, }; return args; diff --git a/shared/test/unit_test/xe2_hpg_core/test_encode_xe2_hpg_core.cpp b/shared/test/unit_test/xe2_hpg_core/test_encode_xe2_hpg_core.cpp index 7047d34e86..f9b2816aa8 100644 --- a/shared/test/unit_test/xe2_hpg_core/test_encode_xe2_hpg_core.cpp +++ b/shared/test/unit_test/xe2_hpg_core/test_encode_xe2_hpg_core.cpp @@ -373,7 +373,7 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDefaultSettingForFenceWhenKe dispatchInterface->getCrossThreadDataSizeResult = 0; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - dispatchArgs.isKernelUsingSystemAllocation = true; + dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -399,7 +399,7 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDefaultSettingForFenceWhenEv dispatchInterface->getCrossThreadDataSizeResult = 0; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - dispatchArgs.isHostScopeSignalEvent = true; + dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -425,8 +425,8 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDefaultSettingForFenceWhenKe dispatchInterface->getCrossThreadDataSizeResult = 0; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - dispatchArgs.isKernelUsingSystemAllocation = true; - dispatchArgs.isHostScopeSignalEvent = true; + dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true; + dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); diff --git a/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp b/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp index 2ea90a2df2..de9bc9321d 100644 --- a/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp +++ b/shared/test/unit_test/xe3_core/test_encode_xe3_core.cpp @@ -314,7 +314,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDefaultSettingForFenceWhenKernelUse dispatchInterface->getCrossThreadDataSizeResult = 0; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - dispatchArgs.isKernelUsingSystemAllocation = true; + dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -340,7 +340,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDefaultSettingForFenceWhenEventHost dispatchInterface->getCrossThreadDataSizeResult = 0; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - dispatchArgs.isHostScopeSignalEvent = true; + dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -366,8 +366,8 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDefaultSettingForFenceWhenKernelUse dispatchInterface->getCrossThreadDataSizeResult = 0; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - dispatchArgs.isKernelUsingSystemAllocation = true; - dispatchArgs.isHostScopeSignalEvent = true; + dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true; + dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); diff --git a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp index 6ada5b8afa..76b94fb751 100644 --- a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp @@ -374,7 +374,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.isKernelUsingSystemAllocation = true; + dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -410,7 +410,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenEven bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.isHostScopeSignalEvent = true; + dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); @@ -445,8 +445,8 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDefaultSettingForFenceWhenKern dispatchInterface->getCrossThreadDataSizeResult = 0; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, false); - dispatchArgs.isKernelUsingSystemAllocation = true; - dispatchArgs.isHostScopeSignalEvent = true; + dispatchArgs.postSyncArgs.isKernelUsingSystemAllocation = true; + dispatchArgs.postSyncArgs.isHostScopeSignalEvent = true; EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs);