diff --git a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl index adf7f86e86..d0302c818f 100644 --- a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl +++ b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl @@ -111,8 +111,10 @@ void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, postSyncData.setDataportSubsliceCacheFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get()); } - EncodeDispatchKernel::template setupPostSyncMocs(*walkerCmd, rootDeviceEnvironment, - MemorySynchronizationCommands::getDcFlushEnable(true, rootDeviceEnvironment)); + auto mocs = EncodePostSync::getPostSyncMocs(rootDeviceEnvironment, + MemorySynchronizationCommands::getDcFlushEnable(true, rootDeviceEnvironment)); + + postSyncData.setMocs(mocs); if (debugManager.flags.UseImmDataWriteModeOnPostSyncOperation.get()) { postSyncData.setOperation(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_IMMEDIATE_DATA); diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 1ff34cb9d8..d3103d9165 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -226,24 +226,9 @@ struct EncodeDispatchKernel : public EncodeDispatchKernelBase { const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, WalkerType &walkerCmd); - template - static void adjustTimestampPacket(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); - - template - static void encodeL3FlushAfterPostSync(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); - - template - static void setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); - template static void setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t threadGroupCount, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder); - template - static void setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); - - template - static void setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); - template static void adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); @@ -723,4 +708,58 @@ struct EnodeUserInterrupt { static void encode(LinearStream &commandStream); }; +struct EncodePostSyncArgs { + uint64_t eventAddress = 0; + uint64_t postSyncImmValue = 0; + uint64_t inOrderCounterValue = 0; + uint64_t inOrderIncrementGpuAddress = 0; + uint64_t inOrderIncrementValue = 0; + Device *device = nullptr; + NEO::InOrderExecInfo *inOrderExecInfo = nullptr; + bool isTimestampEvent = false; + bool isHostScopeSignalEvent = false; + bool isKernelUsingSystemAllocation = false; + bool dcFlushEnable = false; + bool interruptEvent = false; + bool isFlushL3ForExternalAllocationRequired = false; + bool isFlushL3ForHostUsmRequired = false; + bool requiresSystemMemoryFence() const { + return (isHostScopeSignalEvent && isKernelUsingSystemAllocation); + } +}; + +template +struct EncodePostSync { + static constexpr size_t timestampDestinationAddressAlignment = 16; + static constexpr size_t immWriteDestinationAddressAlignment = 8; + + static EncodePostSyncArgs createPostSyncArgs(const EncodeDispatchKernelArgs &args); + + template + static void encodeL3Flush(CommandType &cmd, const EncodePostSyncArgs &args); + + template + static void setupPostSyncForRegularEvent(CommandType &cmd, const EncodePostSyncArgs &args); + + template + static void setupPostSyncForInOrderExec(CommandType &cmd, const EncodePostSyncArgs &args); + + static uint32_t getPostSyncMocs(const RootDeviceEnvironment &rootDeviceEnvironment, const bool dcFlush); + + template + static auto &getPostSync(CommandType &cmd, size_t index); + + template + static void setPostSyncData(PostSyncT &postSyncData, const typename PostSyncT::OPERATION operation, const uint64_t gpuVa, const uint64_t immData, [[maybe_unused]] const uint32_t atomicOpcode, const uint32_t mocs, [[maybe_unused]] const bool interrupt, const bool requiresSystemMemoryFence); + + template + static void setPostSyncDataCommon(PostSyncT &postSyncData, const typename PostSyncT::OPERATION operation, const uint64_t gpuVa, const uint64_t immData); + + template + static void setCommandLevelInterrupt(CommandType &cmd, bool interrupt); + + template + static void adjustTimestampPacket(CommandType &cmd, const EncodePostSyncArgs &args); +}; + } // namespace NEO diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 23a4c23695..145aaa765d 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -1166,4 +1166,23 @@ void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container container.getDevice()->getRootDeviceEnvironment()); } +template +EncodePostSyncArgs EncodePostSync::createPostSyncArgs(const EncodeDispatchKernelArgs &args) { + return EncodePostSyncArgs{ + .eventAddress = args.eventAddress, + .postSyncImmValue = args.postSyncImmValue, + .inOrderCounterValue = args.inOrderCounterValue, + .inOrderIncrementGpuAddress = args.inOrderIncrementGpuAddress, + .inOrderIncrementValue = args.inOrderIncrementValue, + .device = args.device, + .inOrderExecInfo = args.inOrderExecInfo, + .isTimestampEvent = args.isTimestampEvent, + .isHostScopeSignalEvent = args.isHostScopeSignalEvent, + .isKernelUsingSystemAllocation = args.isKernelUsingSystemAllocation, + .dcFlushEnable = args.dcFlushEnable, + .interruptEvent = args.interruptEvent, + .isFlushL3ForExternalAllocationRequired = args.isFlushL3AfterPostSyncForExternalAllocationRequired, + .isFlushL3ForHostUsmRequired = args.isFlushL3AfterPostSyncForHostUsmRequired}; +} + } // namespace NEO diff --git a/shared/source/command_container/command_encoder_enablers.inl b/shared/source/command_container/command_encoder_enablers.inl index 2ebfed3ac9..0c1ee36460 100644 --- a/shared/source/command_container/command_encoder_enablers.inl +++ b/shared/source/command_container/command_encoder_enablers.inl @@ -9,14 +9,9 @@ template struct NEO::EncodeDispatchKernel; template void NEO::EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); -template void NEO::EncodeDispatchKernel::adjustTimestampPacket(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void NEO::EncodeDispatchKernel::setupPostSyncForRegularEvent(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void NEO::EncodeDispatchKernel::encodeL3FlushAfterPostSync(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void NEO::EncodeDispatchKernel::setupPostSyncForInOrderExec(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); template void NEO::EncodeDispatchKernel::setGrfInfo(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, uint32_t grfCount, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); template void NEO::EncodeDispatchKernel::setupPreferredSlmSize(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); template void NEO::EncodeDispatchKernel::encodeThreadGroupDispatch(Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, Family::DefaultWalkerType &walkerCmd); -template void NEO::EncodeDispatchKernel::setupPostSyncMocs(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); template void NEO::EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); template void NEO::EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template void NEO::EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); @@ -34,6 +29,13 @@ template void NEO::EncodeDispatchKernel::overrideDefaultValues::encodeWalkerPostSyncFields(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs); template void NEO::EncodeDispatchKernel::encodeComputeDispatchAllWalker(Family::DefaultWalkerType &walkerCmd, const Family::DefaultWalkerType::InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs); +template struct NEO::EncodePostSync; + +template void NEO::EncodePostSync::adjustTimestampPacket(Family::DefaultWalkerType &walkerCmd, const EncodePostSyncArgs &args); +template void NEO::EncodePostSync::setupPostSyncForRegularEvent(Family::DefaultWalkerType &walkerCmd, const EncodePostSyncArgs &args); +template void NEO::EncodePostSync::encodeL3Flush(Family::DefaultWalkerType &walkerCmd, const EncodePostSyncArgs &args); +template void NEO::EncodePostSync::setupPostSyncForInOrderExec(Family::DefaultWalkerType &walkerCmd, const EncodePostSyncArgs &args); + template struct NEO::EncodeStates; template struct NEO::EncodeMediaInterfaceDescriptorLoad; diff --git a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl index ca6e5237fd..5aef82b8b0 100644 --- a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl +++ b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl @@ -43,29 +43,37 @@ bool EncodeDispatchKernel::singleTileExecImplicitScalingRequired(bool co } template -template -void EncodeDispatchKernel::setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) { - using POSTSYNC_DATA = decltype(Family::template getPostSyncType()); +template +void EncodePostSync::setupPostSyncForInOrderExec(CommandType &cmd, const EncodePostSyncArgs &args) { + using POSTSYNC_DATA = decltype(Family::template getPostSyncType()); - auto &postSync = walkerCmd.getPostSync(); - - postSync.setDataportPipelineFlush(true); - postSync.setDataportSubsliceCacheFlush(true); - if (NEO::debugManager.flags.ForcePostSyncL1Flush.get() != -1) { - postSync.setDataportPipelineFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get()); - postSync.setDataportSubsliceCacheFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get()); - } + auto &postSync = cmd.getPostSync(); uint64_t gpuVa = args.inOrderExecInfo->getBaseDeviceAddress() + args.inOrderExecInfo->getAllocationOffset(); - UNRECOVERABLE_IF(!(isAligned(gpuVa))); - postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA); - postSync.setImmediateData(args.inOrderCounterValue); - postSync.setDestinationAddress(gpuVa); + uint32_t mocs = getPostSyncMocs(args.device->getRootDeviceEnvironment(), args.dcFlushEnable); - EncodeDispatchKernel::setupPostSyncMocs(walkerCmd, args.device->getRootDeviceEnvironment(), args.dcFlushEnable); - EncodeDispatchKernel::adjustTimestampPacket(walkerCmd, args); + setPostSyncData(postSync, POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, gpuVa, args.inOrderCounterValue, 0, mocs, false, false); + adjustTimestampPacket(cmd, args); +} + +template +template +void EncodePostSync::setPostSyncData(PostSyncT &postSyncData, typename PostSyncT::OPERATION operation, uint64_t gpuVa, uint64_t immData, + [[maybe_unused]] uint32_t atomicOpcode, uint32_t mocs, [[maybe_unused]] bool interrupt, bool requiresSystemMemoryFence) { + setPostSyncDataCommon(postSyncData, operation, gpuVa, immData); + + postSyncData.setDataportPipelineFlush(true); + postSyncData.setDataportSubsliceCacheFlush(true); + + if (NEO::debugManager.flags.ForcePostSyncL1Flush.get() != -1) { + postSyncData.setDataportPipelineFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get()); + postSyncData.setDataportSubsliceCacheFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get()); + } + + postSyncData.setMocs(mocs); + postSyncData.setSystemMemoryFenceRequest(requiresSystemMemoryFence); } template @@ -76,16 +84,20 @@ void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t a } template -template -void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {} +template +void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, CommandType &cmd, const EncodeWalkerArgs &walkerArgs) {} template -template -void EncodeDispatchKernel::adjustTimestampPacket(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {} +template +void EncodePostSync::adjustTimestampPacket(CommandType &cmd, const EncodePostSyncArgs &args) {} template -template -void EncodeDispatchKernel::encodeL3FlushAfterPostSync(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {} +template +void EncodePostSync::encodeL3Flush(CommandType &cmd, const EncodePostSyncArgs &args) {} + +template +template +void EncodePostSync::setCommandLevelInterrupt(CommandType &cmd, bool interrupt) {} template template diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 36d250bad0..7435556781 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -370,10 +370,11 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis requiredWorkgroupOrder, rootDeviceEnvironment); + auto postSyncArgs = EncodePostSync::createPostSyncArgs(args); if (args.inOrderExecInfo) { - EncodeDispatchKernel::setupPostSyncForInOrderExec(walkerCmd, args); + EncodePostSync::setupPostSyncForInOrderExec(walkerCmd, postSyncArgs); } else if (args.eventAddress) { - EncodeDispatchKernel::setupPostSyncForRegularEvent(walkerCmd, args); + EncodePostSync::setupPostSyncForRegularEvent(walkerCmd, postSyncArgs); } else { EncodeDispatchKernel::forceComputeWalkerPostSyncFlushWithWrite(walkerCmd); } @@ -482,19 +483,11 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis } template -template -void EncodeDispatchKernel::setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) { - using POSTSYNC_DATA = decltype(Family::template getPostSyncType()); +template +void EncodePostSync::setupPostSyncForRegularEvent(CommandType &cmd, const EncodePostSyncArgs &args) { + using POSTSYNC_DATA = decltype(Family::template getPostSyncType()); - auto &postSync = walkerCmd.getPostSync(); - - postSync.setDataportPipelineFlush(true); - postSync.setDataportSubsliceCacheFlush(true); - - if (NEO::debugManager.flags.ForcePostSyncL1Flush.get() != -1) { - postSync.setDataportPipelineFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get()); - postSync.setDataportSubsliceCacheFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get()); - } + auto &postSync = cmd.getPostSync(); auto operationType = POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA; uint64_t gpuVa = args.eventAddress; @@ -508,30 +501,33 @@ void EncodeDispatchKernel::setupPostSyncForRegularEvent(WalkerType &walk } else { UNRECOVERABLE_IF(!(isAligned(gpuVa))); } + uint32_t mocs = getPostSyncMocs(args.device->getRootDeviceEnvironment(), args.dcFlushEnable); + setPostSyncData(postSync, operationType, gpuVa, immData, 0, mocs, false, false); - postSync.setOperation(operationType); - postSync.setImmediateData(immData); - postSync.setDestinationAddress(gpuVa); - - EncodeDispatchKernel::encodeL3FlushAfterPostSync(walkerCmd, args); - EncodeDispatchKernel::setupPostSyncMocs(walkerCmd, args.device->getRootDeviceEnvironment(), args.dcFlushEnable); - EncodeDispatchKernel::adjustTimestampPacket(walkerCmd, args); + encodeL3Flush(cmd, args); + adjustTimestampPacket(cmd, args); } template -template -inline void EncodeDispatchKernel::setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) { - auto &postSyncData = walkerCmd.getPostSync(); +template +void EncodePostSync::setPostSyncDataCommon(PostSyncT &postSyncData, typename PostSyncT::OPERATION operation, uint64_t gpuVa, uint64_t immData) { + postSyncData.setOperation(operation); + postSyncData.setImmediateData(immData); + postSyncData.setDestinationAddress(gpuVa); +} + +template +inline uint32_t EncodePostSync::getPostSyncMocs(const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) { auto gmmHelper = rootDeviceEnvironment.getGmmHelper(); - if (dcFlush) { - postSyncData.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); - } else { - postSyncData.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)); + if (debugManager.flags.OverridePostSyncMocs.get() != -1) { + return debugManager.flags.OverridePostSyncMocs.get(); } - if (debugManager.flags.OverridePostSyncMocs.get() != -1) { - postSyncData.setMocs(debugManager.flags.OverridePostSyncMocs.get()); + if (dcFlush) { + return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); + } else { + return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); } } diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 78842a64fb..b33fb094e1 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -590,20 +590,21 @@ inline void EncodeStoreMemory::programStoreDataImm(MI_STORE_DATA_IMM *cm } template -template -void EncodeDispatchKernel::setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) {} +uint32_t EncodePostSync::getPostSyncMocs(const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) { + return 0; +} template -template -void EncodeDispatchKernel::setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {} +template +void EncodePostSync::setupPostSyncForRegularEvent(CommandType &cmd, const EncodePostSyncArgs &args) {} template -template -void EncodeDispatchKernel::encodeL3FlushAfterPostSync(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {} +template +void EncodePostSync::encodeL3Flush(CommandType &cmd, const EncodePostSyncArgs &args) {} template -template -void EncodeDispatchKernel::setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {} +template +void EncodePostSync::setupPostSyncForInOrderExec(CommandType &cmd, const EncodePostSyncArgs &args) {} template template @@ -708,8 +709,8 @@ template void EncodeDispatchKernel::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t threadGroupCount, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder) {} template -template -void EncodeDispatchKernel::adjustTimestampPacket(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {} +template +void EncodePostSync::adjustTimestampPacket(CommandType &cmd, const EncodePostSyncArgs &args) {} template <> size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device, bool isRcs) { diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index 0d5de21200..627716a6ec 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -699,11 +699,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncoderTests, givenDebugFlagSetWhenProgrammi } HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncoderTests, givenPreXeHpPlatformWhenSetupPostSyncMocsThenNothingHappen) { - using DefaultWalkerType = typename FamilyType::DefaultWalkerType; - - DefaultWalkerType walkerCmd{}; MockExecutionEnvironment executionEnvironment{}; - EXPECT_NO_THROW(EncodeDispatchKernel::setupPostSyncMocs(walkerCmd, *executionEnvironment.rootDeviceEnvironments[0], false)); + uint32_t mocs; + EXPECT_NO_THROW(mocs = EncodePostSync::getPostSyncMocs(*executionEnvironment.rootDeviceEnvironments[0], false)); + EXPECT_EQ(0u, mocs); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncoderTests, givenAtLeastXeHpPlatformWhenSetupPostSyncMocsThenCorrect) { @@ -716,7 +715,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncoderTests, givenAtLeastXeHpPlatformWhenSe { DefaultWalkerType walkerCmd{}; - EncodeDispatchKernel::setupPostSyncMocs(walkerCmd, rootDeviceEnvironment, dcFlush); + uint32_t mocs = 0; + EXPECT_NO_THROW(mocs = EncodePostSync::getPostSyncMocs(*executionEnvironment.rootDeviceEnvironments[0], dcFlush)); + EXPECT_NO_THROW(walkerCmd.getPostSync().setMocs(mocs)); auto gmmHelper = rootDeviceEnvironment.getGmmHelper(); auto expectedMocs = dcFlush ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); @@ -728,7 +729,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncoderTests, givenAtLeastXeHpPlatformWhenSe auto expectedMocs = 9u; debugManager.flags.OverridePostSyncMocs.set(expectedMocs); DefaultWalkerType walkerCmd{}; - EncodeDispatchKernel::setupPostSyncMocs(walkerCmd, rootDeviceEnvironment, dcFlush); + uint32_t mocs = 0; + EXPECT_NO_THROW(mocs = EncodePostSync::getPostSyncMocs(*executionEnvironment.rootDeviceEnvironments[0], false)); + EXPECT_NO_THROW(walkerCmd.getPostSync().setMocs(mocs)); EXPECT_EQ(expectedMocs, walkerCmd.getPostSync().getMocs()); } } diff --git a/shared/test/unit_test/encoders/test_command_encoder.cpp b/shared/test/unit_test/encoders/test_command_encoder.cpp index 34187e5d96..f29b063c24 100644 --- a/shared/test/unit_test/encoders/test_command_encoder.cpp +++ b/shared/test/unit_test/encoders/test_command_encoder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -213,3 +213,19 @@ HWTEST2_F(CommandEncoderTest, givenPredicateBitSetWhenProgrammingBbStartThenSetC EncodeBatchBufferStartOrEnd::programBatchBufferStart(&cmdStream, 0, false, false, true); EXPECT_EQ(1u, cmd.getPredicationEnable()); } + +HWTEST_F(CommandEncoderTest, givenEncodePostSyncArgsWhenCallingRequiresSystemMemoryFenceThenCorrectValuesAreReturned) { + EncodePostSyncArgs args{}; + for (bool hostScopeSignalEvent : {true, false}) { + for (bool kernelUsingSystemAllocation : {true, false}) { + args.isHostScopeSignalEvent = hostScopeSignalEvent; + args.isKernelUsingSystemAllocation = kernelUsingSystemAllocation; + + if (hostScopeSignalEvent && kernelUsingSystemAllocation) { + EXPECT_TRUE(args.requiresSystemMemoryFence()); + } else { + EXPECT_FALSE(args.requiresSystemMemoryFence()); + } + } + } +} diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp index f78ecbaf64..f7185b5df1 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp @@ -127,8 +127,8 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenDebugVariableWhenPostSyncIsPr auto inOrderExecInfo = InOrderExecInfo::create(deviceTagAllocator.getTag(), nullptr, *pDevice, 1, false); dispatchArgs.inOrderExecInfo = inOrderExecInfo.get(); - - EncodeDispatchKernel::template setupPostSyncForInOrderExec(walkerCmd, dispatchArgs); + auto postSyncArgs = EncodePostSync::createPostSyncArgs(dispatchArgs); + EncodePostSync::template setupPostSyncForInOrderExec(walkerCmd, postSyncArgs); auto &postSyncData = walkerCmd.getPostSync(); EXPECT_FALSE(postSyncData.getDataportPipelineFlush());