diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index 139e263f09..8adea3a630 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -95,7 +95,7 @@ inline void HardwareInterface::programWalker( } auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); - const auto &hwInfo = device.getHardwareInfo(); + constexpr bool heaplessModeEnabled = GfxFamily::template isHeaplessMode(); if constexpr (heaplessModeEnabled == false) { @@ -181,7 +181,7 @@ inline void HardwareInterface::programWalker( ImplicitScalingDispatchCommandArgs implicitScalingArgs{ workPartitionAllocationGpuVa, // workPartitionAllocationGpuVa - &hwInfo, // hwInfo + &device, // device nullptr, // outWalkerPtr requiredPartitionDim, // requiredPartitionDim partitionCount, // partitionCount @@ -207,7 +207,7 @@ inline void HardwareInterface::programWalker( timestampPacketNode->setPacketsUsed(implicitScalingArgs.partitionCount); } } else { - EncodeDispatchKernel::setWalkerRegionSettings(walkerCmd, hwInfo, 1, workgroupSize, maxWgCountPerTile, requiredWalkOrder != 0); + EncodeDispatchKernel::setWalkerRegionSettings(walkerCmd, device, 1, workgroupSize, maxWgCountPerTile, requiredWalkOrder != 0); auto computeWalkerOnStream = commandStream.getSpaceForCmd(); *computeWalkerOnStream = walkerCmd; } diff --git a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp index a9a4a28625..cc2c93fcae 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp @@ -463,7 +463,7 @@ HWTEST2_F(StaticWalkerPartitionFourTilesTests, givenPreWalkerSyncWhenStaticWalke &walkerCmd, totalBytesProgrammed, testArgs, - *defaultHwInfo); + this->rootDevice->getDevice()); taskStream->getSpace(totalBytesProgrammed); flushTaskStream(*taskStream); @@ -509,7 +509,7 @@ HWTEST2_F(StaticWalkerPartitionFourTilesTests, whenNoPreWalkerSyncThenAtomicsAre &walkerCmd, totalBytesProgrammed, testArgs, - *defaultHwInfo); + this->rootDevice->getDevice()); taskStream->getSpace(totalBytesProgrammed); flushTaskStream(*taskStream); diff --git a/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp index 7bdba28957..b22cb2ba6c 100644 --- a/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp @@ -1022,7 +1022,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicatedCommandB &walkerCmd, totalBytesProgrammed, testArgs, - *defaultHwInfo); + this->device->getDevice()); taskStream->getSpace(totalBytesProgrammed); flushStream(); auto expectedGpuAddress = taskStream->getGraphicsAllocation()->getGpuAddress() + diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 7468e5673e..702359ea47 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -188,7 +188,7 @@ struct EncodeDispatchKernel { static void setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); template - static void setWalkerRegionSettings(WalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder); + static void setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder); template static void setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 851bfb973f..47cc6d5ab3 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -684,7 +684,7 @@ void EncodeDispatchKernel::encodeEuSchedulingPolicy(InterfaceDescriptorT template template -void EncodeDispatchKernel::setWalkerRegionSettings(WalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {} +void EncodeDispatchKernel::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {} template template diff --git a/shared/source/command_container/command_encoder_enablers.inl b/shared/source/command_container/command_encoder_enablers.inl index 314cc13a90..00a2a81223 100644 --- a/shared/source/command_container/command_encoder_enablers.inl +++ b/shared/source/command_container/command_encoder_enablers.inl @@ -29,7 +29,7 @@ template void NEO::EncodeDispatchKernel::encodeEuSchedulingPolicy::getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args); template void NEO::EncodeDispatchKernel::patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired); template void NEO::EncodeDispatchKernel::forceComputeWalkerPostSyncFlushWithWrite(Family::DefaultWalkerType &walkerCmd); -template void NEO::EncodeDispatchKernel::setWalkerRegionSettings(Family::DefaultWalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount, +template void NEO::EncodeDispatchKernel::setWalkerRegionSettings(Family::DefaultWalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder); template void NEO::EncodeDispatchKernel::overrideDefaultValues(Family::DefaultWalkerType &walkerCmd, Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor); template void NEO::EncodeDispatchKernel::encodeWalkerPostSyncFields(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); diff --git a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl index 11d952d2cb..f8a9391e8a 100644 --- a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl +++ b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl @@ -90,6 +90,6 @@ void EncodeDispatchKernel::adjustTimestampPacket(WalkerType &walkerCmd, template template -void EncodeDispatchKernel::setWalkerRegionSettings(WalkerType &walkerCmd, const HardwareInfo &hwInfo, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {} +void EncodeDispatchKernel::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t maxWgCountPerTile, bool requiredWalkOrder) {} } // namespace NEO diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index a855fe7421..170483a8b0 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -418,7 +418,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis ImplicitScalingDispatchCommandArgs implicitScalingArgs{ workPartitionAllocationGpuVa, // workPartitionAllocationGpuVa - &hwInfo, // hwInfo + args.device, // device &args.outWalkerPtr, // outWalkerPtr args.requiredPartitionDim, // requiredPartitionDim args.partitionCount, // partitionCount @@ -438,7 +438,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis args.partitionCount = implicitScalingArgs.partitionCount; } else { args.partitionCount = 1; - EncodeDispatchKernel::setWalkerRegionSettings(walkerCmd, hwInfo, args.partitionCount, workgroupSize, args.maxWgCountPerTile, isRequiredWorkGroupOrder); + EncodeDispatchKernel::setWalkerRegionSettings(walkerCmd, *args.device, args.partitionCount, workgroupSize, args.maxWgCountPerTile, isRequiredWorkGroupOrder); if (!args.makeCommandView) { auto buffer = listCmdBufferStream->getSpaceForCmd(); diff --git a/shared/source/command_container/implicit_scaling.h b/shared/source/command_container/implicit_scaling.h index fa478b6ea3..0923f595be 100644 --- a/shared/source/command_container/implicit_scaling.h +++ b/shared/source/command_container/implicit_scaling.h @@ -17,6 +17,7 @@ struct WalkerPartitionArgs; namespace NEO { struct HardwareInfo; +class Device; class LinearStream; struct PipeControlArgs; struct RootDeviceEnvironment; @@ -39,7 +40,7 @@ struct ImplicitScalingHelper { struct ImplicitScalingDispatchCommandArgs { uint64_t workPartitionAllocationGpuVa = 0; - const HardwareInfo *hwInfo = nullptr; + const NEO::Device *device = nullptr; void **outWalkerPtr = nullptr; RequiredPartitionDim requiredPartitionDim = RequiredPartitionDim::none; diff --git a/shared/source/command_container/implicit_scaling_xehp_and_later.inl b/shared/source/command_container/implicit_scaling_xehp_and_later.inl index c3a6023d4f..a036532ec6 100644 --- a/shared/source/command_container/implicit_scaling_xehp_and_later.inl +++ b/shared/source/command_container/implicit_scaling_xehp_and_later.inl @@ -126,7 +126,7 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS &walkerCmd, totalProgrammedSize, walkerPartitionArgs, - *dispatchCommandArgs.hwInfo); + *dispatchCommandArgs.device); } else { if (debugManager.flags.ExperimentalSetWalkerPartitionCount.get()) { dispatchCommandArgs.partitionCount = debugManager.flags.ExperimentalSetWalkerPartitionCount.get(); @@ -142,7 +142,7 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS &walkerCmd, totalProgrammedSize, walkerPartitionArgs, - *dispatchCommandArgs.hwInfo); + *dispatchCommandArgs.device); } UNRECOVERABLE_IF(totalProgrammedSize != dispatchCommandsSize); } diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h index cfc8b36f47..195df384ed 100644 --- a/shared/source/command_container/walker_partition_xehp_and_later.h +++ b/shared/source/command_container/walker_partition_xehp_and_later.h @@ -495,7 +495,7 @@ template void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgrammed, WalkerType *inputWalker, WalkerPartitionArgs &args, - const NEO::HardwareInfo &hwInfo) { + const NEO::Device &device) { WalkerType *computeWalker = nullptr; if (!args.blockDispatchToCommandBuffer) { computeWalker = putCommand(inputAddress, totalBytesProgrammed); @@ -527,7 +527,7 @@ void *programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgramm } NEO::EncodeDispatchKernel::setWalkerRegionSettings(*inputWalker, - hwInfo, + device, args.partitionCount, args.workgroupSize, args.maxWgCountPerTile, @@ -580,7 +580,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, WalkerType *inputWalker, uint32_t &totalBytesProgrammed, WalkerPartitionArgs &args, - const NEO::HardwareInfo &hwInfo) { + const NEO::Device &device) { totalBytesProgrammed = 0u; void *currentBatchBufferPointer = cpuPointer; @@ -650,7 +650,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, args.secondaryBatchBuffer); // Walker section - auto walkerPtr = programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args, hwInfo); + auto walkerPtr = programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args, device); if (outWalkerPtr) { *outWalkerPtr = walkerPtr; } @@ -726,7 +726,7 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer, WalkerType *inputWalker, uint32_t &totalBytesProgrammed, WalkerPartitionArgs &args, - const NEO::HardwareInfo &hwInfo) { + const NEO::Device &device) { totalBytesProgrammed = 0u; void *currentBatchBufferPointer = cpuPointer; @@ -747,7 +747,7 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer, } } - auto walkerPtr = programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args, hwInfo); + auto walkerPtr = programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args, device); if (!args.blockDispatchToCommandBuffer) { if (outWalkerPtr) { diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index 8ae9ab3986..3d5004bf19 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -821,6 +821,7 @@ HWTEST2_F(CommandEncoderTests, whenAskingForImplicitScalingValuesThenAlwaysRetur using WalkerType = typename FamilyType::DefaultWalkerType; MockExecutionEnvironment executionEnvironment{}; + executionEnvironment.incRefInternal(); auto rootExecEnv = executionEnvironment.rootDeviceEnvironments[0].get(); uint8_t buffer[128] = {}; @@ -837,9 +838,11 @@ HWTEST2_F(CommandEncoderTests, whenAskingForImplicitScalingValuesThenAlwaysRetur void *ptr = nullptr; + auto device = std::make_unique(&executionEnvironment, 0); + ImplicitScalingDispatchCommandArgs args{ 0, // workPartitionAllocationGpuVa - defaultHwInfo.get(), // hwInfo + device.get(), // device &ptr, // outWalkerPtr RequiredPartitionDim::x, // requiredPartitionDim partitionCount, // partitionCount diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp index 83828043be..57d1a71f5e 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp @@ -8,6 +8,7 @@ #include "shared/source/helpers/gfx_core_helper.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/unit_test_helper.h" +#include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/hw_test.h" #include "shared/test/unit_test/encoders/walker_partition_fixture_xehp_and_later.h" @@ -17,9 +18,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst using PostSyncType = typename WalkerType::PostSyncType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.partitionCount = 16u; testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *mockExecutionEnvironment.rootDeviceEnvironments[0]); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + checkForProperCmdBufferAddressOffset = false; uint64_t gpuVirtualAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -58,7 +62,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); auto wparidMaskProgrammingLocation = cmdBufferAddress; @@ -163,6 +167,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe using WalkerType = typename FamilyType::DefaultWalkerType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.tileCount = 4u; testArgs.partitionCount = testArgs.tileCount; @@ -174,6 +179,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe auto walker = createWalker(postSyncAddress); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + uint32_t totalBytesProgrammed{}; const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); @@ -183,7 +190,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed); auto parsedOffset = 0u; @@ -248,6 +255,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd using WalkerType = typename FamilyType::DefaultWalkerType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.tileCount = 4u; testArgs.partitionCount = testArgs.tileCount; testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *mockExecutionEnvironment.rootDeviceEnvironments[0]); @@ -259,6 +267,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd testArgs.workPartitionAllocationGpuVa = 0x8000444000; auto walker = createWalker(postSyncAddress); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + uint32_t totalBytesProgrammed{}; const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); const auto postWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); @@ -269,7 +279,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed); auto parsedOffset = 0u; @@ -353,6 +363,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd using WalkerType = typename FamilyType::DefaultWalkerType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.semaphoreProgrammingRequired = true; testArgs.crossTileAtomicSynchronization = false; testArgs.tileCount = 4u; @@ -365,6 +376,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd testArgs.workPartitionAllocationGpuVa = 0x8000444000; auto walker = createWalker(postSyncAddress); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + uint32_t totalBytesProgrammed{}; const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); WalkerPartition::constructStaticallyPartitionedCommandBuffer(cmdBuffer, @@ -373,7 +386,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); EXPECT_EQ(controlSectionOffset, totalBytesProgrammed); auto parsedOffset = 0u; @@ -442,6 +455,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit using WalkerType = typename FamilyType::DefaultWalkerType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.tileCount = 4u; testArgs.partitionCount = testArgs.tileCount; testArgs.emitSelfCleanup = true; @@ -454,6 +468,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.workPartitionAllocationGpuVa = 0x8000444000; auto walker = createWalker(postSyncAddress); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + uint32_t totalBytesProgrammed{}; const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); @@ -465,7 +481,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -596,6 +612,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit using WalkerType = typename FamilyType::DefaultWalkerType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.crossTileAtomicSynchronization = false; testArgs.tileCount = 4u; testArgs.partitionCount = testArgs.tileCount; @@ -609,6 +626,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.workPartitionAllocationGpuVa = 0x8000444000; auto walker = createWalker(postSyncAddress); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + uint32_t totalBytesProgrammed{}; const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); @@ -620,7 +639,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -751,6 +770,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit using WalkerType = typename FamilyType::DefaultWalkerType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.tileCount = 4u; testArgs.partitionCount = testArgs.tileCount; testArgs.useAtomicsForSelfCleanup = true; @@ -764,6 +784,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.workPartitionAllocationGpuVa = 0x8000444000; auto walker = createWalker(postSyncAddress); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + uint32_t totalBytesProgrammed{}; const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); @@ -775,7 +797,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -912,6 +934,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit using WalkerType = typename FamilyType::DefaultWalkerType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.crossTileAtomicSynchronization = false; testArgs.tileCount = 4u; testArgs.partitionCount = testArgs.tileCount; @@ -926,6 +949,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit testArgs.workPartitionAllocationGpuVa = 0x8000444000; auto walker = createWalker(postSyncAddress); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + uint32_t totalBytesProgrammed{}; const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); const auto preWalkerSyncAddress = cmdBufferGpuAddress + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); @@ -937,7 +962,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -1075,6 +1100,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti using PostSyncType = typename WalkerType::PostSyncType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.crossTileAtomicSynchronization = false; testArgs.semaphoreProgrammingRequired = true; testArgs.tileCount = 4u; @@ -1082,6 +1108,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti testArgs.emitBatchBufferEnd = true; testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *mockExecutionEnvironment.rootDeviceEnvironments[0]); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + checkForProperCmdBufferAddressOffset = false; uint64_t gpuVirtualAddress = 0x8000123000; uint64_t postSyncAddress = 0x8000456000; @@ -1118,7 +1146,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); auto wparidMaskProgrammingLocation = cmdBufferAddress; @@ -1232,6 +1260,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe uint64_t expectedControlSectionOffset = sizeof(WalkerType); + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + uint32_t totalBytesProgrammed{}; const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); EXPECT_EQ(expectedControlSectionOffset, controlSectionOffset); @@ -1241,7 +1271,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -1274,6 +1304,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe uint64_t expectedControlSectionOffset = sizeof(WalkerPartition::LOAD_REGISTER_MEM) + sizeof(WalkerType); + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + uint32_t totalBytesProgrammed{}; const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(testArgs); EXPECT_EQ(expectedControlSectionOffset, controlSectionOffset); @@ -1283,7 +1315,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -1309,6 +1341,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr using WalkerType = typename FamilyType::DefaultWalkerType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); WalkerType walker; walker = FamilyType::template getInitGpuWalker(); walker.setThreadGroupIdStartingX(1u); @@ -1327,6 +1360,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr testArgs.workPartitionAllocationGpuVa = 0x800BADA55000; testArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, *mockExecutionEnvironment.rootDeviceEnvironments[0]); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + auto expectedCommandUsedSize = sizeof(WalkerPartition::LOAD_REGISTER_IMM) + sizeof(WalkerPartition::MI_ATOMIC) * 2 + sizeof(WalkerPartition::LOAD_REGISTER_REG) + @@ -1352,7 +1387,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp index 91ab084d2a..c0b93186d8 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp @@ -8,6 +8,7 @@ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/unit_test_helper.h" +#include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/hw_test.h" #include "shared/test/unit_test/encoders/walker_partition_fixture_xehp_and_later.h" @@ -37,13 +38,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst walker = FamilyType::template getInitGpuWalker(); walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X); + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, nullptr, gpuVirtualAddress, &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); auto totalProgrammedSize = computeControlSectionOffset(testArgs) + sizeof(BatchBufferControlData); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); @@ -417,6 +420,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramBatchBufferStartC HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhenItIsCalledThenWalkerIsProperlyProgrammed) { using WalkerType = typename FamilyType::DefaultWalkerType; + auto device = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get())); + auto expectedUsedSize = sizeof(WalkerType); WalkerType walker; walker = FamilyType::template getInitGpuWalker(); @@ -429,7 +434,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen WalkerPartition::WalkerPartitionArgs args = {}; args.partitionCount = 2; args.tileCount = 2; - programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo); + programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device); auto walkerCommand = genCmdCast(walkerCommandAddress); ASSERT_NE(nullptr, walkerCommand); @@ -443,7 +448,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen args = {}; args.partitionCount = 2; args.tileCount = 2; - programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo); + programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device); walkerCommand = genCmdCast(walkerCommandAddress); ASSERT_NE(nullptr, walkerCommand); @@ -455,7 +460,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen args = {}; args.partitionCount = 2; args.tileCount = 2; - programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo); + programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device); walkerCommand = genCmdCast(walkerCommandAddress); ASSERT_NE(nullptr, walkerCommand); @@ -468,7 +473,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramComputeWalkerWhen args = {}; args.partitionCount = 1; args.tileCount = 2; - programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo); + programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device); walkerCommand = genCmdCast(walkerCommandAddress); ASSERT_NE(nullptr, walkerCommand); @@ -909,6 +914,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe using WalkerType = typename FamilyType::DefaultWalkerType; using PostSyncType = typename WalkerType::PostSyncType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.crossTileAtomicSynchronization = false; testArgs.partitionCount = 16u; @@ -948,6 +954,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe 2 * sizeof(WalkerPartition::MI_ATOMIC) + 2 * sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + testArgs.tileCount = 4u; WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, nullptr, @@ -955,7 +963,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); auto wparidMaskProgrammingLocation = cmdBufferAddress; @@ -1119,6 +1127,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse using PostSyncType = typename WalkerType::PostSyncType; MockExecutionEnvironment mockExecutionEnvironment{}; + mockExecutionEnvironment.incRefInternal(); testArgs.crossTileAtomicSynchronization = false; testArgs.partitionCount = 16u; @@ -1159,6 +1168,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse 2 * sizeof(WalkerPartition::MI_ATOMIC) + 2 * sizeof(WalkerPartition::MI_SEMAPHORE_WAIT); + auto device = std::make_unique(&mockExecutionEnvironment, 0); + testArgs.tileCount = 4u; WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, nullptr, @@ -1166,7 +1177,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); auto wparidMaskProgrammingLocation = cmdBufferAddress; @@ -1371,13 +1382,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDynamicPartitioningWhenP auto totalProgrammedSize = cleanupSectionOffset; + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + WalkerPartition::constructDynamicallyPartitionedCommandBuffer(cmdBuffer, nullptr, gpuVirtualAddress, &walker, totalBytesProgrammed, testArgs, - *defaultHwInfo); + *device); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); auto wparidMaskProgrammingLocation = cmdBufferAddress; @@ -1772,6 +1785,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi walker.setThreadGroupIdYDimension(1u); walker.setThreadGroupIdZDimension(1u); + auto device = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get())); + bool forceExecutionOnSingleTile = false; walker.setPartitionType(WalkerType::PARTITION_TYPE::PARTITION_TYPE_X); void *walkerCommandAddress = cmdBufferAddress; @@ -1779,7 +1794,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi args.partitionCount = 2; args.tileCount = 2; args.forceExecutionOnSingleTile = forceExecutionOnSingleTile; - programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo); + programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device); auto walkerCommand = genCmdCast(walkerCommandAddress); ASSERT_NE(nullptr, walkerCommand); @@ -1793,7 +1808,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenForceExecutionOnSingleTi args.partitionCount = 2; args.tileCount = 2; args.forceExecutionOnSingleTile = forceExecutionOnSingleTile; - programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, testHardwareInfo); + programPartitionedWalker(cmdBufferAddress, totalBytesProgrammed, &walker, args, *device); walkerCommand = genCmdCast(walkerCommandAddress); ASSERT_NE(nullptr, walkerCommand); diff --git a/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp b/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp index 516b663978..09e82115e7 100644 --- a/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp +++ b/shared/test/unit_test/fixtures/implicit_scaling_fixture.cpp @@ -43,7 +43,7 @@ void ImplicitScalingFixture::tearDown() { ImplicitScalingDispatchCommandArgs ImplicitScalingFixture::createDispatchCommandArgs(uint64_t workPartitionAllocationAddress, uint32_t partitionCount) { ImplicitScalingDispatchCommandArgs args{ workPartitionAllocationAddress, // workPartitionAllocationGpuVa - defaultHwInfo.get(), // hwInfo + this->pDevice, // device nullptr, // outWalkerPtr NEO::RequiredPartitionDim::none, // requiredPartitionDim partitionCount, // partitionCount