diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 273434e3b1..af9b586004 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1026,7 +1026,10 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlit(uintptr_t NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties}; - NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); + auto &rootDeviceEnvironment = device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]; + NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *rootDeviceEnvironment); + auto dummyAllocation = rootDeviceEnvironment->getDummyAllocation(); + commandContainer.addToResidencyContainer(dummyAllocation); return ZE_RESULT_SUCCESS; } @@ -1069,12 +1072,16 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(NEO NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties}; appendEventForProfiling(signalEvent, true); - bool copyRegionPreferred = NEO::BlitCommandsHelper::isCopyRegionPreferred(copySizeModified, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()], blitProperties.isSystemMemoryPoolUsed); + auto &rootDeviceEnvironment = device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]; + bool copyRegionPreferred = NEO::BlitCommandsHelper::isCopyRegionPreferred(copySizeModified, *rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed); if (copyRegionPreferred) { - NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferRegion(blitProperties, *commandContainer.getCommandStream(), 
*device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); + NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferRegion(blitProperties, *commandContainer.getCommandStream(), *rootDeviceEnvironment); } else { - NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); + NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *rootDeviceEnvironment); } + auto dummyAllocation = rootDeviceEnvironment->getDummyAllocation(); + commandContainer.addToResidencyContainer(dummyAllocation); + appendSignalEventPostWalker(signalEvent); return ZE_RESULT_SUCCESS; } @@ -1101,7 +1108,11 @@ ze_result_t CommandListCoreFamily::appendCopyImageBlit(NEO::Graph commandContainer.addToResidencyContainer(clearColorAllocation); appendEventForProfiling(signalEvent, true); - NEO::BlitCommandsHelper::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); + auto &rootDeviceEnvironment = device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]; + NEO::BlitCommandsHelper::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), *rootDeviceEnvironment); + auto dummyAllocation = rootDeviceEnvironment->getDummyAllocation(); + commandContainer.addToResidencyContainer(dummyAllocation); + appendSignalEventPostWalker(signalEvent); return ZE_RESULT_SUCCESS; } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index 96adb6de0d..16f030e0b4 100644 --- 
a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -733,7 +733,8 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenEstimatingCommandsSizeThenCal const uint32_t numberOfBlitOperations = 4; auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); - size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, &(pDevice->getRootDeviceEnvironmentRef())}; + size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); @@ -773,7 +774,8 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenDirectsubmissionEnabledEstima const uint32_t numberOfBlitOperations = 4; auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_START); - size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, &(pDevice->getRootDeviceEnvironmentRef())}; + size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); @@ -811,7 +813,8 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenEstimatingCommandsSizeForWrit const Vec3 bltSize = {(3 * max2DBlitSize), 4, 2}; const size_t numberOfBlts = 3 * bltSize.y * bltSize.z; const size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z; - size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, 
&(pDevice->getRootDeviceEnvironmentRef())}; + size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); @@ -850,7 +853,8 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenDirectSubmissionEnabledEstima const Vec3 bltSize = {(3 * max2DBlitSize), 4, 2}; const size_t numberOfBlts = 3 * bltSize.y * bltSize.z; const size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z; - size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, &(pDevice->getRootDeviceEnvironmentRef())}; + size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); @@ -885,7 +889,8 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenDirectSubmissionEnabledEstima } HWTEST_F(BcsTests, givenTimestampPacketWriteRequestWhenEstimatingSizeForCommandsThenAddMiFlushDw) { - size_t expectedBaseSize = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, &(pDevice->getRootDeviceEnvironmentRef())}; + size_t expectedBaseSize = sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs); if (BlitCommandsHelper::miArbCheckWaRequired()) { expectedBaseSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); @@ -907,7 +912,8 @@ HWTEST_F(BcsTests, givenTimestampPacketWriteRequestWhenEstimatingSizeForCommands } HWTEST_F(BcsTests, givenTimestampPacketWriteRequestWhenEstimatingSizeForCommandsWithProfilingThenAddMiFlushDw) { - size_t expectedBaseSize = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK) + + EncodeDummyBlitWaArgs waArgs{true, 
&(pDevice->getRootDeviceEnvironmentRef())}; + size_t expectedBaseSize = sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs) + EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); if (BlitCommandsHelper::miArbCheckWaRequired()) { @@ -939,7 +945,8 @@ HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAd csrDependencies.timestampPacketContainer.push_back(&timestamp0); csrDependencies.timestampPacketContainer.push_back(&timestamp1); - size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, &(pDevice->getRootDeviceEnvironmentRef())}; + size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); @@ -961,7 +968,8 @@ HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAd HWTEST_F(BcsTests, givenImageAndBufferWhenEstimateBlitCommandSizeThenReturnCorrectCommandSize) { for (auto isImage : {false, true}) { - auto expectedSize = sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, &(pDevice->getRootDeviceEnvironmentRef())}; + auto expectedSize = EncodeMiArbCheck::getCommandSizeWithWa(waArgs); expectedSize += isImage ? 
sizeof(typename FamilyType::XY_BLOCK_COPY_BLT) : sizeof(typename FamilyType::XY_COPY_BLT); if (BlitCommandsHelper::miArbCheckWaRequired()) { @@ -979,7 +987,6 @@ HWTEST_F(BcsTests, givenImageAndBufferWhenEstimateBlitCommandSizeThenReturnCorre } HWTEST_F(BcsTests, givenImageAndBufferBlitDirectionsWhenIsImageOperationIsCalledThenReturnCorrectValue) { - BlitProperties blitProperties{}; std::pair params[] = {{false, BlitterConstants::BlitDirection::HostPtrToBuffer}, {false, BlitterConstants::BlitDirection::BufferToHostPtr}, @@ -1065,7 +1072,21 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC EXPECT_NE(nullptr, miFlush); } } + const auto &productHelper = pDevice->getProductHelper(); + if (productHelper.isDummyBlitWaRequired()) { + auto dummyBltCmd = genCmdCast(*(cmdIterator++)); + EXPECT_NE(nullptr, dummyBltCmd); + auto expectedX2 = 1u; + auto expectedY2 = 4u; + uint32_t expectedPitch = 4096u; + auto expectedDestinationBaseAddress = pDevice->getRootDeviceEnvironment().getDummyAllocation()->getGpuAddress(); + + EXPECT_EQ(expectedDestinationBaseAddress, dummyBltCmd->getDestinationBaseAddress()); + EXPECT_EQ(expectedX2, dummyBltCmd->getDestinationX2CoordinateRight()); + EXPECT_EQ(expectedY2, dummyBltCmd->getDestinationY2CoordinateBottom()); + EXPECT_EQ(expectedPitch, dummyBltCmd->getDestinationPitch()); + } auto miArbCheckCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miArbCheckCmd); EXPECT_TRUE(memcmp(&FamilyType::cmdInitArbCheck, miArbCheckCmd, sizeof(typename FamilyType::MI_ARB_CHECK)) == 0); @@ -1203,6 +1224,7 @@ struct BcsDetaliedTests : public BcsTests, using BcsDetaliedTestsWithParams = BcsDetaliedTests>; HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenProgramAddresseForWriteReadBufferRect) { + auto &csr = pDevice->getUltCommandStreamReceiver(); static_cast(csr.getMemoryManager())->turnOnFakingBigAllocations(); @@ -1296,7 +1318,21 @@ HWTEST_P(BcsDetaliedTestsWithParams, 
givenBltSizeWithLeftoverWhenDispatchedThenP EXPECT_NE(nullptr, miFlush); } } + const auto &productHelper = pDevice->getProductHelper(); + if (productHelper.isDummyBlitWaRequired()) { + auto dummyBltCmd = genCmdCast(*(cmdIterator++)); + EXPECT_NE(nullptr, dummyBltCmd); + auto expectedX2 = 1u; + auto expectedY2 = 4u; + uint32_t expectedPitch = 4096u; + auto expectedDestinationBaseAddress = pDevice->getRootDeviceEnvironment().getDummyAllocation()->getGpuAddress(); + + EXPECT_EQ(expectedDestinationBaseAddress, dummyBltCmd->getDestinationBaseAddress()); + EXPECT_EQ(expectedX2, dummyBltCmd->getDestinationX2CoordinateRight()); + EXPECT_EQ(expectedY2, dummyBltCmd->getDestinationY2CoordinateBottom()); + EXPECT_EQ(expectedPitch, dummyBltCmd->getDestinationPitch()); + } auto miArbCheckCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miArbCheckCmd); EXPECT_TRUE(memcmp(&FamilyType::cmdInitArbCheck, miArbCheckCmd, sizeof(typename FamilyType::MI_ARB_CHECK)) == 0); @@ -1395,6 +1431,11 @@ HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenP EXPECT_NE(nullptr, miFlush); } } + const auto &productHelper = pDevice->getProductHelper(); + if (productHelper.isDummyBlitWaRequired()) { + auto dummyBltCmd = genCmdCast(*(cmdIterator++)); + EXPECT_NE(nullptr, dummyBltCmd); + } auto miArbCheckCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miArbCheckCmd); @@ -1486,7 +1527,21 @@ HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenP EXPECT_NE(nullptr, miFlush); } } + const auto &productHelper = pDevice->getProductHelper(); + if (productHelper.isDummyBlitWaRequired()) { + auto dummyBltCmd = genCmdCast(*(cmdIterator++)); + EXPECT_NE(nullptr, dummyBltCmd); + auto expectedX2 = 1u; + auto expectedY2 = 4u; + uint32_t expectedPitch = 4096u; + auto expectedDestinationBaseAddress = pDevice->getRootDeviceEnvironment().getDummyAllocation()->getGpuAddress(); + + EXPECT_EQ(expectedDestinationBaseAddress, 
dummyBltCmd->getDestinationBaseAddress()); + EXPECT_EQ(expectedX2, dummyBltCmd->getDestinationX2CoordinateRight()); + EXPECT_EQ(expectedY2, dummyBltCmd->getDestinationY2CoordinateBottom()); + EXPECT_EQ(expectedPitch, dummyBltCmd->getDestinationPitch()); + } auto miArbCheckCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miArbCheckCmd); EXPECT_TRUE(memcmp(&FamilyType::cmdInitArbCheck, miArbCheckCmd, sizeof(typename FamilyType::MI_ARB_CHECK)) == 0); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index 6e58eb84c3..fa6620ad15 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -37,7 +37,8 @@ using namespace NEO; HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; - size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, &(pDevice->getRootDeviceEnvironmentRef())}; + size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); @@ -72,7 +73,8 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredComman HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; - size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, &(pDevice->getRootDeviceEnvironmentRef())}; + size_t cmdsSizePerBlit = 
sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); @@ -101,7 +103,8 @@ HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequir HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; - size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, &(pDevice->getRootDeviceEnvironmentRef())}; + size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); @@ -136,7 +139,8 @@ HWTEST_F(BcsTests, givenBltWithBigCopySizeWhenEstimatingCommandSizeForReadBuffer auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment, false)); - size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{true, &(pDevice->getRootDeviceEnvironmentRef())}; + size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + EncodeMiArbCheck::getCommandSizeWithWa(waArgs); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); @@ -501,7 +505,13 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations EXPECT_TRUE(csr.isMadeResident(graphicsAllocation1)); EXPECT_TRUE(csr.isMadeResident(graphicsAllocation2)); EXPECT_TRUE(csr.isMadeResident(csr.getTagAllocation())); + 
EXPECT_TRUE(csr.isMadeResident(csr.getTagAllocation())); EXPECT_EQ(1u, csr.makeSurfacePackNonResidentCalled); + auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironmentRef(); + if (getHelper().isDummyBlitWaRequired()) { + residentAllocationsNum++; + EXPECT_TRUE(csr.isMadeResident(rootDeviceEnvironment.getDummyAllocation())); + } if (csr.clearColorAllocation) { EXPECT_TRUE(csr.isMadeResident(csr.clearColorAllocation)); residentAllocationsNum++; @@ -561,6 +571,11 @@ HWTEST_F(BcsTests, givenFenceAllocationIsRequiredWhenBlitDispatchedThenMakeAllAl EXPECT_TRUE(bcsCsr->isMadeResident(graphicsAllocation2)); EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->getTagAllocation())); EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->globalFenceAllocation)); + auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironmentRef(); + if (getHelper().isDummyBlitWaRequired()) { + EXPECT_TRUE(bcsCsr->isMadeResident(rootDeviceEnvironment.getDummyAllocation())); + residentAllocationsNum++; + } if (bcsCsr->clearColorAllocation) { EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->clearColorAllocation)); residentAllocationsNum++; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h index 0c5acf8472..5b5a6970bb 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/helpers/timestamp_packet_container.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/hw_test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" @@ -15,6 +16,7 @@ using namespace NEO; struct BcsTests : public Test { void SetUp() override { + DebugManager.flags.ForceDummyBlitWa.set(-1); Test::SetUp(); context = std::make_unique(pClDevice); } @@ -34,4 +36,5 @@ struct BcsTests : public Test { 
TimestampPacketContainer timestampPacketContainer; CsrDependencies csrDependencies; std::unique_ptr context; + DebugManagerStateRestore dbgRestore; }; diff --git a/opencl/test/unit_test/device/device_tests.cpp b/opencl/test/unit_test/device/device_tests.cpp index 9d2ebba04e..013afd6c1f 100644 --- a/opencl/test/unit_test/device/device_tests.cpp +++ b/opencl/test/unit_test/device/device_tests.cpp @@ -508,14 +508,9 @@ HWTEST_F(DeviceHwTest, givenGfxCoreHelperInputWhenInitializingCsrThenCreatePageT localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; localHwInfo.capabilityTable.ftrRenderCompressedImages = false; - MockExecutionEnvironment executionEnvironment; - executionEnvironment.prepareRootDeviceEnvironments(3); + MockExecutionEnvironment executionEnvironment(&localHwInfo, true, 3u); executionEnvironment.incRefInternal(); - for (auto i = 0u; i < executionEnvironment.rootDeviceEnvironments.size(); i++) { - executionEnvironment.rootDeviceEnvironments[i]->setHwInfoAndInitHelpers(&localHwInfo); - executionEnvironment.rootDeviceEnvironments[i]->initGmm(); - } - executionEnvironment.initializeMemoryManager(); + std::unique_ptr device; device.reset(MockDevice::createWithExecutionEnvironment(&localHwInfo, &executionEnvironment, 0)); auto &csr0 = device->getUltCommandStreamReceiver(); diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 667ece87ec..a139199f7c 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -16,6 +16,7 @@ #include "shared/source/kernel/kernel_execution_type.h" #include +#include namespace NEO { enum class SlmPolicy; @@ -472,13 +473,37 @@ struct EncodeMemoryPrefetch { static size_t getSizeForMemoryPrefetch(size_t size, const RootDeviceEnvironment &rootDeviceEnvironment); }; +struct EncodeDummyBlitWaArgs { + bool isBcs = false; + RootDeviceEnvironment *rootDeviceEnvironment = nullptr; +}; + template struct 
EncodeMiArbCheck { using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK; - static void program(LinearStream &commandStream); - static void adjust(MI_ARB_CHECK &miArbCheck); + static void programWithWa(LinearStream &commandStream, std::optional preParserDisable, EncodeDummyBlitWaArgs &waArgs); + static size_t getCommandSizeWithWa(const EncodeDummyBlitWaArgs &waArgs); + + protected: + static void program(LinearStream &commandStream, std::optional preParserDisable); static size_t getCommandSize(); + static void adjust(MI_ARB_CHECK &miArbCheck, std::optional preParserDisable); +}; + +template +struct EncodeWA { + static void encodeAdditionalPipelineSelect(LinearStream &stream, const PipelineSelectArgs &args, bool is3DPipeline, + const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs); + static size_t getAdditionalPipelineSelectSize(Device &device, bool isRcs); + + static void addPipeControlPriorToNonPipelinedStateCommand(LinearStream &commandStream, PipeControlArgs args, + const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs); + static void setAdditionalPipeControlFlagsForNonPipelineStateCommand(PipeControlArgs &args); + + static void addPipeControlBeforeStateBaseAddress(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs, bool dcFlushRequired); + + static void adjustCompressionFormatForPlanarImage(uint32_t &compressionFormat, int plane); }; template diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 7d6862f176..26af6ddd8d 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -15,6 +15,7 @@ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/bindless_heaps_helper.h" +#include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/definitions/mi_flush_args.h" 
#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/local_id_gen.h" @@ -978,17 +979,27 @@ template inline size_t EncodeMemoryPrefetch::getSizeForMemoryPrefetch(size_t size, const RootDeviceEnvironment &rootDeviceEnvironment) { return 0u; } template -void EncodeMiArbCheck::program(LinearStream &commandStream) { +void EncodeMiArbCheck::program(LinearStream &commandStream, std::optional preParserDisable) { MI_ARB_CHECK cmd = Family::cmdInitArbCheck; - EncodeMiArbCheck::adjust(cmd); - + EncodeMiArbCheck::adjust(cmd, preParserDisable); auto miArbCheckStream = commandStream.getSpaceForCmd(); *miArbCheckStream = cmd; } template -inline size_t EncodeMiArbCheck::getCommandSize() { return sizeof(MI_ARB_CHECK); } +size_t EncodeMiArbCheck::getCommandSize() { return sizeof(MI_ARB_CHECK); } + +template +void EncodeMiArbCheck::programWithWa(LinearStream &commandStream, std::optional preParserDisable, EncodeDummyBlitWaArgs &waArgs) { + BlitCommandsHelper::dispatchDummyBlit(commandStream, waArgs); + EncodeMiArbCheck::program(commandStream, preParserDisable); +} + +template +size_t EncodeMiArbCheck::getCommandSizeWithWa(const EncodeDummyBlitWaArgs &waArgs) { + return EncodeMiArbCheck::getCommandSize() + BlitCommandsHelper::getDummyBlitSize(waArgs); +} template inline void EncodeNoop::alignToCacheLine(LinearStream &commandStream) { diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 353b5f997e..15c26b9dcc 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -518,7 +518,7 @@ inline void EncodeWA::addPipeControlBeforeStateBaseAddress(LinearStre } template -inline void EncodeWA::adjustCompressionFormatForPlanarImage(uint32_t &compressionFormat, GMM_YUV_PLANE_ENUM plane) { +inline void EncodeWA::adjustCompressionFormatForPlanarImage(uint32_t 
&compressionFormat, int plane) { } template @@ -577,7 +577,7 @@ inline void EncodeStoreMemory::programStoreDataImm(MI_STORE_DATA_IMM *cm } template -inline void EncodeMiArbCheck::adjust(MI_ARB_CHECK &miArbCheck) { +inline void EncodeMiArbCheck::adjust(MI_ARB_CHECK &miArbCheck, std::optional preParserDisable) { } template @@ -590,5 +590,4 @@ template size_t EncodeDispatchKernel::additionalSizeRequiredDsh(uint32_t iddCount) { return iddCount * sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA); } - } // namespace NEO diff --git a/shared/source/command_container/command_encoder_tgllp_and_later.inl b/shared/source/command_container/command_encoder_tgllp_and_later.inl index 65126cbe28..fb298c77a3 100644 --- a/shared/source/command_container/command_encoder_tgllp_and_later.inl +++ b/shared/source/command_container/command_encoder_tgllp_and_later.inl @@ -24,4 +24,14 @@ void EncodeWA::addPipeControlBeforeStateBaseAddress(LinearStream &comman NEO::EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(commandStream, args, rootDeviceEnvironment, isRcs); } +template <> +inline void EncodeMiArbCheck::adjust(MI_ARB_CHECK &miArbCheck, std::optional preParserDisable) { + if (DebugManager.flags.ForcePreParserEnabledForMiArbCheck.get() != -1) { + preParserDisable = !DebugManager.flags.ForcePreParserEnabledForMiArbCheck.get(); + } + if (preParserDisable.has_value()) { + miArbCheck.setPreParserDisable(preParserDisable.value()); + } +} + } // namespace NEO diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 6ea9f98dbd..9d6686f03a 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -15,7 +15,6 @@ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/root_device_environment.h" #include 
"shared/source/gmm_helper/client_context/gmm_client_context.h" -#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/constants.h" @@ -769,7 +768,8 @@ inline void EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(Line } template -void EncodeWA::adjustCompressionFormatForPlanarImage(uint32_t &compressionFormat, GMM_YUV_PLANE_ENUM plane) { +void EncodeWA::adjustCompressionFormatForPlanarImage(uint32_t &compressionFormat, int plane) { + static_assert(sizeof(plane) == sizeof(GMM_YUV_PLANE_ENUM)); if (plane == GMM_PLANE_Y) { compressionFormat &= 0xf; } else if ((plane == GMM_PLANE_U) || (plane == GMM_PLANE_V)) { @@ -798,13 +798,6 @@ inline void EncodeStoreMemory::programStoreDataImm(MI_STORE_DATA_IMM *cm *cmdBuffer = storeDataImmediate; } -template -inline void EncodeMiArbCheck::adjust(MI_ARB_CHECK &miArbCheck) { - if (DebugManager.flags.ForcePreParserEnabledForMiArbCheck.get() != -1) { - miArbCheck.setPreParserDisable(!DebugManager.flags.ForcePreParserEnabledForMiArbCheck.get()); - } -} - template inline void EncodeStoreMMIO::appendFlags(MI_STORE_REGISTER_MEM *storeRegMem, bool workloadPartition) { storeRegMem->setMmioRemapEnable(true); @@ -818,5 +811,4 @@ template size_t EncodeDispatchKernel::additionalSizeRequiredDsh(uint32_t iddCount) { return 0u; } - } // namespace NEO diff --git a/shared/source/command_container/encode_surface_state.h b/shared/source/command_container/encode_surface_state.h index 2d6faba910..8572865c85 100644 --- a/shared/source/command_container/encode_surface_state.h +++ b/shared/source/command_container/encode_surface_state.h @@ -61,19 +61,4 @@ struct EncodeSurfaceState { static void appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState); }; -template -struct EncodeWA { - static void encodeAdditionalPipelineSelect(LinearStream &stream, const PipelineSelectArgs &args, bool is3DPipeline, - const 
RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs); - static size_t getAdditionalPipelineSelectSize(Device &device, bool isRcs); - - static void addPipeControlPriorToNonPipelinedStateCommand(LinearStream &commandStream, PipeControlArgs args, - const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs); - static void setAdditionalPipeControlFlagsForNonPipelineStateCommand(PipeControlArgs &args); - - static void addPipeControlBeforeStateBaseAddress(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs, bool dcFlushRequired); - - static void adjustCompressionFormatForPlanarImage(uint32_t &compressionFormat, GMM_YUV_PLANE_ENUM plane); -}; - } // namespace NEO \ No newline at end of file diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index b62f124dec..e92156cd77 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1181,8 +1181,9 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert auto lock = obtainUniqueOwnership(); bool blitterDirectSubmission = this->isBlitterDirectSubmissionEnabled(); auto debugPauseEnabled = PauseOnGpuProperties::featureEnabled(DebugManager.flags.PauseOnBlitCopy.get()); + auto &rootDeviceEnvironment = this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]; auto &commandStream = getCS(BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, profilingEnabled, debugPauseEnabled, blitterDirectSubmission, - *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex])); + *rootDeviceEnvironment)); auto commandStreamStart = commandStream.getUsed(); auto newTaskCount = taskCount + 1; latestSentTaskCount = newTaskCount; @@ -1217,8 +1218,11 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert 
BlitCommandsHelper::encodeProfilingStartMmios(commandStream, *blitProperties.outputTimestampPacket); } - BlitCommandsHelper::dispatchBlitCommands(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]); - + BlitCommandsHelper::dispatchBlitCommands(blitProperties, commandStream, *rootDeviceEnvironment); + auto dummyAllocation = rootDeviceEnvironment->getDummyAllocation(); + if (dummyAllocation) { + makeResident(*dummyAllocation); + } if (blitProperties.outputTimestampPacket) { if (profilingEnabled) { MiFlushArgs args; diff --git a/shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl index bdd8499032..ee978f0558 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl @@ -5,7 +5,7 @@ * */ -#include "shared/source/command_container/encode_surface_state.h" +#include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/helpers/gfx_core_helper.h" diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index fc8f729723..0ae86734e8 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -523,3 +523,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, SelectCmdListHeapAddressModel, -1, "-1: default, DECLARE_DEBUG_VARIABLE(int32_t, EnableSetPair, -1, "Use SET_PAIR to pair two buffer objects behind the same file descriptor, -1: default, 0: disabled, 1: enabled") /* Binary Cache */ DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation") + +/* WORKAROUND FLAGS */ 
+DECLARE_DEBUG_VARIABLE(int32_t, ForceDummyBlitWa, 0, "-1: default, 0: disabled, 1: enabled, Forces a workaround with dummy blits, driver adds an extra blit before command MI_ARB_CHECK on bcs") diff --git a/shared/source/debugger/debugger_l0.inl b/shared/source/debugger/debugger_l0.inl index 2618ae9f77..3dceb92009 100644 --- a/shared/source/debugger/debugger_l0.inl +++ b/shared/source/debugger/debugger_l0.inl @@ -20,7 +20,6 @@ void DebuggerL0Hw::captureStateBaseAddress(NEO::LinearStream &cmdStre using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; - using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK; const auto gmmHelper = device->getGmmHelper(); const auto gpuAddress = gmmHelper->decanonize(sbaTrackingGpuVa.address); diff --git a/shared/source/debugger/debugger_l0_tgllp_and_later.inl b/shared/source/debugger/debugger_l0_tgllp_and_later.inl index ec8f3fa3f2..a9e043f489 100644 --- a/shared/source/debugger/debugger_l0_tgllp_and_later.inl +++ b/shared/source/debugger/debugger_l0_tgllp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,12 +9,12 @@ namespace NEO { template size_t DebuggerL0Hw::getSbaTrackingCommandsSize(size_t trackedAddressCount) { if (singleAddressSpaceSbaTracking) { - + EncodeDummyBlitWaArgs waArgs{false}; constexpr uint32_t aluCmdSize = sizeof(typename GfxFamily::MI_MATH) + sizeof(typename GfxFamily::MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE; - return 2 * (sizeof(typename GfxFamily::MI_ARB_CHECK) + sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)) + + return 2 * (EncodeMiArbCheck::getCommandSizeWithWa(waArgs) + sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)) + trackedAddressCount * (sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) + aluCmdSize + 2 * 
sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM) + 3 * sizeof(typename GfxFamily::MI_STORE_DATA_IMM) + - sizeof(typename GfxFamily::MI_ARB_CHECK) + + EncodeMiArbCheck::getCommandSizeWithWa(waArgs) + sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); } return trackedAddressCount * NEO::EncodeStoreMemory::getStoreDataImmSize(); @@ -25,7 +25,6 @@ void DebuggerL0Hw::programSbaTrackingCommandsSingleAddressSpace(NEO:: using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; - using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK; using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; using MI_NOOP = typename GfxFamily::MI_NOOP; @@ -58,12 +57,9 @@ void DebuggerL0Hw::programSbaTrackingCommandsSingleAddressSpace(NEO:: const auto cmdStreamCpuBase = reinterpret_cast(cmdStream.getCpuBase()); auto bbLevel = useFirstLevelBB ? 
MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH : MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH; - + EncodeDummyBlitWaArgs waArgs{false}; if (fieldOffsetAndValue.size()) { - auto arb = cmdStream.getSpaceForCmd(); - auto arbCmd = GfxFamily::cmdInitArbCheck; - arbCmd.setPreParserDisable(true); - *arb = arbCmd; + EncodeMiArbCheck::programWithWa(cmdStream, true, waArgs); // Jump to SDI command that is modified auto newBuffer = cmdStream.getSpaceForCmd(); @@ -91,10 +87,7 @@ void DebuggerL0Hw::programSbaTrackingCommandsSingleAddressSpace(NEO:: auto miStoreDataSettingSbaBufferAddress = cmdStream.getSpaceForCmd(); auto miStoreDataSettingSbaBufferAddress2 = cmdStream.getSpaceForCmd(); - auto arb = cmdStream.getSpaceForCmd(); - auto arbCmd = GfxFamily::cmdInitArbCheck; - arbCmd.setPreParserDisable(true); - *arb = arbCmd; + EncodeMiArbCheck::programWithWa(cmdStream, true, waArgs); // Jump to SDI command that is modified auto newBuffer = cmdStream.getSpaceForCmd(); @@ -155,10 +148,7 @@ void DebuggerL0Hw::programSbaTrackingCommandsSingleAddressSpace(NEO:: bbCmd.setSecondLevelBatchBuffer(bbLevel); *previousBuffer = bbCmd; - auto arbCmd = GfxFamily::cmdInitArbCheck; - auto arb = cmdStream.getSpaceForCmd(); - arbCmd.setPreParserDisable(false); - *arb = arbCmd; + EncodeMiArbCheck::programWithWa(cmdStream, false, waArgs); } } diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 44137d72ba..aef74eccb4 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -40,6 +40,7 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio Device::Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex) : executionEnvironment(executionEnvironment), rootDeviceIndex(rootDeviceIndex) { this->executionEnvironment->incRefInternal(); + this->executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setDummyBlitProperties(rootDeviceIndex); if 
(DebugManager.flags.NumberOfRegularContextsPerEngine.get() > 1) { this->numberOfRegularContextsPerEngine = static_cast(DebugManager.flags.NumberOfRegularContextsPerEngine.get()); diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 264c023f4c..df8cd8ee46 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -212,7 +212,8 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch { using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; - *schedulerCmdStream.getSpaceForCmd() = GfxFamily::cmdInitArbCheck; + EncodeDummyBlitWaArgs waArgs{false}; + EncodeMiArbCheck::programWithWa(schedulerCmdStream, std::nullopt, waArgs); if (DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get() != -1) { currentRelaxedOrderingQueueSize = static_cast(DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get()); diff --git a/shared/source/direct_submission/direct_submission_prefetch_mitigation_xe_hp_core_and_later.inl b/shared/source/direct_submission/direct_submission_prefetch_mitigation_xe_hp_core_and_later.inl index 9b2125d9ae..e447b37115 100644 --- a/shared/source/direct_submission/direct_submission_prefetch_mitigation_xe_hp_core_and_later.inl +++ b/shared/source/direct_submission/direct_submission_prefetch_mitigation_xe_hp_core_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ inline size_t DirectSubmissionHw::getSizePrefetchMitigati template inline size_t DirectSubmissionHw::getSizeDisablePrefetcher() { - using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK; - return sizeof(MI_ARB_CHECK); + EncodeDummyBlitWaArgs waArgs{false}; + return EncodeMiArbCheck::getCommandSizeWithWa(waArgs); } } // namespace NEO \ No newline at end of file diff --git 
a/shared/source/direct_submission/direct_submission_prefetcher_pvc_and_later.inl b/shared/source/direct_submission/direct_submission_prefetcher_pvc_and_later.inl index 7baa553c97..5c20c3b324 100644 --- a/shared/source/direct_submission/direct_submission_prefetcher_pvc_and_later.inl +++ b/shared/source/direct_submission/direct_submission_prefetcher_pvc_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,16 +11,10 @@ namespace NEO { template inline void DirectSubmissionHw::dispatchDisablePrefetcher(bool disable) { - using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK; if (isDisablePrefetcherRequired) { - MI_ARB_CHECK arbCheck = GfxFamily::cmdInitArbCheck; - arbCheck.setPreParserDisable(disable); - - EncodeMiArbCheck::adjust(arbCheck); - - MI_ARB_CHECK *arbCheckSpace = ringCommandStream.getSpaceForCmd(); - *arbCheckSpace = arbCheck; + EncodeDummyBlitWaArgs waArgs{false}; + EncodeMiArbCheck::programWithWa(ringCommandStream, disable, waArgs); } } diff --git a/shared/source/direct_submission/direct_submission_prefetcher_xe_hp_core_and_later.inl b/shared/source/direct_submission/direct_submission_prefetcher_xe_hp_core_and_later.inl index 686761b204..5c20c3b324 100644 --- a/shared/source/direct_submission/direct_submission_prefetcher_xe_hp_core_and_later.inl +++ b/shared/source/direct_submission/direct_submission_prefetcher_xe_hp_core_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,16 +11,10 @@ namespace NEO { template inline void DirectSubmissionHw::dispatchDisablePrefetcher(bool disable) { - using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK; if (isDisablePrefetcherRequired) { - MI_ARB_CHECK arbCheck = GfxFamily::cmdInitArbCheck; - arbCheck.setPreFetchDisable(disable); - - EncodeMiArbCheck::adjust(arbCheck); - - MI_ARB_CHECK 
*arbCheckSpace = ringCommandStream.getSpaceForCmd(); - *arbCheckSpace = arbCheck; + EncodeDummyBlitWaArgs waArgs{false}; + EncodeMiArbCheck::programWithWa(ringCommandStream, disable, waArgs); } } diff --git a/shared/source/direct_submission/relaxed_ordering_helper.h b/shared/source/direct_submission/relaxed_ordering_helper.h index 2fe2e3d48a..ec3c8db51f 100644 --- a/shared/source/direct_submission/relaxed_ordering_helper.h +++ b/shared/source/direct_submission/relaxed_ordering_helper.h @@ -67,8 +67,7 @@ struct StaticSchedulerSizeAndOffsetSection { (2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeMiPredicate::getCmdSize(); static constexpr uint64_t drainRequestSectionStart = tasksListLoopCheckSectionStart + tasksListLoopCheckSectionSize; - static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + (2 * EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart()); - + static constexpr uint64_t drainRequestSectionSize = (2 * EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart()); static constexpr uint64_t schedulerLoopCheckSectionJumpStart = drainRequestSectionStart + drainRequestSectionSize; static constexpr uint64_t schedulerLoopCheckSectionJumpSize = 2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_BATCH_BUFFER_START); diff --git a/shared/source/execution_environment/root_device_environment.cpp b/shared/source/execution_environment/root_device_environment.cpp index d31dcb6063..93b7dc2093 100644 --- a/shared/source/execution_environment/root_device_environment.cpp +++ b/shared/source/execution_environment/root_device_environment.cpp @@ -25,6 +25,8 @@ #include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/hw_info.h" +#include "shared/source/memory_manager/allocation_properties.h" +#include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include 
"shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/os_interface/hw_info_config.h" @@ -195,6 +197,33 @@ bool RootDeviceEnvironment::isNumberOfCcsLimited() const { return limitedNumberOfCcs; } +void RootDeviceEnvironment::initDummyAllocation() { + std::call_once(isDummyAllocationInitialized, [this]() { + auto customDeleter = [this](GraphicsAllocation *dummyAllocation) { + this->executionEnvironment.memoryManager->freeGraphicsMemory(dummyAllocation); + }; + auto dummyBlitAllocation = this->executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties( + *this->dummyBlitProperties.get()); + this->dummyAllocation = GraphicsAllocationUniquePtrType(dummyBlitAllocation, customDeleter); + }); +} + +void RootDeviceEnvironment::setDummyBlitProperties(uint32_t rootDeviceIndex) { + size_t size = 4 * 4096u; + this->dummyBlitProperties = std::make_unique( + rootDeviceIndex, + true, + size, + NEO::AllocationType::BUFFER, + false, + false, + systemMemoryBitfield); +} + +GraphicsAllocation *RootDeviceEnvironment::getDummyAllocation() const { + return dummyAllocation.get(); +} + template HelperType &RootDeviceEnvironment::getHelper() const { if constexpr (std::is_same_v) { diff --git a/shared/source/execution_environment/root_device_environment.h b/shared/source/execution_environment/root_device_environment.h index 46a7a66633..5b4422dcb7 100644 --- a/shared/source/execution_environment/root_device_environment.h +++ b/shared/source/execution_environment/root_device_environment.h @@ -10,6 +10,7 @@ #include "shared/source/helpers/affinity_mask.h" #include "shared/source/helpers/options.h" +#include #include #include @@ -35,7 +36,9 @@ class ProductHelper; class GfxCoreHelper; class ApiGfxCoreHelper; class CompilerProductHelper; +class GraphicsAllocation; +struct AllocationProperties; struct HardwareInfo; struct RootDeviceEnvironment { @@ -58,6 +61,8 @@ struct RootDeviceEnvironment { void initGmm(); void initDebugger(); void initDebuggerL0(Device 
*neoDevice); + MOCKABLE_VIRTUAL void initDummyAllocation(); + void setDummyBlitProperties(uint32_t rootDeviceIndex); MOCKABLE_VIRTUAL void prepareForCleanup() const; MOCKABLE_VIRTUAL bool initAilConfiguration(); @@ -77,6 +82,7 @@ struct RootDeviceEnvironment { template HelperType &getHelper() const; const ProductHelper &getProductHelper() const; + GraphicsAllocation *getDummyAllocation() const; std::unique_ptr sipKernels[static_cast(SipKernelType::COUNT)]; std::unique_ptr gmmHelper; @@ -100,7 +106,12 @@ struct RootDeviceEnvironment { AffinityMaskHelper deviceAffinityMask{true}; protected: + using GraphicsAllocationUniquePtrType = std::unique_ptr>; + GraphicsAllocationUniquePtrType dummyAllocation = nullptr; + bool limitedNumberOfCcs = false; + std::once_flag isDummyAllocationInitialized; + std::unique_ptr dummyBlitProperties; private: std::mutex mtx; diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index d669d53216..e90b290feb 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -22,6 +22,7 @@ enum class DebugPauseState : uint32_t; struct HardwareInfo; struct RootDeviceEnvironment; class ProductHelper; +struct EncodeDummyBlitWaArgs; template struct BlitCommandsHelper { @@ -32,8 +33,8 @@ struct BlitCommandsHelper { static uint64_t getMaxBlitHeightOverride(const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); static void dispatchPreBlitCommand(LinearStream &linearStream, const ProductHelper &productHelper); static size_t estimatePreBlitCommandSize(); - static void dispatchPostBlitCommand(LinearStream &linearStream, const ProductHelper &productHelper); - static size_t estimatePostBlitCommandSize(); + static void dispatchPostBlitCommand(LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment); + static size_t estimatePostBlitCommandSize(const RootDeviceEnvironment &rootDeviceEnvironment); static size_t 
estimateBlitCommandSize(const Vec3 ©Size, const CsrDependencies &csrDependencies, bool updateTimestampPacket, bool profilingEnabled, bool isImage, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, bool profilingEnabled, @@ -44,13 +45,16 @@ struct BlitCommandsHelper { static uint64_t calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); static uint64_t calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice); static uint64_t calculateBlitCommandSourceBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice); - static void dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); - static void dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); - static void dispatchBlitCommandsForBufferPerRow(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); - static void dispatchBlitCommandsForImageRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); + static void dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment); + static void dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment); + static void dispatchBlitCommandsForBufferPerRow(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment); + static void dispatchBlitCommandsForImageRegion(const BlitProperties &blitProperties, 
LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment); static void dispatchBlitMemoryColorFill(NEO::GraphicsAllocation *dstAlloc, uint64_t offset, uint32_t *pattern, size_t patternSize, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment); template static void dispatchBlitMemoryFill(NEO::GraphicsAllocation *dstAlloc, uint64_t offset, uint32_t *pattern, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment, COLOR_DEPTH depth); + static void dispatchDummyBlit(LinearStream &linearStream, EncodeDummyBlitWaArgs &waArgs); + static size_t getDummyBlitSize(const EncodeDummyBlitWaArgs &waArgs); + static bool isDummyBlitWaNeeded(const EncodeDummyBlitWaArgs &waArgs); template static void appendBlitCommandsForBuffer(const BlitProperties &blitProperties, T &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment); diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index 4fe2edb1e8..8b31801d71 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -58,12 +58,15 @@ size_t BlitCommandsHelper::estimatePreBlitCommandSize() { } template -void BlitCommandsHelper::dispatchPostBlitCommand(LinearStream &linearStream, const ProductHelper &productHelper) { +void BlitCommandsHelper::dispatchPostBlitCommand(LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { MiFlushArgs args; + EncodeDummyBlitWaArgs waArgs{true, &rootDeviceEnvironment}; + + auto &productHelper = rootDeviceEnvironment.getProductHelper(); if (DebugManager.flags.PostBlitCommand.get() != BlitterConstants::PostBlitMode::Default) { switch (DebugManager.flags.PostBlitCommand.get()) { case BlitterConstants::PostBlitMode::MiArbCheck: - EncodeMiArbCheck::program(linearStream); + EncodeMiArbCheck::programWithWa(linearStream, std::nullopt, waArgs); return; case 
BlitterConstants::PostBlitMode::MiFlush: EncodeMiFlushDW::programMiFlushDw(linearStream, 0, 0, args, productHelper); @@ -77,15 +80,16 @@ void BlitCommandsHelper::dispatchPostBlitCommand(LinearStream &linear EncodeMiFlushDW::programMiFlushDw(linearStream, 0, 0, args, productHelper); } - EncodeMiArbCheck::program(linearStream); + EncodeMiArbCheck::programWithWa(linearStream, std::nullopt, waArgs); } template -size_t BlitCommandsHelper::estimatePostBlitCommandSize() { +size_t BlitCommandsHelper::estimatePostBlitCommandSize(const RootDeviceEnvironment &rootDeviceEnvironment) { + EncodeDummyBlitWaArgs waArgs{true, const_cast(&rootDeviceEnvironment)}; if (DebugManager.flags.PostBlitCommand.get() != BlitterConstants::PostBlitMode::Default) { switch (DebugManager.flags.PostBlitCommand.get()) { case BlitterConstants::PostBlitMode::MiArbCheck: - return EncodeMiArbCheck::getCommandSize(); + return EncodeMiArbCheck::getCommandSizeWithWa(waArgs); case BlitterConstants::PostBlitMode::MiFlush: return EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); default: @@ -94,10 +98,10 @@ size_t BlitCommandsHelper::estimatePostBlitCommandSize() { } if (BlitCommandsHelper::miArbCheckWaRequired()) { - return (EncodeMiArbCheck::getCommandSize() + EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); + return (EncodeMiArbCheck::getCommandSizeWithWa(waArgs) + EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); } - return EncodeMiArbCheck::getCommandSize(); + return EncodeMiArbCheck::getCommandSizeWithWa(waArgs); } template @@ -123,7 +127,7 @@ size_t BlitCommandsHelper::estimateBlitCommandSize(const Vec3 sizePerBlit = sizeof(typename GfxFamily::XY_COPY_BLT); } - sizePerBlit += estimatePostBlitCommandSize(); + sizePerBlit += estimatePostBlitCommandSize(rootDeviceEnvironment); return TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies) + TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(csrDependencies) + (sizePerBlit * nBlits) + @@ -178,7 +182,7 @@ 
uint64_t BlitCommandsHelper::calculateBlitCommandSourceBaseAddress(co } template -void BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) { +void BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { uint64_t width = 1; uint64_t height = 1; @@ -224,7 +228,7 @@ void BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(const Bl auto bltStream = linearStream.getSpaceForCmd(); *bltStream = bltCmd; - dispatchPostBlitCommand(linearStream, productHelper); + dispatchPostBlitCommand(linearStream, rootDeviceEnvironment); auto blitSize = width * height; sizeToBlit -= blitSize; @@ -274,7 +278,7 @@ void BlitCommandsHelper::dispatchBlitMemoryFill(NEO::GraphicsAllocati } template -void BlitCommandsHelper::dispatchBlitCommandsForImageRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) { +void BlitCommandsHelper::dispatchBlitCommandsForImageRegion(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { auto &productHelper = rootDeviceEnvironment.getHelper(); auto srcSlicePitch = static_cast(blitProperties.srcSlicePitch); @@ -309,7 +313,7 @@ void BlitCommandsHelper::dispatchBlitCommandsForImageRegion(const Bli auto cmd = linearStream.getSpaceForCmd(); *cmd = bltCmd; - dispatchPostBlitCommand(linearStream, productHelper); + dispatchPostBlitCommand(linearStream, rootDeviceEnvironment); } } @@ -347,7 +351,7 @@ uint32_t BlitCommandsHelper::getAvailableBytesPerPixel(size_t copySiz } template -void BlitCommandsHelper::dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) { +void BlitCommandsHelper::dispatchBlitCommands(const BlitProperties 
&blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { if (blitProperties.isImageOperation()) { dispatchBlitCommandsForImageRegion(blitProperties, linearStream, rootDeviceEnvironment); } else { @@ -383,7 +387,7 @@ void BlitCommandsHelper::appendBlitCommandsMemCopy(const BlitProperti } template -void BlitCommandsHelper::dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) { +void BlitCommandsHelper::dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { auto &productHelper = rootDeviceEnvironment.getHelper(); const auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment); @@ -417,7 +421,7 @@ void BlitCommandsHelper::dispatchBlitCommandsForBufferRegion(const Bl auto cmd = linearStream.getSpaceForCmd(); *cmd = bltCmd; - dispatchPostBlitCommand(linearStream, productHelper); + dispatchPostBlitCommand(linearStream, rootDeviceEnvironment); srcAddress += width; dstAddress += width; @@ -525,4 +529,5 @@ template size_t BlitCommandsHelper::getWaCmdsSize(const BlitPropertiesContainer &blitPropertiesContainer) { return 0; } + } // namespace NEO diff --git a/shared/source/helpers/blit_commands_helper_bdw_and_later.inl b/shared/source/helpers/blit_commands_helper_bdw_and_later.inl index 5620e879b8..001ef3dfb0 100644 --- a/shared/source/helpers/blit_commands_helper_bdw_and_later.inl +++ b/shared/source/helpers/blit_commands_helper_bdw_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -101,4 +101,16 @@ void BlitCommandsHelper::appendClearColor(const BlitProperties &blitP template void BlitCommandsHelper::printImageBlitBlockCopyCommand(const typename GfxFamily::XY_BLOCK_COPY_BLT &blitCmd, const uint32_t sliceIndex) {} +template +void 
BlitCommandsHelper::dispatchDummyBlit(LinearStream &linearStream, EncodeDummyBlitWaArgs &waArgs) {} + +template +bool BlitCommandsHelper::isDummyBlitWaNeeded(const EncodeDummyBlitWaArgs &waArgs) { + return false; +} + +template +size_t BlitCommandsHelper::getDummyBlitSize(const EncodeDummyBlitWaArgs &waArgs) { + return 0u; +} } // namespace NEO diff --git a/shared/source/helpers/blit_commands_helper_xehp_and_later.inl b/shared/source/helpers/blit_commands_helper_xehp_and_later.inl index 4e677511e0..1bcc2c84ed 100644 --- a/shared/source/helpers/blit_commands_helper_xehp_and_later.inl +++ b/shared/source/helpers/blit_commands_helper_xehp_and_later.inl @@ -435,4 +435,48 @@ void BlitCommandsHelper::printImageBlitBlockCopyCommand(const typenam printf("DestinationArrayIndex: %u\n\n", blitCmd.getDestinationArrayIndex()); } +template +bool BlitCommandsHelper::isDummyBlitWaNeeded(const EncodeDummyBlitWaArgs &waArgs) { + if (waArgs.isBcs) { + UNRECOVERABLE_IF(!waArgs.rootDeviceEnvironment); + if (DebugManager.flags.ForceDummyBlitWa.get() != -1) { + return DebugManager.flags.ForceDummyBlitWa.get(); + } + auto &productHelper = waArgs.rootDeviceEnvironment->getProductHelper(); + return productHelper.isDummyBlitWaRequired(); + } + return false; +} + +template +void BlitCommandsHelper::dispatchDummyBlit(LinearStream &linearStream, EncodeDummyBlitWaArgs &waArgs) { + using XY_COLOR_BLT = typename GfxFamily::XY_COLOR_BLT; + + if (BlitCommandsHelper::isDummyBlitWaNeeded(waArgs)) { + auto blitCmd = GfxFamily::cmdInitXyColorBlt; + auto &rootDeviceEnvironment = waArgs.rootDeviceEnvironment; + + rootDeviceEnvironment->initDummyAllocation(); + auto dummyAllocation = rootDeviceEnvironment->getDummyAllocation(); + blitCmd.setDestinationBaseAddress(dummyAllocation->getGpuAddress()); + blitCmd.setColorDepth(COLOR_DEPTH::COLOR_DEPTH_64_BIT_COLOR); + blitCmd.setDestinationX2CoordinateRight(1u); + blitCmd.setDestinationY2CoordinateBottom(4u); + 
blitCmd.setDestinationPitch(static_cast(MemoryConstants::pageSize)); + + appendTilingEnable(blitCmd); + appendBlitCommandsForFillBuffer(dummyAllocation, blitCmd, *rootDeviceEnvironment); + + auto cmd = linearStream.getSpaceForCmd(); + *cmd = blitCmd; + } +} + +template +size_t BlitCommandsHelper::getDummyBlitSize(const EncodeDummyBlitWaArgs &waArgs) { + if (BlitCommandsHelper::isDummyBlitWaNeeded(waArgs)) { + return sizeof(typename GfxFamily::XY_COLOR_BLT); + } + return 0u; +} } // namespace NEO diff --git a/shared/source/memory_manager/os_agnostic_memory_manager.cpp b/shared/source/memory_manager/os_agnostic_memory_manager.cpp index f41b20f691..5d2fdcab76 100644 --- a/shared/source/memory_manager/os_agnostic_memory_manager.cpp +++ b/shared/source/memory_manager/os_agnostic_memory_manager.cpp @@ -40,7 +40,9 @@ void OsAgnosticMemoryManager::initialize(bool aubUsage) { for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); + auto &gfxCoreHelper = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHelper(); this->enable64kbpages[rootDeviceIndex] = is64kbPagesEnabled(hwInfo); + this->localMemorySupported.push_back(gfxCoreHelper.getEnableLocalMemory(*hwInfo)); auto gpuAddressSpace = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->capabilityTable.gpuAddressSpace; if (!getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, reservedCpuAddressRangeSize, rootDeviceIndex, gfxPartitions.size(), heapAssigner.apiAllowExternalHeapForSshAndDsh)) { initialized = false; diff --git a/shared/source/os_interface/device_factory.cpp b/shared/source/os_interface/device_factory.cpp index 706fac9cd4..053bc3cc18 100644 --- a/shared/source/os_interface/device_factory.cpp +++ b/shared/source/os_interface/device_factory.cpp @@ -53,9 +53,8 @@ bool 
DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(ExecutionE for (auto rootDeviceIndex = 0u; rootDeviceIndex < numRootDevices; rootDeviceIndex++) { auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex].get(); + rootDeviceEnvironment.setHwInfoAndInitHelpers(hwInfoConst); auto hardwareInfo = rootDeviceEnvironment.getMutableHardwareInfo(); - *hardwareInfo = *hwInfoConst; - rootDeviceEnvironment.initHelpers(); if (DebugManager.flags.OverrideRevision.get() != -1) { hardwareInfo->platform.usRevId = static_cast(DebugManager.flags.OverrideRevision.get()); diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index 28e97725ef..f8abb66648 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -165,6 +165,7 @@ class ProductHelper { virtual bool isBufferPoolAllocatorSupported() const = 0; virtual uint64_t overridePatIndex(AllocationType allocationType, uint64_t patIndex) const = 0; virtual bool isTlbFlushRequired() const = 0; + virtual bool isDummyBlitWaRequired() const = 0; virtual bool getFrontEndPropertyScratchSizeSupport() const = 0; virtual bool getFrontEndPropertyPrivateScratchSizeSupport() const = 0; virtual bool getFrontEndPropertyComputeDispatchAllWalkerSupport() const = 0; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index 71ad232c6f..a640ba5181 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -733,4 +733,9 @@ template bool ProductHelperHw::isCalculationForDisablingEuFusionWithDpasNeeded() const { return false; } + +template +bool ProductHelperHw::isDummyBlitWaRequired() const { + return false; +} } // namespace NEO diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h index f8881ebb3e..ce2131abfc 100644 --- 
a/shared/source/os_interface/product_helper_hw.h +++ b/shared/source/os_interface/product_helper_hw.h @@ -125,6 +125,7 @@ class ProductHelperHw : public ProductHelper { bool isBufferPoolAllocatorSupported() const override; uint64_t overridePatIndex(AllocationType allocationType, uint64_t patIndex) const override; bool isTlbFlushRequired() const override; + bool isDummyBlitWaRequired() const override; bool getFrontEndPropertyScratchSizeSupport() const override; bool getFrontEndPropertyPrivateScratchSizeSupport() const override; bool getFrontEndPropertyComputeDispatchAllWalkerSupport() const override; diff --git a/shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl b/shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl index 32f8ab678b..5128551391 100644 --- a/shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl +++ b/shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl @@ -253,4 +253,8 @@ template <> bool ProductHelperHw::isCalculationForDisablingEuFusionWithDpasNeeded() const { return true; } +template <> +bool ProductHelperHw::isDummyBlitWaRequired() const { + return true; +} } // namespace NEO diff --git a/shared/source/xe_hpg_core/mtl/os_agnostic_product_helper_mtl.inl b/shared/source/xe_hpg_core/mtl/os_agnostic_product_helper_mtl.inl index 52da2395bb..04fb89866b 100644 --- a/shared/source/xe_hpg_core/mtl/os_agnostic_product_helper_mtl.inl +++ b/shared/source/xe_hpg_core/mtl/os_agnostic_product_helper_mtl.inl @@ -139,4 +139,9 @@ int ProductHelperHw::getProductMaxPreferredSlmSize(const HardwareInf return preferredEnumValue; } } + +template <> +bool ProductHelperHw::isDummyBlitWaRequired() const { + return true; +} } // namespace NEO diff --git a/shared/test/common/mocks/mock_device.h b/shared/test/common/mocks/mock_device.h index a2c1972a83..c992ca05f1 100644 --- a/shared/test/common/mocks/mock_device.h +++ b/shared/test/common/mocks/mock_device.h @@ -76,7 +76,6 @@ class MockDevice : public RootDevice { using 
RootDevice::subdevices; void setOSTime(OSTime *osTime); - void setDriverInfo(DriverInfo *driverInfo); static bool createSingleDevice; bool createDeviceImpl() override; diff --git a/shared/test/common/mocks/mock_execution_environment.h b/shared/test/common/mocks/mock_execution_environment.h index 19f5d23652..c32f8597b0 100644 --- a/shared/test/common/mocks/mock_execution_environment.h +++ b/shared/test/common/mocks/mock_execution_environment.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -13,8 +13,8 @@ namespace NEO { struct MockRootDeviceEnvironment : public RootDeviceEnvironment { + using RootDeviceEnvironment::isDummyAllocationInitialized; using RootDeviceEnvironment::RootDeviceEnvironment; - ~MockRootDeviceEnvironment() override = default; void initAubCenter(bool localMemoryEnabled, const std::string &aubFileName, CommandStreamReceiverType csrType) override; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index e290452278..8cd3a83a7d 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -504,4 +504,5 @@ EnableMultipleRegularContextForBcs = -1 AppendAubStreamContextFlags = -1 DirectSubmissionRelaxedOrderingMinNumberOfClients = -1 UseDeprecatedClDeviceIpVersion = 0 -ExperimentalCopyThroughLockWaitlistSizeThreshold= -1 \ No newline at end of file +ExperimentalCopyThroughLockWaitlistSizeThreshold= -1 +ForceDummyBlitWa = 0 \ No newline at end of file diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index 6915cdcf61..ce64c65864 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -175,8 +175,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncoderTests, 
givenDebugFlagSetWhenProgrammi MI_ARB_CHECK buffer[2] = {}; LinearStream linearStream(buffer, sizeof(buffer)); + MockExecutionEnvironment executionEnvironment{}; + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + rootDeviceEnvironment.initGmm(); - EncodeMiArbCheck::program(linearStream); + EncodeDummyBlitWaArgs waArgs{}; + EncodeMiArbCheck::programWithWa(linearStream, false, waArgs); if (value == 0) { EXPECT_TRUE(buffer[0].getPreParserDisable()); diff --git a/shared/test/unit_test/debugger/test_l0_debugger_single_address_space.cpp b/shared/test/unit_test/debugger/test_l0_debugger_single_address_space.cpp index 9cc3b1889d..85a2530067 100644 --- a/shared/test/unit_test/debugger/test_l0_debugger_single_address_space.cpp +++ b/shared/test/unit_test/debugger/test_l0_debugger_single_address_space.cpp @@ -147,19 +147,19 @@ HWTEST2_P(L0DebuggerBBlevelParameterizedTest, GivenNonZeroSbaAddressesWhenProgra GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); - size_t sizeExpected = sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START); + size_t sizeExpected = EncodeMiArbCheck::getCommandSizeWithWa(EncodeDummyBlitWaArgs{}) + sizeof(MI_BATCH_BUFFER_START); for (int i = 0; i < 6; i++) { sizeExpected += NEO::EncodeSetMMIO::sizeIMM; sizeExpected += NEO::EncodeMath::streamCommandSize; sizeExpected += 2 * sizeof(MI_STORE_REGISTER_MEM); sizeExpected += 2 * sizeof(MI_STORE_DATA_IMM); - sizeExpected += sizeof(MI_ARB_CHECK); + sizeExpected += EncodeMiArbCheck::getCommandSizeWithWa(EncodeDummyBlitWaArgs{}); sizeExpected += sizeof(MI_BATCH_BUFFER_START); sizeExpected += sizeof(MI_STORE_DATA_IMM); } - sizeExpected += sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START); + sizeExpected += EncodeMiArbCheck::getCommandSizeWithWa(EncodeDummyBlitWaArgs{}) + sizeof(MI_BATCH_BUFFER_START); EXPECT_EQ(sizeExpected, cmdStream.getUsed()); @@ -259,17 +259,17 @@ 
HWTEST2_P(L0DebuggerBBlevelParameterizedTest, GivenOneNonZeroSbaAddressesWhenPro GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); - size_t sizeExpected = sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START); + size_t sizeExpected = EncodeMiArbCheck::getCommandSizeWithWa(EncodeDummyBlitWaArgs{}) + sizeof(MI_BATCH_BUFFER_START); sizeExpected += NEO::EncodeSetMMIO::sizeIMM; sizeExpected += NEO::EncodeMath::streamCommandSize; sizeExpected += 2 * sizeof(MI_STORE_REGISTER_MEM); sizeExpected += 2 * sizeof(MI_STORE_DATA_IMM); - sizeExpected += sizeof(MI_ARB_CHECK); + sizeExpected += EncodeMiArbCheck::getCommandSizeWithWa(EncodeDummyBlitWaArgs{}); sizeExpected += sizeof(MI_BATCH_BUFFER_START); sizeExpected += sizeof(MI_STORE_DATA_IMM); - sizeExpected += sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START); + sizeExpected += EncodeMiArbCheck::getCommandSizeWithWa(EncodeDummyBlitWaArgs{}) + sizeof(MI_BATCH_BUFFER_START); EXPECT_EQ(sizeExpected, cmdStream.getUsed()); EXPECT_EQ(sizeExpected, debugger->getSbaTrackingCommandsSize(1)); diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index bc935857e3..591ac890bc 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -14,6 +14,7 @@ #include "shared/source/direct_submission/relaxed_ordering_helper.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/gmm_lib.h" +#include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/register_offsets.h" @@ -1054,6 +1055,9 @@ struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBuf template bool verifyBbStart(typename 
FamilyType::MI_BATCH_BUFFER_START *cmd, uint64_t startAddress, bool indirect, bool predicate); + template + bool verifyDummyBlt(typename FamilyType::XY_COLOR_BLT *cmd); + DebugManagerStateRestore restore; FlushStampTracker flushStamp{true}; }; @@ -1091,6 +1095,14 @@ bool DirectSubmissionRelaxedOrderingTests::verifyLrr(typename FamilyType::MI_LOA return true; } +template +bool DirectSubmissionRelaxedOrderingTests::verifyDummyBlt(typename FamilyType::XY_COLOR_BLT *cmd) { + if (cmd->getDestinationX2CoordinateRight() == 1u && cmd->getDestinationY2CoordinateBottom() == 4u && cmd->getDestinationPitch() == static_cast(MemoryConstants::pageSize)) { + return true; + } + return false; +} + template bool DirectSubmissionRelaxedOrderingTests::verifyIncrementOrDecrement(void *cmds, AluRegisters aluRegister, bool increment) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; @@ -1305,6 +1317,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK; using MI_MATH = typename FamilyType::MI_MATH; + using XY_COLOR_BLT = typename FamilyType::XY_COLOR_BLT; uint64_t schedulerStartGpuAddress = schedulerAllocation.getGpuAddress(); void *schedulerCmds = schedulerAllocation.getUnderlyingBuffer(); @@ -1571,6 +1584,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap } // 5. 
Drain request section + auto arbCheck = reinterpret_cast(++lriCmd); if (memcmp(arbCheck, &FamilyType::cmdInitArbCheck, sizeof(MI_ARB_CHECK)) != 0) { return false; @@ -1665,7 +1679,6 @@ bool DirectSubmissionRelaxedOrderingTests::verifyDynamicSchedulerProgramming(Lin using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; using MI_MATH = typename FamilyType::MI_MATH; - using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK; HardwareParse hwParse; hwParse.parseCommands(cs, offset); @@ -1798,7 +1811,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenNewNumberOfClientsWhenDispa const uint64_t expectedQueueSizeValueVa = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress() + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::drainRequestSectionStart + - sizeof(typename FamilyType::MI_ARB_CHECK) + + EncodeMiArbCheck::getCommandSizeWithWa(EncodeDummyBlitWaArgs{}) + RelaxedOrderingHelper::getQueueSizeLimitValueOffset(); auto findStaticSchedulerUpdate = [&](LinearStream &cs, size_t offset, uint32_t expectedQueueSize) { diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_xe_hpc_core.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_xe_hpc_core.cpp index 5c3e2133aa..ab572c2fba 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_xe_hpc_core.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_xe_hpc_core.cpp @@ -1,10 +1,11 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * * SPDX-License-Identifier: MIT * */ +#include "shared/source/command_container/command_encoder.h" #include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h" #include "shared/source/xe_hp_core/hw_cmds.h" #include "shared/test/common/cmd_parse/hw_parse.h" @@ -21,7 +22,7 @@ XE_HPC_CORETEST_F(DirectSubmissionTestXeHpcCore, 
givenXeHpcCoreWhenDispatchDisab MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); - EXPECT_EQ(sizeof(MI_ARB_CHECK), directSubmission.getSizeDisablePrefetcher()); + EXPECT_EQ(EncodeMiArbCheck::getCommandSizeWithWa(EncodeDummyBlitWaArgs{}), directSubmission.getSizeDisablePrefetcher()); bool ret = directSubmission.allocateResources(); EXPECT_TRUE(ret); diff --git a/shared/test/unit_test/gen12lp/command_encoder_tests_gen12lp.cpp b/shared/test/unit_test/gen12lp/command_encoder_tests_gen12lp.cpp index 601cc39690..7af15435d4 100644 --- a/shared/test/unit_test/gen12lp/command_encoder_tests_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/command_encoder_tests_gen12lp.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/encode_surface_state.h" #include "shared/source/helpers/preamble.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" diff --git a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp index e0cf7bf5b6..c87c8cbfea 100644 --- a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp +++ b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp @@ -150,7 +150,7 @@ HWTEST_F(BlitTests, givenDebugVariablesWhenGettingMaxBlitSizeThenHonorUseProvide } HWTEST_F(BlitTests, givenDebugVariableWhenEstimatingPostBlitsCommandSizeThenReturnCorrectResult) { - const size_t arbCheckSize = sizeof(typename FamilyType::MI_ARB_CHECK); + size_t arbCheckSize = EncodeMiArbCheck::getCommandSizeWithWa(EncodeDummyBlitWaArgs{true, const_cast(&(pDevice->getRootDeviceEnvironment()))}); DebugManagerStateRestore restore{}; @@ -160,16 +160,16 @@ HWTEST_F(BlitTests, givenDebugVariableWhenEstimatingPostBlitsCommandSizeThenRetu expectedDefaultSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } - EXPECT_EQ(expectedDefaultSize, 
BlitCommandsHelper::estimatePostBlitCommandSize()); + EXPECT_EQ(expectedDefaultSize, BlitCommandsHelper::estimatePostBlitCommandSize(pDevice->getRootDeviceEnvironment())); DebugManager.flags.PostBlitCommand.set(BlitterConstants::PostBlitMode::MiArbCheck); - EXPECT_EQ(arbCheckSize, BlitCommandsHelper::estimatePostBlitCommandSize()); + EXPECT_EQ(arbCheckSize, BlitCommandsHelper::estimatePostBlitCommandSize(pDevice->getRootDeviceEnvironment())); DebugManager.flags.PostBlitCommand.set(BlitterConstants::PostBlitMode::MiFlush); - EXPECT_EQ(EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(), BlitCommandsHelper::estimatePostBlitCommandSize()); + EXPECT_EQ(EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(), BlitCommandsHelper::estimatePostBlitCommandSize(pDevice->getRootDeviceEnvironment())); DebugManager.flags.PostBlitCommand.set(BlitterConstants::PostBlitMode::None); - EXPECT_EQ(0u, BlitCommandsHelper::estimatePostBlitCommandSize()); + EXPECT_EQ(0u, BlitCommandsHelper::estimatePostBlitCommandSize(pDevice->getRootDeviceEnvironment())); } HWTEST_F(BlitTests, givenDebugVariableWhenDispatchingPostBlitsCommandThenUseCorrectCommands) { @@ -180,14 +180,15 @@ HWTEST_F(BlitTests, givenDebugVariableWhenDispatchingPostBlitsCommandThenUseCorr LinearStream linearStream{streamBuffer, sizeof(streamBuffer)}; GenCmdList commands{}; - size_t expectedDefaultSize = sizeof(MI_ARB_CHECK); + size_t expectedDefaultSize = EncodeMiArbCheck::getCommandSizeWithWa(EncodeDummyBlitWaArgs{true, const_cast(&(pDevice->getRootDeviceEnvironment()))}); if (BlitCommandsHelper::miArbCheckWaRequired()) { expectedDefaultSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } // -1: default - BlitCommandsHelper::dispatchPostBlitCommand(linearStream, this->pDevice->getProductHelper()); + BlitCommandsHelper::dispatchPostBlitCommand(linearStream, pDevice->getRootDeviceEnvironmentRef()); + EXPECT_EQ(expectedDefaultSize, linearStream.getUsed()); CmdParse::parseCommandBuffer(commands, linearStream.getCpuBase(), 
linearStream.getUsed()); @@ -209,7 +210,8 @@ HWTEST_F(BlitTests, givenDebugVariableWhenDispatchingPostBlitsCommandThenUseCorr linearStream.replaceBuffer(streamBuffer, sizeof(streamBuffer)); commands.clear(); DebugManager.flags.PostBlitCommand.set(BlitterConstants::PostBlitMode::MiArbCheck); - BlitCommandsHelper::dispatchPostBlitCommand(linearStream, this->pDevice->getProductHelper()); + BlitCommandsHelper::dispatchPostBlitCommand(linearStream, pDevice->getRootDeviceEnvironmentRef()); + CmdParse::parseCommandBuffer(commands, linearStream.getCpuBase(), linearStream.getUsed()); arbCheck = find(commands.begin(), commands.end()); EXPECT_NE(commands.end(), arbCheck); @@ -219,7 +221,8 @@ HWTEST_F(BlitTests, givenDebugVariableWhenDispatchingPostBlitsCommandThenUseCorr linearStream.replaceBuffer(streamBuffer, sizeof(streamBuffer)); commands.clear(); DebugManager.flags.PostBlitCommand.set(BlitterConstants::PostBlitMode::MiFlush); - BlitCommandsHelper::dispatchPostBlitCommand(linearStream, this->pDevice->getProductHelper()); + BlitCommandsHelper::dispatchPostBlitCommand(linearStream, pDevice->getRootDeviceEnvironmentRef()); + CmdParse::parseCommandBuffer(commands, linearStream.getCpuBase(), linearStream.getUsed()); auto miFlush = find(commands.begin(), commands.end()); EXPECT_NE(commands.end(), miFlush); @@ -229,7 +232,8 @@ HWTEST_F(BlitTests, givenDebugVariableWhenDispatchingPostBlitsCommandThenUseCorr linearStream.replaceBuffer(streamBuffer, sizeof(streamBuffer)); commands.clear(); DebugManager.flags.PostBlitCommand.set(BlitterConstants::PostBlitMode::None); - BlitCommandsHelper::dispatchPostBlitCommand(linearStream, this->pDevice->getProductHelper()); + BlitCommandsHelper::dispatchPostBlitCommand(linearStream, pDevice->getRootDeviceEnvironmentRef()); + EXPECT_EQ(0u, linearStream.getUsed()); } diff --git a/shared/test/unit_test/helpers/test_blit_commands_helper_xehp_and_later.cpp b/shared/test/unit_test/helpers/test_blit_commands_helper_xehp_and_later.cpp index 
f931448165..33e8c6102a 100644 --- a/shared/test/unit_test/helpers/test_blit_commands_helper_xehp_and_later.cpp +++ b/shared/test/unit_test/helpers/test_blit_commands_helper_xehp_and_later.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_container/command_encoder.h" #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" @@ -12,7 +13,10 @@ #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/os_interface/hw_info_config.h" +#include "shared/source/os_interface/product_helper_hw.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/helpers/raii_hw_info_config.h" +#include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm_client_context.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" @@ -1214,3 +1218,189 @@ HWTEST2_F(BlitTests, givenDebugVariableWhenDispatchBlitCommandsForImageRegionIsC << "DestinationArrayIndex: 1\n\n"; EXPECT_EQ(expectedOutput.str(), output); } + +template +class TestDummyBlitMockProductHelper : public ProductHelperHw { + public: + bool isDummyBlitWaRequired() const override { + return dummyBlitRequired; + } + uint32_t dummyBlitRequired = true; +}; + +HWTEST2_F(BlitTests, givenDispatchDummyBlitWhenDummyBlitWaRequiredThenColorBltProgrammedCorrectly, IsXeHPOrAbove) { + using XY_COLOR_BLT = typename FamilyType::XY_COLOR_BLT; + DebugManagerStateRestore dbgRestore; + DebugManager.flags.ForceDummyBlitWa.set(-1); + + auto &rootDeviceEnvironment = static_cast(pDevice->getRootDeviceEnvironmentRef()); + + RAIIProductHelperFactory> raii{ + rootDeviceEnvironment}; + auto &productHelper = *raii.mockProductHelper; + productHelper.dummyBlitRequired = true; + + uint32_t streamBuffer[100] = {}; + 
LinearStream stream(streamBuffer, sizeof(streamBuffer)); + + size_t expectedSize = 0u; + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); + + EncodeDummyBlitWaArgs waArgsWhenDefault{}; + auto val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenDefault); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenDefault); + EXPECT_EQ(0u, stream.getUsed()); + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); + + EncodeDummyBlitWaArgs waArgsWhenNotBcs{false, &rootDeviceEnvironment}; + val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenNotBcs); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenNotBcs); + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); + EXPECT_EQ(0u, stream.getUsed()); + + EncodeDummyBlitWaArgs waArgsWhenBcs{true, &rootDeviceEnvironment}; + expectedSize = sizeof(XY_COLOR_BLT); + val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenBcs); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenBcs); + EXPECT_NE(nullptr, rootDeviceEnvironment.getDummyAllocation()); + + EXPECT_EQ(expectedSize, stream.getUsed()); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(stream.getCpuBase(), 0), stream.getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + + EXPECT_EQ(rootDeviceEnvironment.getDummyAllocation()->getGpuAddress(), cmd->getDestinationBaseAddress()); + EXPECT_EQ(XY_COLOR_BLT::COLOR_DEPTH::COLOR_DEPTH_64_BIT_COLOR, cmd->getColorDepth()); + EXPECT_EQ(1u, cmd->getDestinationX2CoordinateRight()); + EXPECT_EQ(4u, cmd->getDestinationY2CoordinateBottom()); + EXPECT_EQ(static_cast(MemoryConstants::pageSize), cmd->getDestinationPitch()); + EXPECT_EQ(XY_COLOR_BLT::DESTINATION_SURFACE_TYPE::DESTINATION_SURFACE_TYPE_2D, cmd->getDestinationSurfaceType()); +} + +HWTEST2_F(BlitTests, 
givenDispatchDummyBlitWhenForceDummyBlitWaSetThenColorBltProgrammedCorrectly, IsXeHPOrAbove) { + using XY_COLOR_BLT = typename FamilyType::XY_COLOR_BLT; + DebugManagerStateRestore dbgRestore; + DebugManager.flags.ForceDummyBlitWa.set(1); + + auto &rootDeviceEnvironment = static_cast(pDevice->getRootDeviceEnvironmentRef()); + + uint32_t streamBuffer[100] = {}; + LinearStream stream(streamBuffer, sizeof(streamBuffer)); + + size_t expectedSize = 0u; + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); + + EncodeDummyBlitWaArgs waArgsWhenDefault{}; + auto val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenDefault); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenDefault); + EXPECT_EQ(0u, stream.getUsed()); + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); + + EncodeDummyBlitWaArgs waArgsWhenNotBcs{false, &rootDeviceEnvironment}; + val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenNotBcs); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenNotBcs); + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); + EXPECT_EQ(0u, stream.getUsed()); + + EncodeDummyBlitWaArgs waArgsWhenBcs{true, &rootDeviceEnvironment}; + expectedSize = sizeof(XY_COLOR_BLT); + val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenBcs); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenBcs); + EXPECT_NE(nullptr, rootDeviceEnvironment.getDummyAllocation()); + + EXPECT_EQ(expectedSize, stream.getUsed()); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(stream.getCpuBase(), 0), stream.getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + + EXPECT_EQ(rootDeviceEnvironment.getDummyAllocation()->getGpuAddress(), cmd->getDestinationBaseAddress()); + EXPECT_EQ(XY_COLOR_BLT::COLOR_DEPTH::COLOR_DEPTH_64_BIT_COLOR, 
cmd->getColorDepth()); + EXPECT_EQ(1u, cmd->getDestinationX2CoordinateRight()); + EXPECT_EQ(4u, cmd->getDestinationY2CoordinateBottom()); + EXPECT_EQ(static_cast(MemoryConstants::pageSize), cmd->getDestinationPitch()); + EXPECT_EQ(XY_COLOR_BLT::DESTINATION_SURFACE_TYPE::DESTINATION_SURFACE_TYPE_2D, cmd->getDestinationSurfaceType()); +} + +HWTEST2_F(BlitTests, givenDispatchDummyBlitWhenDummyBlitWaNotRequiredThenAdditionalCommandsAreNotProgrammed, IsXeHPOrAbove) { + using XY_COLOR_BLT = typename FamilyType::XY_COLOR_BLT; + DebugManagerStateRestore dbgRestore; + DebugManager.flags.ForceDummyBlitWa.set(-1); + auto &rootDeviceEnvironment = static_cast(pDevice->getRootDeviceEnvironmentRef()); + + RAIIProductHelperFactory> raii{ + rootDeviceEnvironment}; + auto &productHelper = *raii.mockProductHelper; + productHelper.dummyBlitRequired = false; + + uint32_t streamBuffer[100] = {}; + LinearStream stream(streamBuffer, sizeof(streamBuffer)); + + size_t expectedSize = 0u; + + EncodeDummyBlitWaArgs waArgsWhenDefault{}; + auto val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenDefault); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenDefault); + EXPECT_EQ(expectedSize, stream.getUsed()); + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); + + EncodeDummyBlitWaArgs waArgsWhenNotBcs{false, &rootDeviceEnvironment}; + val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenNotBcs); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenNotBcs); + EXPECT_EQ(expectedSize, stream.getUsed()); + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); + + EncodeDummyBlitWaArgs waArgsWhenBcs{true, &rootDeviceEnvironment}; + val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenBcs); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenBcs); + EXPECT_EQ(expectedSize, stream.getUsed()); + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); +} + 
+HWTEST2_F(BlitTests, givenDispatchDummyBlitWhenForceDummyBlitWaDisabledThenAdditionalCommandsAreNotProgrammed, IsXeHPOrAbove) { + using XY_COLOR_BLT = typename FamilyType::XY_COLOR_BLT; + DebugManagerStateRestore dbgRestore; + DebugManager.flags.ForceDummyBlitWa.set(0); + auto &rootDeviceEnvironment = static_cast(pDevice->getRootDeviceEnvironmentRef()); + + uint32_t streamBuffer[100] = {}; + LinearStream stream(streamBuffer, sizeof(streamBuffer)); + + size_t expectedSize = 0u; + + EncodeDummyBlitWaArgs waArgsWhenDefault{}; + auto val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenDefault); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenDefault); + EXPECT_EQ(expectedSize, stream.getUsed()); + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); + + EncodeDummyBlitWaArgs waArgsWhenNotBcs{false, &rootDeviceEnvironment}; + val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenNotBcs); + EXPECT_EQ(expectedSize, val); + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenNotBcs); + EXPECT_EQ(expectedSize, stream.getUsed()); + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); + + EncodeDummyBlitWaArgs waArgsWhenBcs{true, &rootDeviceEnvironment}; + val = BlitCommandsHelper::getDummyBlitSize(waArgsWhenBcs); + EXPECT_EQ(expectedSize, val); + + BlitCommandsHelper::dispatchDummyBlit(stream, waArgsWhenBcs); + EXPECT_EQ(expectedSize, stream.getUsed()); + EXPECT_EQ(nullptr, rootDeviceEnvironment.getDummyAllocation()); +} \ No newline at end of file diff --git a/shared/test/unit_test/os_interface/hw_info_config_tests.cpp b/shared/test/unit_test/os_interface/hw_info_config_tests.cpp index 81332bc2bd..4dad78746d 100644 --- a/shared/test/unit_test/os_interface/hw_info_config_tests.cpp +++ b/shared/test/unit_test/os_interface/hw_info_config_tests.cpp @@ -682,4 +682,8 @@ HWTEST_F(ProductHelperTest, givenProductHelperWhenCheckingIsMultiContextResource HWTEST_F(ProductHelperTest, 
givenProductHelperWhenCheckingIsUnlockingLockedPtrNecessaryThenReturnFalse) { EXPECT_FALSE(productHelper->isUnlockingLockedPtrNecessary(pInHwInfo)); +} + +HWTEST_F(ProductHelperTest, givenProductHelperWhenCheckDummyBlitWaRequiredThenReturnFalse) { + EXPECT_FALSE(productHelper->isDummyBlitWaRequired()); } \ No newline at end of file diff --git a/shared/test/unit_test/program/program_initialization_tests.cpp b/shared/test/unit_test/program/program_initialization_tests.cpp index e5b8382f46..13add0c45d 100644 --- a/shared/test/unit_test/program/program_initialization_tests.cpp +++ b/shared/test/unit_test/program/program_initialization_tests.cpp @@ -25,6 +25,7 @@ using namespace NEO; TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreNotExportedThenMemoryIsAllocatedAsNonSvmAllocation) { MockDevice device{}; REQUIRE_SVM_OR_SKIP(&device); + device.injectMemoryManager(new MockMemoryManager()); MockSVMAllocsManager svmAllocsManager(device.getMemoryManager(), false); WhiteBox emptyLinkerInput; std::vector initData; @@ -67,8 +68,8 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreNotExportedTh TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenMemoryIsAllocatedAsUsmDeviceAllocation) { MockDevice device{}; REQUIRE_SVM_OR_SKIP(&device); - MockMemoryManager memoryManager; - MockSVMAllocsManager svmAllocsManager(&memoryManager, false); + device.injectMemoryManager(new MockMemoryManager()); + MockSVMAllocsManager svmAllocsManager(device.getMemoryManager(), false); WhiteBox linkerInputExportGlobalVariables; WhiteBox linkerInputExportGlobalConstants; linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true; @@ -112,6 +113,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM TEST(AllocateGlobalSurfaceTest, GivenNullSvmAllocsManagerWhenGlobalsAreExportedThenMemoryIsAllocatedAsNonSvmAllocation) { MockDevice device{}; + device.injectMemoryManager(new MockMemoryManager()); 
WhiteBox linkerInputExportGlobalVariables; WhiteBox linkerInputExportGlobalConstants; linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true; @@ -233,6 +235,7 @@ TEST(AllocateGlobalSurfaceTest, GivenAllocationInLocalMemoryWhichRequiresBlitter TEST(AllocateGlobalSurfaceTest, whenAllocatingGlobalSurfaceWithNonZeroZeroInitSizeThenTransferOnlyInitDataToAllocation) { MockDevice device{}; WhiteBox emptyLinkerInput; + device.injectMemoryManager(new MockMemoryManager()); emptyLinkerInput.traits.exportsGlobalConstants = true; std::vector initData; initData.resize(64, 7u); diff --git a/shared/test/unit_test/xe_hpg_core/excludes_xe_hpg_core.cpp b/shared/test/unit_test/xe_hpg_core/excludes_xe_hpg_core.cpp index 73b104b550..220af07d65 100644 --- a/shared/test/unit_test/xe_hpg_core/excludes_xe_hpg_core.cpp +++ b/shared/test/unit_test/xe_hpg_core/excludes_xe_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,6 +11,7 @@ HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenSamplerStateWhenAdjustSamplerStat HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenAskedIfBlitterForImagesIsSupportedThenFalseIsReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenIsSystolicModeConfigurabledThenFalseIsReturned, IGFX_XE_HPG_CORE); +HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenCheckDummyBlitWaRequiredThenReturnFalse, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(CommandEncodeStatesTest, givenEventAddressWhenEncodeThenMocsFromGmmHelperIsSet, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(XeHPAndLaterPreemptionTests, GivenDebuggerUsedWhenProgrammingStateSipThenStateSipIsAdded, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(ComputeModeRequirements, 
givenComputeModeCmdSizeWhenLargeGrfModeChangeIsRequiredThenSCMCommandSizeIsCalculated, IGFX_XE_HPG_CORE); diff --git a/shared/test/unit_test/xe_hpg_core/gfx_core_helper_tests_xe_hpg_core.cpp b/shared/test/unit_test/xe_hpg_core/gfx_core_helper_tests_xe_hpg_core.cpp index 190d56d1ef..31bc4d0d59 100644 --- a/shared/test/unit_test/xe_hpg_core/gfx_core_helper_tests_xe_hpg_core.cpp +++ b/shared/test/unit_test/xe_hpg_core/gfx_core_helper_tests_xe_hpg_core.cpp @@ -172,6 +172,11 @@ XE_HPG_CORETEST_F(ProductHelperTestXeHpgCore, givenProductHelperWhenCheckTimesta EXPECT_FALSE(productHelper.isTimestampWaitSupportedForEvents()); } +XE_HPG_CORETEST_F(ProductHelperTestXeHpgCore, givenProductHelperWhenCheckDummyBlitWaRequiredThenReturnTrue) { + auto &productHelper = getHelper(); + EXPECT_TRUE(productHelper.isDummyBlitWaRequired()); +} + XE_HPG_CORETEST_F(GfxCoreHelperTestXeHpgCore, givenGfxCoreHelperWhenCheckTimestampWaitSupportForQueuesThenReturnFalse) { auto &gfxCoreHelper = getHelper(); EXPECT_FALSE(gfxCoreHelper.isTimestampWaitSupportedForQueues());