diff --git a/opencl/source/os_interface/linux/drm_command_stream.inl b/opencl/source/os_interface/linux/drm_command_stream.inl index df3bda851c..2e55838efb 100644 --- a/opencl/source/os_interface/linux/drm_command_stream.inl +++ b/opencl/source/os_interface/linux/drm_command_stream.inl @@ -77,6 +77,10 @@ bool DrmCommandStreamReceiver::flush(BatchBuffer &batchBuffer, Reside memoryOperationsInterface->makeResidentWithinOsContext(this->osContext, ArrayRef(&batchBuffer.commandBufferAllocation, 1), true); return this->directSubmission->dispatchCommandBuffer(batchBuffer, *this->flushStamp.get()); } + if (this->blitterDirectSubmission.get()) { + memoryOperationsInterface->makeResidentWithinOsContext(this->osContext, ArrayRef(&batchBuffer.commandBufferAllocation, 1), true); + return this->blitterDirectSubmission->dispatchCommandBuffer(batchBuffer, *this->flushStamp.get()); + } this->flushStamp->setStamp(bb->peekHandle()); this->flushInternal(batchBuffer, allocationsForResidency); diff --git a/opencl/source/os_interface/windows/wddm_device_command_stream.inl b/opencl/source/os_interface/windows/wddm_device_command_stream.inl index cbf2b1f3ff..6c241edfb2 100644 --- a/opencl/source/os_interface/windows/wddm_device_command_stream.inl +++ b/opencl/source/os_interface/windows/wddm_device_command_stream.inl @@ -77,6 +77,9 @@ bool WddmCommandStreamReceiver::flush(BatchBuffer &batchBuffer, Resid if (directSubmission.get()) { return directSubmission->dispatchCommandBuffer(batchBuffer, *(flushStamp.get())); } + if (blitterDirectSubmission.get()) { + return blitterDirectSubmission->dispatchCommandBuffer(batchBuffer, *(flushStamp.get())); + } COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandBufferHeader); pHeader->RequiresCoherency = batchBuffer.requiresCoherency; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index a51a9208de..9a08b0fa64 100644 --- 
a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -222,8 +222,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenNoBlitterOverrideWhenBlitterNotSuppo properties.engineSupported = false; properties.submitOnInit = false; bool startOnInit = true; - EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit)); + bool startInContext = false; + EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); + EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenNoBlitterOverrideWhenBlitterSupportedThenExpectTrueReturned) { @@ -233,8 +235,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenNoBlitterOverrideWhenBlitterSupporte properties.engineSupported = true; properties.submitOnInit = true; bool startOnInit = false; - EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit)); + bool startInContext = false; + EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit, startInContext)); EXPECT_TRUE(startOnInit); + EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenBlitterOverrideEnableWhenBlitterNotSupportedThenExpectTrueReturned) { @@ -246,8 +250,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenBlitterOverrideEnableWhenBlitterNotS properties.engineSupported = false; properties.submitOnInit = false; bool startOnInit = false; - EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit)); + bool startInContext = false; + EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit, startInContext)); EXPECT_TRUE(startOnInit); + 
EXPECT_TRUE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenBlitterOverrideEnableAndNoStartWhenBlitterNotSupportedThenExpectTrueReturnedStartOnInitSetToTrue) { @@ -259,8 +265,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenBlitterOverrideEnableAndNoStartWhenB properties.engineSupported = false; properties.submitOnInit = true; bool startOnInit = true; - EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit)); + bool startInContext = false; + EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); + EXPECT_TRUE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenBlitterOverrideDisableWhenBlitterSupportedThenExpectFalseReturned) { @@ -272,8 +280,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenBlitterOverrideDisableWhenBlitterSup properties.engineSupported = true; properties.submitOnInit = false; bool startOnInit = true; - EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit)); + bool startInContext = false; + EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); + EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenNoRenderOverrideWhenRenderNotSupportedThenExpectFalseReturned) { @@ -283,8 +293,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenNoRenderOverrideWhenRenderNotSupport properties.engineSupported = false; properties.submitOnInit = false; bool startOnInit = true; - EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit)); + bool startInContext = false; + EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit, startInContext)); 
EXPECT_FALSE(startOnInit); + EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenNoRenderOverrideWhenRenderSupportedThenExpectTrueReturned) { @@ -294,8 +306,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenNoRenderOverrideWhenRenderSupportedT properties.engineSupported = true; properties.submitOnInit = true; bool startOnInit = false; - EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit)); + bool startInContext = false; + EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit, startInContext)); EXPECT_TRUE(startOnInit); + EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenRenderOverrideEnableWhenRenderNotSupportedThenExpectTrueReturned) { @@ -307,8 +321,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenRenderOverrideEnableWhenRenderNotSup properties.engineSupported = false; properties.submitOnInit = false; bool startOnInit = false; - EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit)); + bool startInContext = false; + EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit, startInContext)); EXPECT_TRUE(startOnInit); + EXPECT_TRUE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenRenderOverrideEnableAndNoStartWhenRenderNotSupportedThenExpectTrueReturnedAndStartOnInitSetFalse) { @@ -320,8 +336,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenRenderOverrideEnableAndNoStartWhenRe properties.engineSupported = false; properties.submitOnInit = true; bool startOnInit = true; - EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit)); + bool startInContext = false; + EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit, 
startInContext)); EXPECT_FALSE(startOnInit); + EXPECT_TRUE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenRenderOverrideDisableWhenRenderSupportedThenExpectFalseReturned) { @@ -333,8 +351,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenRenderOverrideDisableWhenRenderSuppo properties.engineSupported = true; properties.submitOnInit = false; bool startOnInit = true; - EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit)); + bool startInContext = false; + EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); + EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenNoComputeOverrideWhenComputeNotSupportedThenExpectFalseReturned) { @@ -344,8 +364,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenNoComputeOverrideWhenComputeNotSuppo properties.engineSupported = false; properties.submitOnInit = false; bool startOnInit = true; - EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit)); + bool startInContext = false; + EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); + EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenNoComputeOverrideWhenComputeSupportedThenExpectTrueReturned) { @@ -355,8 +377,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenNoComputeOverrideWhenComputeSupporte properties.engineSupported = true; properties.submitOnInit = true; bool startOnInit = false; - EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit)); + bool startInContext = false; + EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit, startInContext)); 
EXPECT_TRUE(startOnInit); + EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenComputeOverrideEnableWhenComputeNotSupportedThenExpectTrueReturned) { @@ -368,8 +392,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenComputeOverrideEnableWhenComputeNotS properties.engineSupported = false; properties.submitOnInit = false; bool startOnInit = false; - EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit)); + bool startInContext = false; + EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit, startInContext)); EXPECT_TRUE(startOnInit); + EXPECT_TRUE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenComputeOverrideEnableAndNoStartWhenComputeNotSupportedThenExpectTrueReturnedAndStartOnInitSetToFalse) { @@ -381,8 +407,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenComputeOverrideEnableAndNoStartWhenC properties.engineSupported = false; properties.submitOnInit = true; bool startOnInit = true; - EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit)); + bool startInContext = false; + EXPECT_TRUE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); + EXPECT_TRUE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenComputeOverrideDisableWhenComputeSupportedThenExpectFalseReturned) { @@ -394,8 +422,10 @@ HWTEST_F(UltCommandStreamReceiverTest, givenComputeOverrideDisableWhenComputeSup properties.engineSupported = true; properties.submitOnInit = false; bool startOnInit = true; - EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit)); + bool startInContext = false; + EXPECT_FALSE(commandStreamReceiver.checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, 
startOnInit, startInContext)); EXPECT_FALSE(startOnInit); + EXPECT_FALSE(startInContext); } typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTests; @@ -574,7 +604,7 @@ HWTEST_F(BcsTests, whenAskingForCmdSizeForMiFlushDwWithMemoryWriteThenReturnCorr EXPECT_EQ(miFlushDwSize + additionalSize, totalSize); } -HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCalculateForAllAttachedProperites) { +HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenEstimatingCommandsSizeThenCalculateForAllAttachedProperites) { const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; const uint32_t numberOfBlts = 3; const size_t bltSize = (3 * max2DBlitSize); @@ -598,12 +628,41 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCa expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( - blitPropertiesContainer, false, false, pClDevice->getRootDeviceEnvironment()); + blitPropertiesContainer, false, false, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); } -HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeForWriteReadBufferRectThenCalculateForAllAttachedProperites) { +HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenDirectsubmissionEnabledEstimatingCommandsSizeThenCalculateForAllAttachedProperites) { + const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; + const uint32_t numberOfBlts = 3; + const size_t bltSize = (3 * max2DBlitSize); + const uint32_t numberOfBlitOperations = 4; + + auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_START); + constexpr size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + auto expectedBlitInstructionsSize = 
cmdsSizePerBlit * numberOfBlts; + + auto expectedAlignedSize = baseSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()); + + BlitPropertiesContainer blitPropertiesContainer; + for (uint32_t i = 0; i < numberOfBlitOperations; i++) { + BlitProperties blitProperties; + blitProperties.copySize = {bltSize, 1, 1}; + blitPropertiesContainer.push_back(blitProperties); + + expectedAlignedSize += expectedBlitInstructionsSize; + } + + expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); + + auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( + blitPropertiesContainer, false, false, true, pClDevice->getRootDeviceEnvironment()); + + EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); +} + +HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenEstimatingCommandsSizeForWriteReadBufferRectThenCalculateForAllAttachedProperites) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; const Vec3 bltSize = {(3 * max2DBlitSize), 4, 2}; const size_t numberOfBlts = 3 * bltSize.y * bltSize.z; @@ -627,7 +686,36 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeForWri expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( - blitPropertiesContainer, false, false, pClDevice->getRootDeviceEnvironment()); + blitPropertiesContainer, false, false, false, pClDevice->getRootDeviceEnvironment()); + + EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); +} + +HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenDirectSubmissionEnabledEstimatingCommandsSizeForWriteReadBufferRectThenCalculateForAllAttachedProperites) { + constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; + const Vec3 bltSize = {(3 * max2DBlitSize), 4, 2}; + const size_t numberOfBlts = 3 * bltSize.y * bltSize.z; + const 
size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z; + const size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + + auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_START); + auto expectedBlitInstructionsSize = cmdsSizePerBlit * numberOfBlts; + + auto expectedAlignedSize = baseSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()); + + BlitPropertiesContainer blitPropertiesContainer; + for (uint32_t i = 0; i < numberOfBlitOperations; i++) { + BlitProperties blitProperties; + blitProperties.copySize = bltSize; + blitPropertiesContainer.push_back(blitProperties); + + expectedAlignedSize += expectedBlitInstructionsSize; + } + + expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); + + auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( + blitPropertiesContainer, false, false, true, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index 744296c648..a91836f5f0 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -6,6 +6,9 @@ */ #include "shared/source/command_stream/scratch_space_controller_base.h" +#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h" +#include "shared/test/unit_test/helpers/ult_hw_config.h" +#include "shared/test/unit_test/mocks/mock_direct_submission_hw.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" @@ -66,7 +69,7 @@ HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequir 
blitPropertiesContainer.push_back(blitProperties); auto estimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( - blitPropertiesContainer, false, true, pClDevice->getRootDeviceEnvironment()); + blitPropertiesContainer, false, true, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedSize, estimatedSize); EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(blitProperties.copySize, pClDevice->getRootDeviceEnvironment())); @@ -1166,6 +1169,50 @@ HWTEST_F(BcsTests, givenInvalidBlitDirectionWhenConstructPropertiesThenException EXPECT_THROW(ClBlitProperties::constructProperties(static_cast(7), csr, {}), std::exception); } +HWTEST_F(BcsTests, givenBlitterDirectSubmissionEnabledWhenProgrammingBlitterThenExpectRingBufferDispatched) { + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + VariableBackup backup(&ultHwConfig); + ultHwConfig.csrBaseCallBlitterDirectSubmissionAvailable = true; + + auto &csr = pDevice->getUltCommandStreamReceiver(); + using DirectSubmission = MockDirectSubmissionHw>; + + csr.blitterDirectSubmission = std::make_unique(*pDevice, *csr.osContext); + csr.recordFlusheBatchBuffer = true; + DirectSubmission *directSubmission = reinterpret_cast(csr.blitterDirectSubmission.get()); + bool initRet = directSubmission->initialize(true); + EXPECT_TRUE(initRet); + + cl_int retVal = CL_SUCCESS; + auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + void *hostPtr = reinterpret_cast(0x12340000); + size_t numberNodesPerContainer = 5; + auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); + + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, graphicsAllocation, nullptr, hostPtr, + graphicsAllocation->getGpuAddress(), 0, + 0, 0, {1, 1, 1}, 0, 0, 0, 0); + + MockTimestampPacketContainer 
timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); + MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); + blitProperties.csrDependencies.push_back(&timestamp0); + blitProperties.csrDependencies.push_back(&timestamp1); + + blitBuffer(&csr, blitProperties, true); + + HardwareParse hwParser; + hwParser.parseCommands(csr.commandStream, 0u); + ASSERT_NE(nullptr, csr.latestFlushedBatchBuffer.endCmdPtr); + + MI_BATCH_BUFFER_START *bbStart = hwParser.getCommand(); + ASSERT_NE(nullptr, bbStart); + EXPECT_EQ(csr.latestFlushedBatchBuffer.endCmdPtr, bbStart); + EXPECT_EQ(0ull, bbStart->getBatchBufferStartAddressGraphicsaddress472()); +} + struct MockScratchSpaceController : ScratchSpaceControllerBase { using ScratchSpaceControllerBase::privateScratchAllocation; using ScratchSpaceControllerBase::ScratchSpaceControllerBase; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 0e13098f49..386fc11d95 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -322,8 +322,8 @@ HWTEST_F(CommandStreamReceiverTest, whenDirectSubmissionDisabledThenExpectNoFeat EXPECT_FALSE(csr.isBlitterDirectSubmissionEnabled()); } -struct InitDirectSubmissionTest : public ::testing::Test { - void SetUp() override { +struct InitDirectSubmissionFixture { + void SetUp() { DebugManager.flags.EnableDirectSubmission.set(1); executionEnvironment = new MockExecutionEnvironment(); DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); @@ -333,11 +333,15 @@ struct InitDirectSubmissionTest : public ::testing::Test { device.reset(new MockDevice(executionEnvironment, 0u)); } + void TearDown() {} + DebugManagerStateRestore restore; MockExecutionEnvironment *executionEnvironment; std::unique_ptr device; }; +using 
InitDirectSubmissionTest = Test; + HWTEST_F(InitDirectSubmissionTest, whenDirectSubmissionEnabledOnRcsThenExpectFeatureAvailable) { auto csr = std::make_unique>(*device->executionEnvironment, device->getRootDeviceIndex()); std::unique_ptr osContext(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), @@ -559,6 +563,27 @@ HWTEST_F(InitDirectSubmissionTest, givenNonDefaultContextContextWhenDirectSubmis csr.reset(); } +HWTEST_F(InitDirectSubmissionTest, GivenBlitterOverrideEnabledWhenBlitterIsNonDefaultContextThenExpectDirectSubmissionStarted) { + DebugManager.flags.DirectSubmissionOverrideBlitterSupport.set(1); + + auto csr = std::make_unique>(*device->executionEnvironment, device->getRootDeviceIndex()); + std::unique_ptr osContext(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), + 0, device->getDeviceBitfield(), aub_stream::ENGINE_BCS, PreemptionMode::ThreadGroup, + false, false, false)); + osContext->setDefaultContext(false); + + auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); + hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].engineSupported = false; + hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].useNonDefault = false; + hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].submitOnInit = false; + + bool ret = csr->initDirectSubmission(*device, *osContext.get()); + EXPECT_TRUE(ret); + EXPECT_FALSE(csr->isDirectSubmissionEnabled()); + EXPECT_TRUE(csr->isBlitterDirectSubmissionEnabled()); + EXPECT_TRUE(osContext->isDirectSubmissionActive()); +} + HWTEST_F(CommandStreamReceiverTest, whenCsrIsCreatedThenUseTimestampPacketWriteIfPossible) { CommandStreamReceiverHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_EQ(UnitTestHelper::isTimestampPacketWriteSupported(), csr.peekTimestampPacketWriteEnabled()); diff --git 
a/opencl/test/unit_test/libult/ult_command_stream_receiver.h b/opencl/test/unit_test/libult/ult_command_stream_receiver.h index bc59a89452..cabfbead4e 100644 --- a/opencl/test/unit_test/libult/ult_command_stream_receiver.h +++ b/opencl/test/unit_test/libult/ult_command_stream_receiver.h @@ -29,8 +29,10 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass = CommandStreamReceiverHw; public: + using BaseClass::blitterDirectSubmission; using BaseClass::checkPlatformSupportsGpuIdleImplicitFlush; using BaseClass::checkPlatformSupportsNewResourceImplicitFlush; + using BaseClass::directSubmission; using BaseClass::dshState; using BaseClass::getCmdSizeForPrologue; using BaseClass::getScratchPatchAddress; diff --git a/opencl/test/unit_test/mocks/linux/mock_drm_command_stream_receiver.h b/opencl/test/unit_test/mocks/linux/mock_drm_command_stream_receiver.h index 9c55c392d4..b34c3044f2 100644 --- a/opencl/test/unit_test/mocks/linux/mock_drm_command_stream_receiver.h +++ b/opencl/test/unit_test/mocks/linux/mock_drm_command_stream_receiver.h @@ -20,6 +20,7 @@ class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver::residency; using CommandStreamReceiverHw::directSubmission; + using CommandStreamReceiverHw::blitterDirectSubmission; using CommandStreamReceiverHw::CommandStreamReceiver::lastSentSliceCount; TestedDrmCommandStreamReceiver(gemCloseWorkerMode mode, ExecutionEnvironment &executionEnvironment) diff --git a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp index ddd11394c6..a0b69826a0 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp @@ -1312,11 +1312,39 @@ struct DrmCommandStreamDirectSubmissionTest : public DrmCommandStreamEnhancedTes DebugManagerStateRestore restorer; }; +struct DrmCommandStreamBlitterDirectSubmissionTest 
: public DrmCommandStreamDirectSubmissionTest { + template + void SetUpT() { + DebugManager.flags.DirectSubmissionOverrideBlitterSupport.set(1u); + DebugManager.flags.DirectSubmissionOverrideRenderSupport.set(0u); + DebugManager.flags.DirectSubmissionOverrideComputeSupport.set(0u); + + DrmCommandStreamDirectSubmissionTest::SetUpT(); + + osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), + 0, device->getDeviceBitfield(), aub_stream::ENGINE_BCS, PreemptionMode::ThreadGroup, + false, false, false)); + csr->initDirectSubmission(*device.get(), *osContext.get()); + } + + template + void TearDownT() { + DrmCommandStreamDirectSubmissionTest::TearDownT(); + } + + std::unique_ptr osContext; +}; + template struct MockDrmDirectSubmission : public DrmDirectSubmission> { using DrmDirectSubmission>::currentTagData; }; +template +struct MockDrmBlitterDirectSubmission : public DrmDirectSubmission> { + using DrmDirectSubmission>::currentTagData; +}; + HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionWhenFlushThenFlushStampIsNotUpdated) { auto &cs = csr->getCS(); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); @@ -1331,6 +1359,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmi EXPECT_EQ(csr->obtainCurrentFlushStamp(), flushStamp); auto directSubmission = static_cast *>(csr)->directSubmission.get(); + ASSERT_NE(nullptr, directSubmission); static_cast *>(directSubmission)->currentTagData.tagValue = 0u; } @@ -1348,9 +1377,50 @@ HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmi EXPECT_EQ(memoryOperationsInterface->isResident(device.get(), *batchBuffer.commandBufferAllocation), MemoryOperationsStatus::SUCCESS); auto directSubmission = static_cast *>(csr)->directSubmission.get(); + ASSERT_NE(nullptr, directSubmission); static_cast *>(directSubmission)->currentTagData.tagValue = 0u; } 
+HWTEST_TEMPLATED_F(DrmCommandStreamBlitterDirectSubmissionTest, givenEnabledDirectSubmissionOnBlitterWhenFlushThenFlushStampIsNotUpdated) { + auto &cs = csr->getCS(); + CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); + CommandStreamReceiverHw::alignToCacheLine(cs); + BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; + uint8_t bbStart[64]; + batchBuffer.endCmdPtr = &bbStart[0]; + + auto flushStamp = csr->obtainCurrentFlushStamp(); + csr->flush(batchBuffer, csr->getResidencyAllocations()); + + EXPECT_EQ(csr->obtainCurrentFlushStamp(), flushStamp); + + auto directSubmission = static_cast *>(csr)->blitterDirectSubmission.get(); + ASSERT_NE(nullptr, directSubmission); + static_cast *>(directSubmission)->currentTagData.tagValue = 0u; + + EXPECT_EQ(nullptr, static_cast *>(csr)->directSubmission.get()); +} + +HWTEST_TEMPLATED_F(DrmCommandStreamBlitterDirectSubmissionTest, givenEnabledDirectSubmissionOnBlitterWhenFlushThenCommandBufferAllocationIsResident) { + auto &cs = csr->getCS(); + CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); + CommandStreamReceiverHw::alignToCacheLine(cs); + BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; + uint8_t bbStart[64]; + batchBuffer.endCmdPtr = &bbStart[0]; + + csr->flush(batchBuffer, csr->getResidencyAllocations()); + + auto memoryOperationsInterface = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface.get(); + EXPECT_EQ(memoryOperationsInterface->isResident(device.get(), *batchBuffer.commandBufferAllocation), MemoryOperationsStatus::SUCCESS); + + auto directSubmission = static_cast *>(csr)->blitterDirectSubmission.get(); + ASSERT_NE(nullptr, directSubmission); + static_cast *>(directSubmission)->currentTagData.tagValue = 0u; + + 
EXPECT_EQ(nullptr, static_cast *>(csr)->directSubmission.get()); +} + HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, CheckDrmFree) { auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); diff --git a/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp b/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp index f10ba19277..dd5eeb75b6 100644 --- a/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp +++ b/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp @@ -123,10 +123,16 @@ struct MockWddmCsr : public WddmCommandStreamReceiver { } bool ret = true; if (DebugManager.flags.EnableDirectSubmission.get() == 1) { - directSubmission = std::make_unique< - MockWddmDirectSubmission>>(device, osContext); - ret = directSubmission->initialize(true); - this->dispatchMode = DispatchMode::ImmediateDispatch; + if (!initBlitterDirectSubmission) { + directSubmission = std::make_unique< + MockWddmDirectSubmission>>(device, osContext); + ret = directSubmission->initialize(true); + this->dispatchMode = DispatchMode::ImmediateDispatch; + } else { + blitterDirectSubmission = std::make_unique< + MockWddmDirectSubmission>>(device, osContext); + blitterDirectSubmission->initialize(true); + } } return ret; } @@ -135,6 +141,7 @@ struct MockWddmCsr : public WddmCommandStreamReceiver { std::unique_ptr recordedCommandBuffer = nullptr; bool callParentInitDirectSubmission = true; + bool initBlitterDirectSubmission = false; }; class WddmCommandStreamWithMockGdiFixture { @@ -1042,6 +1049,44 @@ TEST_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnRcsWhenFlushi memoryManager->freeGraphicsMemory(commandBuffer); } +TEST_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnBcsWhenFlushingCommandBufferThenExpectDirectSubmissionUsed) { + using Dispatcher = BlitterDispatcher; + using MockSubmission = + MockWddmDirectSubmission; + + 
DebugManager.flags.EnableDirectSubmission.set(1); + + auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); + hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].engineSupported = true; + + std::unique_ptr osContext; + osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), + 0, device->getDeviceBitfield(), aub_stream::ENGINE_BCS, PreemptionMode::ThreadGroup, + false, false, false)); + csr->callParentInitDirectSubmission = false; + csr->initBlitterDirectSubmission = true; + bool ret = csr->initDirectSubmission(*device.get(), *osContext.get()); + EXPECT_TRUE(ret); + EXPECT_FALSE(csr->isDirectSubmissionEnabled()); + EXPECT_TRUE(csr->isBlitterDirectSubmissionEnabled()); + + GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); + ASSERT_NE(nullptr, commandBuffer); + LinearStream cs(commandBuffer); + BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, + nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), + &cs, commandBuffer->getUnderlyingBuffer()}; + csr->flush(batchBuffer, csr->getResidencyAllocations()); + auto directSubmission = reinterpret_cast(csr->blitterDirectSubmission.get()); + EXPECT_TRUE(directSubmission->ringStart); + size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed(); + size_t expectedSize = directSubmission->getSizeSemaphoreSection() + + Dispatcher::getSizePreemption() + + directSubmission->getSizeDispatch(); + EXPECT_EQ(expectedSize, actualDispatchSize); + memoryManager->freeGraphicsMemory(commandBuffer); +} + TEST_F(WddmCommandStreamTest, givenResidencyLoggingAvailableWhenFlushingCommandBufferThenNotifiesResidencyLogger) { if (!NEO::wddmResidencyLoggingAvailable) { GTEST_SKIP(); diff --git a/shared/source/command_stream/command_stream_receiver_hw.h 
b/shared/source/command_stream/command_stream_receiver_hw.h index 4dd63482a7..2366c61284 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -101,7 +101,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { bool initDirectSubmission(Device &device, OsContext &osContext) override; bool checkDirectSubmissionSupportsEngine(const DirectSubmissionProperties &directSubmissionProperty, aub_stream::EngineType contextEngineType, - bool &startOnInit); + bool &startOnInit, + bool &startInContext); protected: void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 7f3d5560da..27c5c5d88d 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -938,9 +938,9 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; auto lock = obtainUniqueOwnership(); - + bool blitterDirectSubmission = this->isBlitterDirectSubmissionEnabled(); auto &commandStream = getCS(BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, profilingEnabled, PauseOnGpuProperties::featureEnabled(DebugManager.flags.PauseOnBlitCopy.get()), - *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex])); + blitterDirectSubmission, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex])); auto commandStreamStart = commandStream.getUsed(); auto newTaskCount = taskCount + 1; latestSentTaskCount = newTaskCount; @@ -998,8 +998,17 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont BlitCommandsHelper::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(), DebugPauseState::waitingForUserEndConfirmation, 
DebugPauseState::hasUserEndConfirmation); } - auto batchBufferEnd = reinterpret_cast(commandStream.getSpace(sizeof(MI_BATCH_BUFFER_END))); - *batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd; + void *endingCmdPtr = nullptr; + if (blitterDirectSubmission) { + endingCmdPtr = commandStream.getSpace(0); + EncodeBatchBufferStartOrEnd::programBatchBufferStart(&commandStream, + 0ull, + false); + } else { + auto batchBufferEnd = reinterpret_cast( + commandStream.getSpace(sizeof(MI_BATCH_BUFFER_END))); + *batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd; + } alignToCacheLine(commandStream); @@ -1009,7 +1018,7 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont } BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, - commandStream.getUsed(), &commandStream, nullptr}; + commandStream.getUsed(), &commandStream, endingCmdPtr}; flush(batchBuffer, getResidencyAllocations()); makeSurfacePackNonResident(getResidencyAllocations()); @@ -1108,7 +1117,8 @@ inline bool CommandStreamReceiverHw::initDirectSubmission(Device &dev bool submitOnInit = directSubmissionProperty.submitOnInit; bool engineSupported = checkDirectSubmissionSupportsEngine(directSubmissionProperty, contextEngineType, - submitOnInit); + submitOnInit, + startDirect); if (engineSupported && startDirect) { if (contextEngineType == aub_stream::ENGINE_BCS) { blitterDirectSubmission = DirectSubmissionHw>::create(device, osContext); @@ -1127,7 +1137,8 @@ inline bool CommandStreamReceiverHw::initDirectSubmission(Device &dev template inline bool CommandStreamReceiverHw::checkDirectSubmissionSupportsEngine(const DirectSubmissionProperties &directSubmissionProperty, aub_stream::EngineType contextEngineType, - bool &startOnInit) { + bool &startOnInit, + bool &startInContext) { bool supported = directSubmissionProperty.engineSupported; startOnInit = directSubmissionProperty.submitOnInit; if 
(contextEngineType == aub_stream::ENGINE_BCS) { @@ -1151,6 +1162,11 @@ inline bool CommandStreamReceiverHw::checkDirectSubmissionSupportsEng } } + //enable start in context only when default support is overridden and enabled + if (supported && !directSubmissionProperty.engineSupported) { + startInContext = true; + } + return supported; } diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index 4f3512be33..813e49eb33 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -114,7 +114,7 @@ struct BlitCommandsHelper { static size_t estimateBlitCommandsSize(const Vec3 ©Size, const CsrDependencies &csrDependencies, bool updateTimestampPacket, bool profilingEnabled, const RootDeviceEnvironment &rootDeviceEnvironment); static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, bool profilingEnabled, - bool debugPauseEnabled, const RootDeviceEnvironment &rootDeviceEnvironment); + bool debugPauseEnabled, bool blitterDirectSubmission, const RootDeviceEnvironment &rootDeviceEnvironment); static size_t getNumberOfBlitsForCopyRegion(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment); static size_t getNumberOfBlitsForCopyPerRow(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment); static uint64_t calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index c1127f7643..59a2cb8d60 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -92,7 +92,7 @@ size_t BlitCommandsHelper::estimateBlitCommandsSize(const Vec3 size_t BlitCommandsHelper::estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, bool profilingEnabled, 
bool debugPauseEnabled, - const RootDeviceEnvironment &rootDeviceEnvironment) { + bool blitterDirectSubmission, const RootDeviceEnvironment &rootDeviceEnvironment) { size_t size = 0; for (auto &blitProperties : blitPropertiesContainer) { size += BlitCommandsHelper::estimateBlitCommandsSize(blitProperties.copySize, blitProperties.csrDependencies, @@ -101,7 +101,11 @@ size_t BlitCommandsHelper::estimateBlitCommandsSize(const BlitPropert } size += MemorySynchronizationCommands::getSizeForAdditonalSynchronization(*rootDeviceEnvironment.getHardwareInfo()); size += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); - size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_END); + if (blitterDirectSubmission) { + size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_START); + } else { + size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_END); + } if (debugPauseEnabled) { size += BlitCommandsHelper::getSizeForDebugPauseCommands();