diff --git a/runtime/command_stream/aub_command_stream_receiver_hw.inl b/runtime/command_stream/aub_command_stream_receiver_hw.inl
index c102ab9830..dd1d46c9cd 100644
--- a/runtime/command_stream/aub_command_stream_receiver_hw.inl
+++ b/runtime/command_stream/aub_command_stream_receiver_hw.inl
@@ -33,6 +33,11 @@ namespace OCLRT {
 template
 AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw(const HardwareInfo &hwInfoIn)
     : BaseClass(hwInfoIn) {
+    // Default to batched dispatch; a non-zero CsrDispatchMode debug flag overrides the mode.
+    this->dispatchMode = CommandStreamReceiver::DispatchMode::BatchedDispatch;
+    if (DebugManager.flags.CsrDispatchMode.get()) {
+        this->dispatchMode = static_cast<CommandStreamReceiver::DispatchMode>(DebugManager.flags.CsrDispatchMode.get());
+    }
     for (auto &engineInfo : engineInfoTable) {
         engineInfo.pLRCA = nullptr;
         engineInfo.ggttLRCA = 0u;
diff --git a/unit_tests/aub_tests/command_queue/enqueue_copy_buffer_aub_tests.cpp b/unit_tests/aub_tests/command_queue/enqueue_copy_buffer_aub_tests.cpp
index 91c02ade74..0ba7644a5f 100644
--- a/unit_tests/aub_tests/command_queue/enqueue_copy_buffer_aub_tests.cpp
+++ b/unit_tests/aub_tests/command_queue/enqueue_copy_buffer_aub_tests.cpp
@@ -91,6 +91,8 @@ HWTEST_P(AUBCopyBuffer, simple) {
 
     EXPECT_EQ(CL_SUCCESS, retVal);
 
+    pCmdQ->flush();
+
     pSrcMemory = ptrOffset(pSrcMemory, srcOffset);
     pDstMemory = ptrOffset(pDstMemory, dstOffset);
 
diff --git a/unit_tests/aub_tests/command_queue/enqueue_copy_buffer_rect_aub_tests.cpp b/unit_tests/aub_tests/command_queue/enqueue_copy_buffer_rect_aub_tests.cpp
index 911df52816..9376393be8 100644
--- a/unit_tests/aub_tests/command_queue/enqueue_copy_buffer_rect_aub_tests.cpp
+++ b/unit_tests/aub_tests/command_queue/enqueue_copy_buffer_rect_aub_tests.cpp
@@ -115,6 +115,8 @@ HWTEST_P(AUBCopyBufferRect, simple) {
         nullptr);
     EXPECT_EQ(CL_SUCCESS, retVal);
 
+    pCmdQ->flush();
+
     // Verify Output, line by line
     uint8_t src[rowPitch * slicePitch];
     memset(src, 0, sizeof(src));
diff --git a/unit_tests/aub_tests/command_queue/enqueue_fill_buffer_aub_tests.cpp b/unit_tests/aub_tests/command_queue/enqueue_fill_buffer_aub_tests.cpp
index 0610c001cd..3135951fbb 100644
--- a/unit_tests/aub_tests/command_queue/enqueue_fill_buffer_aub_tests.cpp
+++ b/unit_tests/aub_tests/command_queue/enqueue_fill_buffer_aub_tests.cpp
@@ -79,6 +79,8 @@ HWTEST_P(AUBFillBuffer, simple) {
         event);
     ASSERT_EQ(CL_SUCCESS, retVal);
 
+    pCmdQ->flush();
+
     pDestMemory = reinterpret_cast((destBuffer->getGraphicsAllocation()->getGpuAddress()));
 
     // The memory under offset should be untouched
diff --git a/unit_tests/aub_tests/command_queue/enqueue_fill_image_aub_tests.cpp b/unit_tests/aub_tests/command_queue/enqueue_fill_image_aub_tests.cpp
index e454fc3b2d..df63fe32b0 100644
--- a/unit_tests/aub_tests/command_queue/enqueue_fill_image_aub_tests.cpp
+++ b/unit_tests/aub_tests/command_queue/enqueue_fill_image_aub_tests.cpp
@@ -228,6 +228,8 @@ HWTEST_P(AubFillImage, simple) {
         nullptr);
     EXPECT_EQ(CL_SUCCESS, retVal);
 
+    pCmdQ->flush();
+
     ((MemoryAllocation *)image->getGraphicsAllocation())->allowAubFileWrite = false; // disallow file overwrite from cpu in next enqueue calls
     size_t imgOrigin[] = {0, 0, 0};
     size_t imgRegion[] = {testWidth, testHeight, testDepth};
diff --git a/unit_tests/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp b/unit_tests/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp
index 3115823e41..7f8dac767d 100644
--- a/unit_tests/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp
+++ b/unit_tests/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp
@@ -170,6 +170,8 @@ HWTEST_P(AUBHelloWorldIntegrateTest, simple) {
         event);
     ASSERT_EQ(CL_SUCCESS, retVal);
 
+    pCmdQ->flush();
+
     // Compute our memory expecations based on kernel execution
     auto globalWorkItems = globalWorkSize[0] * globalWorkSize[1] * globalWorkSize[2];
     auto sizeWritten = globalWorkItems * sizeof(float);
@@ -301,6 +303,8 @@ HWTEST_P(AUBSimpleArgIntegrateTest, simple) {
         event);
     ASSERT_EQ(CL_SUCCESS, retVal);
 
+    pCmdQ->flush();
+
     // Compute our memory expecations based on kernel execution
     size_t globalWorkItems = globalWorkSize[0] * globalWorkSize[1] * globalWorkSize[2];
     size_t sizeWritten = globalWorkItems * sizeof(int);
diff --git a/unit_tests/aub_tests/command_stream/aub_command_stream_tests.cpp b/unit_tests/aub_tests/command_stream/aub_command_stream_tests.cpp
index 89b91e5277..7d0a38e85e 100644
--- a/unit_tests/aub_tests/command_stream/aub_command_stream_tests.cpp
+++ b/unit_tests/aub_tests/command_stream/aub_command_stream_tests.cpp
@@ -69,7 +69,8 @@ struct AUBFixture : public AUBCommandStreamFixture,
         CommandStreamReceiverHw::addBatchBufferEnd(*pCS, nullptr);
         CommandStreamReceiverHw::alignToCacheLine(*pCS);
         BatchBuffer batchBuffer{pCS->getGraphicsAllocation(), 0, false, false, pCS->getUsed(), pCS};
-        pCommandStreamReceiver->flush(batchBuffer, engineOrdinal, nullptr);
+        ResidencyContainer allocationsForResidency;
+        pCommandStreamReceiver->flush(batchBuffer, engineOrdinal, &allocationsForResidency);
 
         auto mmioBase = AUBCommandStreamReceiverHw::getCsTraits(engineOrdinal).mmioBase;
         AUBCommandStreamFixture::expectMMIO(mmioBase + 0x2094, noopId);
@@ -82,9 +83,11 @@ HWTEST_F(AUBcommandstreamTests, testFlushTwice) {
     CommandStreamReceiverHw::addBatchBufferEnd(*pCS, nullptr);
     CommandStreamReceiverHw::alignToCacheLine(*pCS);
     BatchBuffer batchBuffer{pCS->getGraphicsAllocation(), 0, false, false, pCS->getUsed(), pCS};
-    pCommandStreamReceiver->flush(batchBuffer, EngineType::ENGINE_RCS, nullptr);
+    ResidencyContainer allocationsForResidency;
+    pCommandStreamReceiver->flush(batchBuffer, EngineType::ENGINE_RCS, &allocationsForResidency);
     BatchBuffer batchBuffer2{pCS->getGraphicsAllocation(), 0, false, false, pCS->getUsed(), pCS};
-    pCommandStreamReceiver->flush(batchBuffer2, EngineType::ENGINE_RCS, nullptr);
+    ResidencyContainer allocationsForResidency2;
+    pCommandStreamReceiver->flush(batchBuffer2, EngineType::ENGINE_RCS, &allocationsForResidency2);
 }
 
 HWTEST_F(AUBcommandstreamTests, testNoopIdRcs) {
diff --git a/unit_tests/aub_tests/gen9/skl/command_queue/run_kernel_aub_tests_skl.cpp b/unit_tests/aub_tests/gen9/skl/command_queue/run_kernel_aub_tests_skl.cpp
index 1bf637563e..6b2d27bb27 100644
--- a/unit_tests/aub_tests/gen9/skl/command_queue/run_kernel_aub_tests_skl.cpp
+++ b/unit_tests/aub_tests/gen9/skl/command_queue/run_kernel_aub_tests_skl.cpp
@@ -460,6 +460,8 @@ SKLTEST_F(AUBRunKernelIntegrateTest, deviceSideVme) {
         nullptr);
     ASSERT_EQ(CL_SUCCESS, retVal);
 
+    pCmdQ->flush();
+
     ((MemoryAllocation *)motionVectorBuffer->getGraphicsAllocation())->allowAubFileWrite = false; // disallow file overwrite from cpu in next enqueue calls
     ((MemoryAllocation *)residualsBuffer->getGraphicsAllocation())->allowAubFileWrite = false; // disallow file overwrite from cpu in next enqueue calls
     ((MemoryAllocation *)shapesBuffer->getGraphicsAllocation())->allowAubFileWrite = false; // disallow file overwrite from cpu in next enqueue calls
diff --git a/unit_tests/command_stream/aub_command_stream_receiver_tests.cpp b/unit_tests/command_stream/aub_command_stream_receiver_tests.cpp
index 0990db4036..1f244e095c 100644
--- a/unit_tests/command_stream/aub_command_stream_receiver_tests.cpp
+++ b/unit_tests/command_stream/aub_command_stream_receiver_tests.cpp
@@ -23,8 +23,10 @@
 #include "runtime/command_stream/aub_command_stream_receiver_hw.h"
 #include "runtime/helpers/hw_info.h"
 #include "runtime/memory_manager/memory_manager.h"
+#include "runtime/os_interface/debug_settings_manager.h"
 #include "test.h"
 #include "unit_tests/fixtures/device_fixture.h"
+#include "unit_tests/helpers/debug_manager_state_restore.h"
 
 using OCLRT::AUBCommandStreamReceiver;
 using OCLRT::AUBCommandStreamReceiverHw;
@@ -37,9 +39,20 @@ using OCLRT::LinearStream;
 using OCLRT::MemoryManager;
 using OCLRT::ObjectNotResident;
 using OCLRT::platformDevices;
+using OCLRT::DebugManager;
 
 typedef Test AubCommandStreamReceiverTests;
 
+// Test helper that exposes the receiver's dispatchMode through peekDispatchMode().
+template
+struct MockAubCsr : public AUBCommandStreamReceiverHw {
+    MockAubCsr(const HardwareInfo &hwInfoIn) : AUBCommandStreamReceiverHw(hwInfoIn) {}
+
+    CommandStreamReceiver::DispatchMode peekDispatchMode() const {
+        return this->dispatchMode;
+    }
+};
+
 TEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenItIsCreatedWithWrongGfxCoreFamilyThenNullPointerShouldBeReturned) {
     HardwareInfo hwInfo = *platformDevices[0];
     GFXCORE_FAMILY family = hwInfo.pPlatform->eRenderCoreFamily;
@@ -52,6 +65,20 @@ TEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenItIsCreat
     const_cast(hwInfo.pPlatform)->eRenderCoreFamily = family;
 }
 
+HWTEST_F(AubCommandStreamReceiverTests, givenAubCsrWhenItIsCreatedWithDefaultSettingsThenItHasBatchedDispatchModeEnabled) {
+    DebugManagerStateRestore stateRestore;
+    DebugManager.flags.CsrDispatchMode.set(0);
+    std::unique_ptr> aubCsr(new MockAubCsr(*platformDevices[0]));
+    EXPECT_EQ(CommandStreamReceiver::DispatchMode::BatchedDispatch, aubCsr->peekDispatchMode());
+}
+
+HWTEST_F(AubCommandStreamReceiverTests, givenAubCsrWhenItIsCreatedWithDebugSettingsThenItHasProperDispatchModeEnabled) {
+    DebugManagerStateRestore stateRestore;
+    DebugManager.flags.CsrDispatchMode.set(CommandStreamReceiver::DispatchMode::ImmediateDispatch);
+    std::unique_ptr> aubCsr(new MockAubCsr(*platformDevices[0]));
+    EXPECT_EQ(CommandStreamReceiver::DispatchMode::ImmediateDispatch, aubCsr->peekDispatchMode());
+}
+
 HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenItIsCreatedThenMemoryManagerIsNotNull) {
     HardwareInfo hwInfo;
 
@@ -106,6 +133,7 @@ HWTEST_F(AubCommandStreamReceiverTests, flushShouldLeaveProperRingTailAlignment)
     BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, false, false, cs.getUsed(), &cs};
 
     // First flush typically includes a preamble and chain to command buffer
+    csr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::ImmediateDispatch);
     csr->flush(batchBuffer, engineOrdinal, nullptr);
     EXPECT_EQ(0ull, csr->engineInfoTable[engineOrdinal].tailRingBuffer % ringTailAlignment);
 