From 8254d6a08104d8c4a40ae98005146c80243aec2e Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Fri, 9 Mar 2018 14:48:42 +0100 Subject: [PATCH] Ensure that submissions are flushed prior to csr destruction. Change-Id: Ie04de561d3d295f40f55a19f01274d873d259abd --- .../command_stream_receiver_hw.inl | 5 +- runtime/device/device.cpp | 8 ++- .../windows/wddm_memory_manager.cpp | 2 + .../enqueue_kernel_aub_tests.cpp | 10 ++-- .../enqueue_kernel_two_ooq_tests.cpp | 3 ++ .../enqueue_write_buffer_tests.cpp | 2 + unit_tests/command_queue/ooq_task_tests.cpp | 49 +++++++++++++++---- .../command_stream_receiver_hw_tests.cpp | 1 + unit_tests/device/device_tests.cpp | 12 +++++ unit_tests/mocks/mock_csr.h | 4 ++ 10 files changed, 78 insertions(+), 18 deletions(-) diff --git a/runtime/command_stream/command_stream_receiver_hw.inl b/runtime/command_stream/command_stream_receiver_hw.inl index 0d1bb16d77..4f59a1da24 100644 --- a/runtime/command_stream/command_stream_receiver_hw.inl +++ b/runtime/command_stream/command_stream_receiver_hw.inl @@ -454,8 +454,9 @@ inline void CommandStreamReceiverHw::flushBatchedSubmissions() { } //make sure we flush DC - ((PIPE_CONTROL *)epiloguePipeControlLocation)->setDcFlushEnable(true); - + if (epiloguePipeControlLocation) { + ((PIPE_CONTROL *)epiloguePipeControlLocation)->setDcFlushEnable(true); + } auto flushStamp = this->flush(primaryCmdBuffer->batchBuffer, engineType, &surfacesForSubmit); //after flush task level is closed diff --git a/runtime/device/device.cpp b/runtime/device/device.cpp index ea61f44f8c..4b95b08784 100644 --- a/runtime/device/device.cpp +++ b/runtime/device/device.cpp @@ -92,8 +92,12 @@ Device::~Device() { if (performanceCounters) { performanceCounters->shutdown(); } - delete commandStreamReceiver; - commandStreamReceiver = nullptr; + if (commandStreamReceiver) { + commandStreamReceiver->flushBatchedSubmissions(); + delete commandStreamReceiver; + commandStreamReceiver = nullptr; + } + if (memoryManager) { if (preemptionAllocation) { memoryManager->freeGraphicsMemory(preemptionAllocation); diff --git a/runtime/os_interface/windows/wddm_memory_manager.cpp b/runtime/os_interface/windows/wddm_memory_manager.cpp index c8dcec2182..9005df90ec 100644 --- a/runtime/os_interface/windows/wddm_memory_manager.cpp +++ b/runtime/os_interface/windows/wddm_memory_manager.cpp @@ -278,6 +278,8 @@ void WddmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation releaseResidencyLock(); + UNRECOVERABLE_IF(gfxAllocation->taskCount != ObjectNotUsed && this->device && this->device->peekCommandStreamReceiver() && gfxAllocation->taskCount > *this->device->getCommandStreamReceiver().getTagAddress()); + if (input->gmm) { if (input->gmm->isRenderCompressed) { auto status = wddm->updateAuxTable(input->gpuPtr, input->gmm, false); diff --git a/unit_tests/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp b/unit_tests/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp index cc688fbd9f..c757065ab8 100644 --- a/unit_tests/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp +++ b/unit_tests/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp @@ -105,6 +105,8 @@ HWTEST_F(AUBHelloWorld, simple) { ASSERT_EQ(CL_SUCCESS, retVal); + pCmdQ->flush(); + parseCommands(*pCmdQ); auto *pWalker = reinterpret_cast(cmdWalker); @@ -242,6 +244,8 @@ HWTEST_F(AUBSimpleArg, simple) { parseCommands(*pCmdQ); + pCmdQ->flush(); + auto *pWalker = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, pWalker); auto alignmentIDSA = 32 * sizeof(uint8_t); @@ -293,6 +297,7 @@ HWTEST_F(AUBSimpleArg, givenAubCommandStreamerReceiverWhenBatchBufferFlateningIs event); ASSERT_EQ(CL_SUCCESS, retVal); + pCmdQ->flush(); } struct AUBSimpleArgIntegrateTest : public SimpleArgFixture, @@ -352,9 +357,4 @@ INSTANTIATE_TEST_CASE_P( ::testing::Combine( ::testing::ValuesIn(TestSimdTable), ::testing::ValuesIn(TestParamTable))); - -typedef TwoWalkerTest AUBTwoWalker; - -TEST_F(AUBTwoWalker, simple) { -} } // namespace ULT diff --git a/unit_tests/command_queue/enqueue_kernel_two_ooq_tests.cpp b/unit_tests/command_queue/enqueue_kernel_two_ooq_tests.cpp index c6089414b6..b24f37790d 100644 --- a/unit_tests/command_queue/enqueue_kernel_two_ooq_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_two_ooq_tests.cpp @@ -93,6 +93,9 @@ struct TwoOOQsTwoDependentWalkers : public HelloWorldTest, ASSERT_EQ(CL_SUCCESS, retVal); HardwareParse::parseCommands(*pCmdQ2); + pCmdQ->flush(); + pCmdQ2->flush(); + Event *E1 = castToObject(event1); ASSERT_NE(nullptr, E1); Event *E2 = castToObject(event2); diff --git a/unit_tests/command_queue/enqueue_write_buffer_tests.cpp b/unit_tests/command_queue/enqueue_write_buffer_tests.cpp index e1adeb92fd..33b3bce43d 100644 --- a/unit_tests/command_queue/enqueue_write_buffer_tests.cpp +++ b/unit_tests/command_queue/enqueue_write_buffer_tests.cpp @@ -344,6 +344,7 @@ HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPt EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); + pCmdOOQ->flush(); } HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrZeroCopyBufferWhenWriteBufferIsExecutedThenTaskLevelNotIncreased) { DebugManagerStateRestore dbgRestore; @@ -363,6 +364,7 @@ HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstP EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); + pCmdOOQ->flush(); } HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrZeroCopyBufferEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; diff --git a/unit_tests/command_queue/ooq_task_tests.cpp b/unit_tests/command_queue/ooq_task_tests.cpp index c72d362555..2ad120cf4d 100644 --- a/unit_tests/command_queue/ooq_task_tests.cpp +++ b/unit_tests/command_queue/ooq_task_tests.cpp @@ -36,36 +36,66 @@ struct OOQTaskTypedTests : public HelloWorldTest { TYPED_TEST_CASE_P(OOQTaskTypedTests); -TYPED_TEST_P(OOQTaskTypedTests, doesntChangeTaskLevel) { +bool isBlockingCall(unsigned int cmdType) { + if (cmdType == CL_COMMAND_WRITE_BUFFER || + cmdType == CL_COMMAND_READ_BUFFER || + cmdType == CL_COMMAND_WRITE_IMAGE || + cmdType == CL_COMMAND_READ_IMAGE) { + return true; + } else { + return false; + } +} + +TYPED_TEST_P(OOQTaskTypedTests, givenNonBlockingCallWhenDoneOnOutOfOrderQueueThenTaskLevelDoesntChange) { + auto &commandStreamReceiver = this->pDevice->getCommandStreamReceiver(); + auto tagAddress = commandStreamReceiver.getTagAddress(); + + auto blockingCall = isBlockingCall(TypeParam::Traits::cmdType); + auto taskLevelClosed = blockingCall ? 1u : 0u; // for blocking commands task level will be closed + + //for non blocking calls make sure that resources are added to defer free list instaed of being destructed in place + if (!blockingCall) { + *tagAddress = 0; + } + auto previousTaskLevel = this->pCmdQ->taskLevel; - auto taskLevelClosed = 0u; // for blocking commands task level will be closed + if (TypeParam::Traits::cmdType == CL_COMMAND_WRITE_BUFFER || TypeParam::Traits::cmdType == CL_COMMAND_READ_BUFFER) { auto buffer = std::unique_ptr(BufferHelper<>::create()); buffer->forceDisallowCPUCopy = true; // no task level logic when cpu copy TypeParam::enqueue(this->pCmdQ, buffer.get()); - taskLevelClosed = 1u; + this->pCmdQ->flush(); + } else { TypeParam::enqueue(this->pCmdQ, nullptr); } - if (TypeParam::Traits::cmdType == CL_COMMAND_WRITE_IMAGE || TypeParam::Traits::cmdType == CL_COMMAND_READ_IMAGE) { - taskLevelClosed = 1u; - } EXPECT_EQ(previousTaskLevel + taskLevelClosed, this->pCmdQ->taskLevel); + *tagAddress = initialHardwareTag; } -TYPED_TEST_P(OOQTaskTypedTests, changesTaskCount) { +TYPED_TEST_P(OOQTaskTypedTests, givenTaskWhenEnqueuedOnOutOfOrderQueueThenTaskCountIsUpdated) { auto &commandStreamReceiver = this->pDevice->getCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); + auto tagAddress = commandStreamReceiver.getTagAddress(); + auto blockingCall = isBlockingCall(TypeParam::Traits::cmdType); + + //for non blocking calls make sure that resources are added to defer free list instaed of being destructed in place + if (!blockingCall) { + *tagAddress = 0; + } if (TypeParam::Traits::cmdType == CL_COMMAND_WRITE_BUFFER || TypeParam::Traits::cmdType == CL_COMMAND_READ_BUFFER) { auto buffer = std::unique_ptr(BufferHelper<>::create()); buffer->forceDisallowCPUCopy = true; // no task level logic when cpu copy TypeParam::enqueue(this->pCmdQ, buffer.get()); + this->pCmdQ->flush(); } else { TypeParam::enqueue(this->pCmdQ, nullptr); } EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_LE(this->pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); + *tagAddress = initialHardwareTag; } typedef ::testing::Types< @@ -80,8 +110,8 @@ typedef ::testing::Types< EnqueueParams; REGISTER_TYPED_TEST_CASE_P(OOQTaskTypedTests, - doesntChangeTaskLevel, - changesTaskCount); + givenNonBlockingCallWhenDoneOnOutOfOrderQueueThenTaskLevelDoesntChange, + givenTaskWhenEnqueuedOnOutOfOrderQueueThenTaskCountIsUpdated); // Instantiate all of these parameterized tests INSTANTIATE_TYPED_TEST_CASE_P(OOQ, OOQTaskTypedTests, EnqueueParams); @@ -248,6 +278,7 @@ TEST_F(OOQTaskTests, enqueueReadBuffer_blockingAndNonBlockedOnUserEvent) { retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); + pCmdQ->flush(); alignedFree(alignedReadPtr); } diff --git a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp index 3adf578252..d6c0027f01 100644 --- a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp @@ -107,6 +107,7 @@ struct UltCommandStreamReceiverTest } void TearDown() override { + pDevice->getCommandStreamReceiver().flushBatchedSubmissions(); delete dsh.getGraphicsAllocation(); delete ih.getGraphicsAllocation(); delete ioh.getGraphicsAllocation(); diff --git a/unit_tests/device/device_tests.cpp b/unit_tests/device/device_tests.cpp index 2f78c29a7d..dcffd982f6 100644 --- a/unit_tests/device/device_tests.cpp +++ b/unit_tests/device/device_tests.cpp @@ -27,6 +27,7 @@ #include "unit_tests/fixtures/device_fixture.h" #include "unit_tests/fixtures/memory_management_fixture.h" #include "unit_tests/mocks/mock_context.h" +#include "unit_tests/mocks/mock_csr.h" #include "unit_tests/libult/create_command_stream.h" #include "test.h" #include @@ -144,3 +145,14 @@ TEST(DeviceCreation, givenDeviceWithUsedTagAllocationWhenItIsDestroyedThenThereA auto tagAllocation = device->peekTagAllocation(); tagAllocation->taskCount = 1; } + +TEST(DeviceCleanup, givenDeviceWhenItIsDestroyedThenFlushBatchedSubmissionsIsCalled) { + auto mockDevice = std::unique_ptr(MockDevice::create(nullptr)); + MockCommandStreamReceiver *csr = new MockCommandStreamReceiver; + mockDevice->resetCommandStreamReceiver(csr); + int flushedBatchedSubmissionsCalledCount = 0; + csr->flushBatchedSubmissionsCallCounter = &flushedBatchedSubmissionsCalledCount; + mockDevice.reset(nullptr); + + EXPECT_EQ(1, flushedBatchedSubmissionsCalledCount); +} \ No newline at end of file diff --git a/unit_tests/mocks/mock_csr.h b/unit_tests/mocks/mock_csr.h index 024411053e..7aee409256 100644 --- a/unit_tests/mocks/mock_csr.h +++ b/unit_tests/mocks/mock_csr.h @@ -200,6 +200,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { using CommandStreamReceiver::latestSentTaskCount; using CommandStreamReceiver::tagAddress; std::vector instructionHeapReserveredData; + int *flushBatchedSubmissionsCallCounter = nullptr; ~MockCommandStreamReceiver() { } @@ -217,6 +218,9 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { DispatchFlags &dispatchFlags) override; void flushBatchedSubmissions() override { + if (flushBatchedSubmissionsCallCounter) { + (*flushBatchedSubmissionsCallCounter)++; + } } void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait) override {