From 09923fcb395a74d02cfbfff2b62acf2f9c77e8ce Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Tue, 27 Mar 2018 09:24:26 +0200 Subject: [PATCH] [17/n] Internal 4GB allocator. - Make sure that block kernel ISA is made resident - in both blocked & non-blocked paths - Fix a bug where the private surface was not made resident in the blocked path. Change-Id: Ie564595b176b94ecc7c79d7efeae20598c5874fb --- runtime/command_queue/enqueue_common.h | 9 +- runtime/helpers/task_information.cpp | 2 + runtime/program/block_kernel_manager.cpp | 14 ++++ runtime/program/block_kernel_manager.h | 3 + .../enqueue_execution_model_kernel_tests.cpp | 84 ++++++++++++++++++- .../libult/ult_command_stream_receiver.h | 4 + 6 files changed, 107 insertions(+), 9 deletions(-) diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index ef582ba80f..740999342f 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -306,14 +306,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, // Update SLM usage slmUsed |= scheduler.slmTotalSize > 0; - size_t count = parentKernel->getProgram()->getBlockKernelManager()->getCount(); - - for (uint32_t surfaceIndex = 0; surfaceIndex < count; surfaceIndex++) { - auto surface = parentKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(surfaceIndex); - if (surface) { - commandStreamReceiver.makeResident(*surface); - } - } + parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver); } auto submissionRequired = isCommandWithoutKernel(commandType) ?
false : true; diff --git a/runtime/helpers/task_information.cpp b/runtime/helpers/task_information.cpp index 9dab878acc..73c066e3a9 100644 --- a/runtime/helpers/task_information.cpp +++ b/runtime/helpers/task_information.cpp @@ -249,6 +249,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate // Update SLM usage slmUsed |= scheduler.slmTotalSize > 0; + + this->kernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver); } DispatchFlags dispatchFlags; diff --git a/runtime/program/block_kernel_manager.cpp b/runtime/program/block_kernel_manager.cpp index e82986f6d7..7353841622 100644 --- a/runtime/program/block_kernel_manager.cpp +++ b/runtime/program/block_kernel_manager.cpp @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#include "runtime/command_stream/command_stream_receiver.h" #include "runtime/helpers/debug_helpers.h" #include "runtime/program/block_kernel_manager.h" #include "runtime/program/kernel_info.h" @@ -61,4 +62,17 @@ GraphicsAllocation *BlockKernelManager::getPrivateSurface(size_t ordinal) { return blockPrivateSurfaceArray[ordinal]; return nullptr; } +void BlockKernelManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver) { + auto blockCount = blockKernelInfoArray.size(); + for (uint32_t surfaceIndex = 0; surfaceIndex < blockCount; surfaceIndex++) { + auto surface = getPrivateSurface(surfaceIndex); + if (surface) { + commandStreamReceiver.makeResident(*surface); + } + surface = blockKernelInfoArray[surfaceIndex]->getGraphicsAllocation(); + if (surface) { + commandStreamReceiver.makeResident(*surface); + } + } +} } // namespace OCLRT \ No newline at end of file diff --git a/runtime/program/block_kernel_manager.h b/runtime/program/block_kernel_manager.h index bc42c278da..e92cca1906 100644 --- a/runtime/program/block_kernel_manager.h +++ b/runtime/program/block_kernel_manager.h @@ -26,6 +26,7 @@ namespace OCLRT { class GraphicsAllocation; +class 
CommandStreamReceiver; struct KernelInfo; class BlockKernelManager { @@ -44,6 +45,8 @@ class BlockKernelManager { void pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal); GraphicsAllocation *getPrivateSurface(size_t ordinal); + void makeInternalAllocationsResident(CommandStreamReceiver &); + protected: bool blockUsesPrintf = false; std::vector blockKernelInfoArray; diff --git a/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp b/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp index 8af42453d5..79f498665e 100644 --- a/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/unit_tests/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -125,7 +125,6 @@ HWTEST_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenDeviceQueueDS } HWTEST_P(ParentKernelEnqueueTest, GivenParentKernelWithPrivateSurfaceWhenEnqueueKernelCalledThenResidencyCountIncreased) { - if (pDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; @@ -141,6 +140,89 @@ HWTEST_P(ParentKernelEnqueueTest, GivenParentKernelWithPrivateSurfaceWhenEnqueue } } +HWTEST_P(ParentKernelEnqueueTest, GivenBlocksWithPrivateMemoryWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenPrivateAllocationIsMadeResidentWhenEventUnblocks) { + if (pDevice->getSupportedClVersion() >= 20) { + size_t offset[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; + + auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; + + auto privateAllocation = csr.getMemoryManager()->allocateGraphicsMemory(10); + blockKernelManager->pushPrivateSurface(privateAllocation, 0); + + UserEvent uEvent(pContext); + auto clEvent = static_cast(&uEvent); + + pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr); + + EXPECT_FALSE(csr.isMadeResident(privateAllocation)); + uEvent.setStatus(CL_COMPLETE); + 
EXPECT_TRUE(csr.isMadeResident(privateAllocation)); + } +} + +HWTEST_P(ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelIsCalledThenBlockKernelIsaAllocationIsMadeResident) { + if (pDevice->getSupportedClVersion() >= 20) { + size_t offset[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; + + auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; + + pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr); + + auto blockCount = blockKernelManager->getCount(); + for (auto blockId = 0u; blockId < blockCount; blockId++) { + EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); + } + } +} + +HWTEST_P(ParentKernelEnqueueTest, GivenBlockKernelManagerFilledWithBlocksWhenMakeInternalAllocationsResidentIsCalledThenAllSurfacesAreMadeResident) { + if (pDevice->getSupportedClVersion() >= 20) { + auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; + + blockKernelManager->makeInternalAllocationsResident(csr); + + auto blockCount = blockKernelManager->getCount(); + for (auto blockId = 0u; blockId < blockCount; blockId++) { + EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); + } + } +} + +HWTEST_P(ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenBlockKernelIsaAllocationIsMadeResidentWhenEventUnblocks) { + if (pDevice->getSupportedClVersion() >= 20) { + size_t offset[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; + + auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; + + UserEvent uEvent(pContext); + auto clEvent = static_cast(&uEvent); + + 
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr); + + auto blockCount = blockKernelManager->getCount(); + for (auto blockId = 0u; blockId < blockCount; blockId++) { + EXPECT_FALSE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); + } + + uEvent.setStatus(CL_COMPLETE); + + for (auto blockId = 0u; blockId < blockCount; blockId++) { + EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); + } + } +} + HWTEST_P(ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedSecondTimeThenDeviceQueueDSHIsResetToInitialOffset) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; diff --git a/unit_tests/libult/ult_command_stream_receiver.h b/unit_tests/libult/ult_command_stream_receiver.h index ce140fe53e..b21f5bd3f5 100644 --- a/unit_tests/libult/ult_command_stream_receiver.h +++ b/unit_tests/libult/ult_command_stream_receiver.h @@ -99,6 +99,10 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { BaseClass::makeResident(gfxAllocation); } + bool isMadeResident(GraphicsAllocation *graphicsAllocation) { + return makeResidentAllocations.find(graphicsAllocation) != makeResidentAllocations.end(); + } + std::map makeResidentAllocations; bool storeMakeResidentAllocations;