diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index aaac7176e1..0ae5d1d05c 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -168,8 +168,8 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, DeviceQueueHw *devQueueHw = castToObject>(devQueue); TagNodeBase *hwTimeStamps = nullptr; - - auto commandStreamReceiverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); + CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver(); + auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership(); EventBuilder eventBuilder; setupEvent(eventBuilder, event, commandType); @@ -206,17 +206,17 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, BlitPropertiesContainer blitPropertiesContainer; if (this->context->getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, getGpgpuCommandStreamReceiver()); + eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, computeCommandStreamReceiver); } bool enqueueWithBlitAuxTranslation = isBlitAuxTranslationRequired(multiDispatchInfo); - if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) { if (!clearDependenciesForSubCapture) { - eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); + eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, computeCommandStreamReceiver, CsrDependencies::DependenciesType::OnCsr); } - auto allocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(); + auto allocator = computeCommandStreamReceiver.getTimestampPacketAllocator(); size_t nodesCount = 0u; if (isCacheFlushCommand(commandType) || isMarkerWithProfiling) { @@ -231,7 
+231,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } if (nodesCount > 0) { - obtainNewTimestampPacketNodes(nodesCount, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, getGpgpuCommandStreamReceiver()); + obtainNewTimestampPacketNodes(nodesCount, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, computeCommandStreamReceiver); csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes); } } @@ -250,10 +250,10 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } if (!blockQueue && isOOQEnabled()) { - setupBarrierTimestampForBcsEngines(getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), timestampPacketDependencies); + setupBarrierTimestampForBcsEngines(computeCommandStreamReceiver.getOsContext().getEngineType(), timestampPacketDependencies); } - if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + if (eventBuilder.getEvent() && computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) { eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer); eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.nonAuxToAuxNodes); eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.auxToNonAuxNodes); @@ -267,9 +267,9 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, timestampPacketDependencies); } else if (isCacheFlushCommand(commandType)) { processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps); - } else if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + } else if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) { if (CL_COMMAND_BARRIER == commandType) { - getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush(); + computeCommandStreamReceiver.requestStallingCommandsOnNextFlush(); } for (size_t i = 0; i < 
eventsRequest.numEventsInWaitList; i++) { @@ -288,8 +288,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, if (isMarkerWithProfiling) { if (numEventsInWaitList == 0) { - PipeControlArgs args(false); - MemorySynchronizationCommands::addPipeControl(commandStream, args); + computeCommandStreamReceiver.programComputeBarrierCommand(commandStream); } processDispatchForMarkerWithTimestampPacket(*this, &commandStream, eventsRequest, csrDeps); } @@ -305,7 +304,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, if (!blockQueue && multiDispatchInfo.peekMainKernel() && multiDispatchInfo.peekMainKernel()->requiresMemoryMigration()) { for (auto &arg : multiDispatchInfo.peekMainKernel()->getMemObjectsToMigrate()) { - MigrationController::handleMigration(*this->context, getGpgpuCommandStreamReceiver(), arg.second); + MigrationController::handleMigration(*this->context, computeCommandStreamReceiver, arg.second); migratedMemory = true; } } @@ -315,7 +314,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) { - csrDeps.makeResident(getGpgpuCommandStreamReceiver()); + csrDeps.makeResident(computeCommandStreamReceiver); completionStamp = enqueueNonBlocked( surfacesForResidency, @@ -334,7 +333,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, getBcsForAuxTranslation()); if (parentKernel) { - getGpgpuCommandStreamReceiver().setMediaVFEStateDirty(true); + computeCommandStreamReceiver.setMediaVFEStateDirty(true); if (devQueueHw->getSchedulerReturnInstance() > 0) { waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false); @@ -427,7 +426,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } } if (migratedMemory) { - getGpgpuCommandStreamReceiver().flushBatchedSubmissions(); + computeCommandStreamReceiver.flushBatchedSubmissions(); } } diff --git 
a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index 1da6b638c2..f78877b828 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -208,7 +208,7 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c expectedSizeCS += EnqueueOperation::getSizeRequiredForTimestampPacketWrite(); if (isMarkerWithProfiling) { if (!eventsInWaitlist) { - expectedSizeCS += MemorySynchronizationCommands::getSizeForSinglePipeControl(); + expectedSizeCS += commandQueue.getGpgpuCommandStreamReceiver().getCmdsSizeForComputeBarrierCommand(); } expectedSizeCS += 4 * EncodeStoreMMIO::size; } diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index c815175021..08882df148 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/allocations_list.h" @@ -1044,6 +1045,26 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithoutW EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size + MemorySynchronizationCommands::getSizeForSinglePipeControl(), extendedCommandStreamSize); } + +HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTimestampWriteEnableOnMultiTileQueueWhenMarkerProfilingWithoutWaitListThenSizeHasFourMMIOStoresAndCrossTileBarrier) { + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.timestampPacketWriteEnabled = true; + csr.activePartitions = 2; + csr.activePartitionsConfig = 2; + csr.staticWorkPartitioningEnabled = true; + + MockKernelWithInternals mockKernel(*pClDevice); + DispatchInfo dispatchInfo; + 
MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); + dispatchInfo.setKernel(mockKernel.mockKernel); + multiDispatchInfo.push(dispatchInfo); + + auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); + auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false); + + EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size + ImplicitScalingDispatch::getBarrierSize(csr.peekHwInfo(), false, false), extendedCommandStreamSize); +} + HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithWaitListThenSizeHasFourMMIOStores) { pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockKernelWithInternals mockKernel(*pClDevice); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 5d75822898..7316267f80 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -17,6 +17,7 @@ #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_csr.h" +#include "shared/test/common/mocks/mock_gmm_page_table_mngr.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" @@ -1375,3 +1376,288 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenSbaProgram EXPECT_EQ(GmmHelper::decanonize(generalStateBase), sbaCmd.getGeneralStateBaseAddress()); EXPECT_EQ(0xfffffu, sbaCmd.getGeneralStateBufferSize()); } + +HWTEST_F(CommandStreamReceiverFlushTaskTests, 
givenCommandStreamReceiverWhenFlushTaskIsCalledThenInitializePageTableManagerRegister) { + auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + auto csr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + pDevice->resetCommandStreamReceiver(csr); + + MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); + csr->pageTableManager.reset(pageTableManager); + MockGmmPageTableMngr *pageTableManager2 = new MockGmmPageTableMngr(); + csr2->pageTableManager.reset(pageTableManager2); + + EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(csr, ::testing::_)).Times(1); + EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(csr2, ::testing::_)).Times(0); + + auto memoryManager = pDevice->getMemoryManager(); + auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + IndirectHeap cs(graphicsAllocation); + + EXPECT_FALSE(csr->pageTableManagerInitialized); + EXPECT_FALSE(csr2->pageTableManagerInitialized); + + DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); + + csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); + + EXPECT_TRUE(csr->pageTableManagerInitialized); + EXPECT_FALSE(csr2->pageTableManagerInitialized); + + csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); + + EXPECT_CALL(*pageTableManager2, initContextAuxTableRegister(csr2, ::testing::_)).Times(1); + pDevice->resetCommandStreamReceiver(csr2); + csr2->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); + EXPECT_TRUE(csr2->pageTableManagerInitialized); + + memoryManager->freeGraphicsMemory(graphicsAllocation); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenPageTableManagerPointerWhenCallBlitBufferThenPageTableManagerInitializedForProperCsr) { + auto bcsCsr = new 
MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + auto bcsCsr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + pDevice->resetCommandStreamReceiver(bcsCsr); + + MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); + bcsCsr->pageTableManager.reset(pageTableManager); + MockGmmPageTableMngr *pageTableManager2 = new MockGmmPageTableMngr(); + bcsCsr2->pageTableManager.reset(pageTableManager2); + + EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(bcsCsr, ::testing::_)).Times(1); + EXPECT_CALL(*pageTableManager2, initContextAuxTableRegister(bcsCsr2, ::testing::_)).Times(0); + + auto memoryManager = pDevice->getMemoryManager(); + auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + + EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); + EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); + + auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation, //dstAllocation + graphicsAllocation, //srcAllocation + 0, //dstOffset + 0, //srcOffset + 0, //copySize + 0, //srcRowPitch + 0, //srcSlicePitch + 0, //dstRowPitch + 0, //dstSlicePitch + bcsCsr->getClearColorAllocation() //clearColorAllocation + ); + BlitPropertiesContainer container; + container.push_back(blitProperties); + + bcsCsr->blitBuffer(container, true, false, *pDevice); + + EXPECT_TRUE(bcsCsr->pageTableManagerInitialized); + EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); + + EXPECT_CALL(*pageTableManager2, initContextAuxTableRegister(bcsCsr2, ::testing::_)).Times(1); + pDevice->resetCommandStreamReceiver(bcsCsr2); + bcsCsr2->blitBuffer(container, true, false, *pDevice); + + EXPECT_TRUE(bcsCsr2->pageTableManagerInitialized); + + memoryManager->freeGraphicsMemory(graphicsAllocation); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, 
givenPageTableManagerPointerWhenCallBlitBufferAndPageTableManagerInitializedThenNotInitializeAgain) { + auto bcsCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + pDevice->resetCommandStreamReceiver(bcsCsr); + + MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); + bcsCsr->pageTableManager.reset(pageTableManager); + + EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(bcsCsr, ::testing::_)).Times(1); + + auto memoryManager = pDevice->getMemoryManager(); + auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + + EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); + + auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation, //dstAllocation + graphicsAllocation, //srcAllocation + 0, //dstOffset + 0, //srcOffset + 0, //copySize + 0, //srcRowPitch + 0, //srcSlicePitch + 0, //dstRowPitch + 0, //dstSlicePitch + bcsCsr->getClearColorAllocation() //clearColorAllocation + ); + BlitPropertiesContainer container; + container.push_back(blitProperties); + + bcsCsr->blitBuffer(container, true, false, *pDevice); + + EXPECT_TRUE(bcsCsr->pageTableManagerInitialized); + + EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(bcsCsr, ::testing::_)).Times(0); + bcsCsr->blitBuffer(container, true, false, *pDevice); + + memoryManager->freeGraphicsMemory(graphicsAllocation); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNullPageTableManagerWhenCallBlitBufferThenPageTableManagerIsNotInitialized) { + auto bcsCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + auto bcsCsr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + pDevice->resetCommandStreamReceiver(bcsCsr); + + bcsCsr->pageTableManager.reset(nullptr); + 
bcsCsr2->pageTableManager.reset(nullptr); + + auto memoryManager = pDevice->getMemoryManager(); + auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + + EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); + EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); + + auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation, //dstAllocation + graphicsAllocation, //srcAllocation + 0, //dstOffset + 0, //srcOffset + 0, //copySize + 0, //srcRowPitch + 0, //srcSlicePitch + 0, //dstRowPitch + 0, //dstSlicePitch + bcsCsr->getClearColorAllocation() //clearColorAllocation + ); + BlitPropertiesContainer container; + container.push_back(blitProperties); + + bcsCsr->blitBuffer(container, true, false, *pDevice); + + EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); + EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); + + pDevice->resetCommandStreamReceiver(bcsCsr2); + bcsCsr2->blitBuffer(container, true, false, *pDevice); + + EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); + + bcsCsr2->pageTableManagerInitialized = true; + EXPECT_NO_THROW(bcsCsr2->blitBuffer(container, true, false, *pDevice)); + + memoryManager->freeGraphicsMemory(graphicsAllocation); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenInitializingPageTableManagerRegisterFailsThenPageTableManagerIsNotInitialized) { + auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + pDevice->resetCommandStreamReceiver(csr); + + MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); + csr->pageTableManager.reset(pageTableManager); + + EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(csr, ::testing::_)).Times(2).WillRepeatedly(::testing::Return(GMM_ERROR)); + + auto memoryManager = pDevice->getMemoryManager(); + auto graphicsAllocation = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + IndirectHeap cs(graphicsAllocation); + + EXPECT_FALSE(csr->pageTableManagerInitialized); + + DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); + + csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); + + EXPECT_FALSE(csr->pageTableManagerInitialized); + + csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); + + EXPECT_FALSE(csr->pageTableManagerInitialized); + memoryManager->freeGraphicsMemory(graphicsAllocation); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenCsrIsMarkedWithNewResourceThenCallBatchedSubmission) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; + commandStreamReceiver.newResources = true; + + flushTask(commandStreamReceiver); + + EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSubmissionChangesFromSingleSubdeviceThenCallBatchedSubmission) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; + commandStreamReceiver.wasSubmittedToSingleSubdevice = true; + + flushTask(commandStreamReceiver); + + EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSubmissionChangesToSingleSubdeviceThenCallBatchedSubmission) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; + flushTaskFlags.useSingleSubdevice = true; + + flushTask(commandStreamReceiver); + + EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenGpuIsIdleWhenCsrIsEnabledToFlushOnGpuIdleThenCallBatchedSubmission) { + auto 
&commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; + commandStreamReceiver.useGpuIdleImplicitFlush = true; + commandStreamReceiver.taskCount = 1u; + *commandStreamReceiver.getTagAddress() = 1u; + + flushTask(commandStreamReceiver); + + EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); + + *commandStreamReceiver.getTagAddress() = 2u; +} + +using SingleRootDeviceCommandStreamReceiverTests = CommandStreamReceiverFlushTaskTests; + +HWTEST_F(SingleRootDeviceCommandStreamReceiverTests, givenMultipleEventInSingleRootDeviceEnvironmentWhenTheyArePassedToEnqueueWithoutSubmissionThenSemaphoreWaitCommandIsNotProgrammed) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto deviceFactory = std::make_unique(1, 0); + auto device0 = deviceFactory->rootDevices[0]; + + auto mockCsr0 = new MockCommandStreamReceiver(*device0->executionEnvironment, device0->getRootDeviceIndex(), device0->getDeviceBitfield()); + + device0->resetCommandStreamReceiver(mockCsr0); + + cl_device_id devices[] = {device0}; + + auto context = std::make_unique(ClDeviceVector(devices, 1), false); + + auto pCmdQ0 = context.get()->getSpecialQueue(0u); + + Event event1(pCmdQ0, CL_COMMAND_NDRANGE_KERNEL, 5, 15); + Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); + Event event3(pCmdQ0, CL_COMMAND_NDRANGE_KERNEL, 4, 20); + UserEvent userEvent1(&pCmdQ0->getContext()); + + userEvent1.setStatus(CL_COMPLETE); + + cl_event eventWaitList[] = + { + &event1, + &event2, + &event3, + &userEvent1, + }; + cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); + + { + pCmdQ0->enqueueMarkerWithWaitList( + numEventsInWaitList, + eventWaitList, + nullptr); + + HardwareParse csHwParser; + csHwParser.parseCommands(pCmdQ0->getCS(0)); + auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); + + EXPECT_EQ(0u, semaphores.size()); + } +} diff --git 
a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index f5faf8d926..b669d5932e 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -14,7 +14,6 @@ #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" -#include "shared/test/common/mocks/mock_gmm_page_table_mngr.h" #include "shared/test/common/mocks/mock_svm_manager.h" #include "opencl/source/helpers/hardware_commands_helper.h" @@ -1875,288 +1874,3 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenWaitForCompletionWithTimeoutI mockCsr.submissionAggregator->recordCommandBuffer(cmdBuffer.release()); EXPECT_FALSE(mockCsr.waitForCompletionWithTimeout(false, 0, 1)); } - -HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenFlushTaskIsCalledThenInitializePageTableManagerRegister) { - auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - auto csr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - pDevice->resetCommandStreamReceiver(csr); - - MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); - csr->pageTableManager.reset(pageTableManager); - MockGmmPageTableMngr *pageTableManager2 = new MockGmmPageTableMngr(); - csr2->pageTableManager.reset(pageTableManager2); - - EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(csr, ::testing::_)).Times(1); - EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(csr2, ::testing::_)).Times(0); - - auto memoryManager = pDevice->getMemoryManager(); - auto graphicsAllocation = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); - IndirectHeap cs(graphicsAllocation); - - EXPECT_FALSE(csr->pageTableManagerInitialized); - EXPECT_FALSE(csr2->pageTableManagerInitialized); - - DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); - - csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); - - EXPECT_TRUE(csr->pageTableManagerInitialized); - EXPECT_FALSE(csr2->pageTableManagerInitialized); - - csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); - - EXPECT_CALL(*pageTableManager2, initContextAuxTableRegister(csr2, ::testing::_)).Times(1); - pDevice->resetCommandStreamReceiver(csr2); - csr2->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); - EXPECT_TRUE(csr2->pageTableManagerInitialized); - - memoryManager->freeGraphicsMemory(graphicsAllocation); -} - -HWTEST_F(CommandStreamReceiverFlushTaskTests, givenPageTableManagerPointerWhenCallBlitBufferThenPageTableManagerInitializedForProperCsr) { - auto bcsCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - auto bcsCsr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - pDevice->resetCommandStreamReceiver(bcsCsr); - - MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); - bcsCsr->pageTableManager.reset(pageTableManager); - MockGmmPageTableMngr *pageTableManager2 = new MockGmmPageTableMngr(); - bcsCsr2->pageTableManager.reset(pageTableManager2); - - EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(bcsCsr, ::testing::_)).Times(1); - EXPECT_CALL(*pageTableManager2, initContextAuxTableRegister(bcsCsr2, ::testing::_)).Times(0); - - auto memoryManager = pDevice->getMemoryManager(); - auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 
MemoryConstants::pageSize}); - - EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); - EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); - - auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation, //dstAllocation - graphicsAllocation, //srcAllocation - 0, //dstOffset - 0, //srcOffset - 0, //copySize - 0, //srcRowPitch - 0, //srcSlicePitch - 0, //dstRowPitch - 0, //dstSlicePitch - bcsCsr->getClearColorAllocation() //clearColorAllocation - ); - BlitPropertiesContainer container; - container.push_back(blitProperties); - - bcsCsr->blitBuffer(container, true, false, *pDevice); - - EXPECT_TRUE(bcsCsr->pageTableManagerInitialized); - EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); - - EXPECT_CALL(*pageTableManager2, initContextAuxTableRegister(bcsCsr2, ::testing::_)).Times(1); - pDevice->resetCommandStreamReceiver(bcsCsr2); - bcsCsr2->blitBuffer(container, true, false, *pDevice); - - EXPECT_TRUE(bcsCsr2->pageTableManagerInitialized); - - memoryManager->freeGraphicsMemory(graphicsAllocation); -} - -HWTEST_F(CommandStreamReceiverFlushTaskTests, givenPageTableManagerPointerWhenCallBlitBufferAndPageTableManagerInitializedThenNotInitializeAgain) { - auto bcsCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - pDevice->resetCommandStreamReceiver(bcsCsr); - - MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); - bcsCsr->pageTableManager.reset(pageTableManager); - - EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(bcsCsr, ::testing::_)).Times(1); - - auto memoryManager = pDevice->getMemoryManager(); - auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); - - EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); - - auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation, //dstAllocation - graphicsAllocation, //srcAllocation - 
0, //dstOffset - 0, //srcOffset - 0, //copySize - 0, //srcRowPitch - 0, //srcSlicePitch - 0, //dstRowPitch - 0, //dstSlicePitch - bcsCsr->getClearColorAllocation() //clearColorAllocation - ); - BlitPropertiesContainer container; - container.push_back(blitProperties); - - bcsCsr->blitBuffer(container, true, false, *pDevice); - - EXPECT_TRUE(bcsCsr->pageTableManagerInitialized); - - EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(bcsCsr, ::testing::_)).Times(0); - bcsCsr->blitBuffer(container, true, false, *pDevice); - - memoryManager->freeGraphicsMemory(graphicsAllocation); -} - -HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNullPageTableManagerWhenCallBlitBufferThenPageTableManagerIsNotInitialized) { - auto bcsCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - auto bcsCsr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - pDevice->resetCommandStreamReceiver(bcsCsr); - - bcsCsr->pageTableManager.reset(nullptr); - bcsCsr2->pageTableManager.reset(nullptr); - - auto memoryManager = pDevice->getMemoryManager(); - auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); - - EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); - EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); - - auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation, //dstAllocation - graphicsAllocation, //srcAllocation - 0, //dstOffset - 0, //srcOffset - 0, //copySize - 0, //srcRowPitch - 0, //srcSlicePitch - 0, //dstRowPitch - 0, //dstSlicePitch - bcsCsr->getClearColorAllocation() //clearColorAllocation - ); - BlitPropertiesContainer container; - container.push_back(blitProperties); - - bcsCsr->blitBuffer(container, true, false, *pDevice); - - EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); - 
EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); - - pDevice->resetCommandStreamReceiver(bcsCsr2); - bcsCsr2->blitBuffer(container, true, false, *pDevice); - - EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); - - bcsCsr2->pageTableManagerInitialized = true; - EXPECT_NO_THROW(bcsCsr2->blitBuffer(container, true, false, *pDevice)); - - memoryManager->freeGraphicsMemory(graphicsAllocation); -} - -HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenInitializingPageTableManagerRegisterFailsThenPageTableManagerIsNotInitialized) { - auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - pDevice->resetCommandStreamReceiver(csr); - - MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); - csr->pageTableManager.reset(pageTableManager); - - EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(csr, ::testing::_)).Times(2).WillRepeatedly(::testing::Return(GMM_ERROR)); - - auto memoryManager = pDevice->getMemoryManager(); - auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); - IndirectHeap cs(graphicsAllocation); - - EXPECT_FALSE(csr->pageTableManagerInitialized); - - DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); - - csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); - - EXPECT_FALSE(csr->pageTableManagerInitialized); - - csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); - - EXPECT_FALSE(csr->pageTableManagerInitialized); - memoryManager->freeGraphicsMemory(graphicsAllocation); -} - -HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenCsrIsMarkedWithNewResourceThenCallBatchedSubmission) { - auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; - commandStreamReceiver.newResources = true; - - 
flushTask(commandStreamReceiver); - - EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); -} - -HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSubmissionChangesFromSingleSubdeviceThenCallBatchedSubmission) { - auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; - commandStreamReceiver.wasSubmittedToSingleSubdevice = true; - - flushTask(commandStreamReceiver); - - EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); -} - -HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSubmissionChangesToSingleSubdeviceThenCallBatchedSubmission) { - auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; - flushTaskFlags.useSingleSubdevice = true; - - flushTask(commandStreamReceiver); - - EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); -} - -HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenGpuIsIdleWhenCsrIsEnabledToFlushOnGpuIdleThenCallBatchedSubmission) { - auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; - commandStreamReceiver.useGpuIdleImplicitFlush = true; - commandStreamReceiver.taskCount = 1u; - *commandStreamReceiver.getTagAddress() = 1u; - - flushTask(commandStreamReceiver); - - EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); - - *commandStreamReceiver.getTagAddress() = 2u; -} - -using SingleRootDeviceCommandStreamReceiverTests = CommandStreamReceiverFlushTaskTests; - -HWTEST_F(SingleRootDeviceCommandStreamReceiverTests, givenMultipleEventInSingleRootDeviceEnvironmentWhenTheyArePassedToEnqueueWithoutSubmissionThenSemaphoreWaitCommandIsNotProgrammed) { - using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - - auto deviceFactory = std::make_unique(1, 0); - auto device0 = deviceFactory->rootDevices[0]; - - auto mockCsr0 = new 
MockCommandStreamReceiver(*device0->executionEnvironment, device0->getRootDeviceIndex(), device0->getDeviceBitfield()); - - device0->resetCommandStreamReceiver(mockCsr0); - - cl_device_id devices[] = {device0}; - - auto context = std::make_unique(ClDeviceVector(devices, 1), false); - - auto pCmdQ0 = context.get()->getSpecialQueue(0u); - - Event event1(pCmdQ0, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); - Event event3(pCmdQ0, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - UserEvent userEvent1(&pCmdQ0->getContext()); - - userEvent1.setStatus(CL_COMPLETE); - - cl_event eventWaitList[] = - { - &event1, - &event2, - &event3, - &userEvent1, - }; - cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); - - { - pCmdQ0->enqueueMarkerWithWaitList( - numEventsInWaitList, - eventWaitList, - nullptr); - - HardwareParse csHwParser; - csHwParser.parseCommands(pCmdQ0->getCS(0)); - auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); - - EXPECT_EQ(0u, semaphores.size()); - } -} diff --git a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp index 86ca0e1ef6..a529751b64 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp @@ -72,7 +72,9 @@ class MockCommandStreamReceiverHW : public UltCommandStreamReceiver LinearStream *stream = nullptr; }; -HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndEventWhenMarkerProfilingEnabledThenPipeControllAddedBeforeWritingTimestamp) { +HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndEventWhenMarkerProfilingEnabledThenPipeControlAddedBeforeWritingTimestamp) { + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto commandStreamReceiver = std::make_unique>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), 
device->getDeviceBitfield()); auto commandStreamReceiverPtr = commandStreamReceiver.get(); commandStreamReceiver->timestampPacketWriteEnabled = true; @@ -86,10 +88,62 @@ HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndEventWhenMarkerProfilingEnab HardwareParse hwParser; hwParser.parseCommands(*(commandStreamReceiverPtr->stream), 0); - auto storeRegMemIt = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + GenCmdList storeRegMemList = hwParser.getCommandsList(); + EXPECT_EQ(4u, storeRegMemList.size()); + auto storeRegMemIt = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(storeRegMemIt, hwParser.cmdList.end()); - auto pipeControlIt = find(hwParser.cmdList.begin(), storeRegMemIt); + auto pipeControlIt = find(hwParser.cmdList.begin(), storeRegMemIt); EXPECT_NE(storeRegMemIt, pipeControlIt); + EXPECT_NE(hwParser.cmdList.end(), pipeControlIt); + + clReleaseEvent(event); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, TimestampPacketTests, givenEmptyWaitlistAndEventWhenMarkerProfilingEnabledOnMultiTileCommandQueueThenCrossTileBarrierAddedBeforeWritingTimestamp) { + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto commandStreamReceiver = std::make_unique>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); + auto commandStreamReceiverPtr = commandStreamReceiver.get(); + commandStreamReceiver->timestampPacketWriteEnabled = true; + commandStreamReceiver->activePartitions = 2; + commandStreamReceiver->activePartitionsConfig = 2; + commandStreamReceiver->staticWorkPartitioningEnabled = true; + + device->resetCommandStreamReceiver(commandStreamReceiver.release()); + *ptrOffset(commandStreamReceiverPtr->tagAddress, 
commandStreamReceiverPtr->postSyncWriteOffset) = *commandStreamReceiverPtr->tagAddress; + + auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); + cmdQ->setProfilingEnabled(); + + cl_event event; + cmdQ->enqueueMarkerWithWaitList(0, nullptr, &event); + + HardwareParse hwParser; + hwParser.parseCommands(*(commandStreamReceiverPtr->stream), 0); + GenCmdList storeRegMemList = hwParser.getCommandsList(); + EXPECT_EQ(4u, storeRegMemList.size()); + auto storeRegMemIt = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + EXPECT_NE(storeRegMemIt, hwParser.cmdList.end()); + GenCmdList::reverse_iterator rItorStoreRegMemIt(storeRegMemIt); + auto pipeControlIt = reverse_find(rItorStoreRegMemIt, hwParser.cmdList.rbegin()); + auto pipeControl = genCmdCast(*pipeControlIt); + EXPECT_NE(nullptr, pipeControl); + + GenCmdList::iterator cmdIt = pipeControlIt.base(); + auto miAtomic = genCmdCast(*cmdIt); + EXPECT_NE(nullptr, miAtomic); + + cmdIt++; + auto miSemaphore = genCmdCast(*cmdIt); + EXPECT_NE(nullptr, miSemaphore); + + cmdIt++; + auto bbStart = genCmdCast(*cmdIt); + EXPECT_NE(nullptr, bbStart); clReleaseEvent(event); } diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 80570cd272..af6d4734d1 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -533,6 +533,11 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { size_t getCmdsSizeForHardwareContext() const override { return 0; } + void programComputeBarrierCommand(LinearStream &cmdStream) override { + } + size_t getCmdsSizeForComputeBarrierCommand() const override { + return 0; + } GraphicsAllocation *getClearColorAllocation() override { return nullptr; } bool createPreemptionAllocation() override { diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 968f8ebdce..24f94017f5 100644 --- 
a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -302,6 +302,13 @@ class CommandStreamReceiver { return postSyncWriteOffset; } + inline bool isMultiTileOperationEnabled() const { + return (activePartitions > 1) && staticWorkPartitioningEnabled; + } + + virtual void programComputeBarrierCommand(LinearStream &cmdStream) = 0; + virtual size_t getCmdsSizeForComputeBarrierCommand() const = 0; + protected: void cleanupResources(); void printDeviceIndex(); diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 2055ea6750..cd93645343 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -135,6 +135,13 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { void postInitFlagsSetup() override; void programActivePartitionConfig(LinearStream &csr); + void programComputeBarrierCommand(LinearStream &cmdStream) override { + programStallingNoPostSyncCommandsForBarrier(cmdStream); + } + size_t getCmdsSizeForComputeBarrierCommand() const override { + return getCmdSizeForStallingNoPostSyncCommands(); + } + protected: void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags); void programL3(LinearStream &csr, uint32_t &newL3Config); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 50c2a5773a..a1dd0f719f 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -218,7 +218,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( args.notifyEnable = isUsedNotifyEnableForPostSync(); args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired; args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush; - 
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled; + args.workloadPartitionOffset = isMultiTileOperationEnabled(); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( commandStreamTask, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, @@ -1192,7 +1192,7 @@ void CommandStreamReceiverHw::flushPipeControl() { PipeControlArgs args(true); args.notifyEnable = isUsedNotifyEnableForPostSync(); - args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled; + args.workloadPartitionOffset = isMultiTileOperationEnabled(); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, getTagAllocation()->getGpuAddress(), diff --git a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl index 0106a19614..2f8ab1adae 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl @@ -197,7 +197,7 @@ inline void CommandStreamReceiverHw::addPipeControlBeforeStateSip(Lin template inline size_t CommandStreamReceiverHw::getCmdSizeForStallingNoPostSyncCommands() const { - if (this->activePartitions > 1 && this->staticWorkPartitioningEnabled) { + if (isMultiTileOperationEnabled()) { return ImplicitScalingDispatch::getBarrierSize(peekHwInfo(), false, false); @@ -208,7 +208,7 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForStallingNoPostSyn template inline size_t CommandStreamReceiverHw::getCmdSizeForStallingPostSyncCommands() const { - if (this->activePartitions > 1 && this->staticWorkPartitioningEnabled) { + if (isMultiTileOperationEnabled()) { return ImplicitScalingDispatch::getBarrierSize(peekHwInfo(), false, true); @@ -220,7 +220,7 @@ inline size_t 
CommandStreamReceiverHw::getCmdSizeForStallingPostSyncC template inline void CommandStreamReceiverHw::programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream) { PipeControlArgs args; - if (this->activePartitions > 1 && this->staticWorkPartitioningEnabled) { + if (isMultiTileOperationEnabled()) { ImplicitScalingDispatch::dispatchBarrierCommands(cmdStream, this->deviceBitfield, args, @@ -238,7 +238,7 @@ template inline void CommandStreamReceiverHw::programStallingPostSyncCommandsForBarrier(LinearStream &cmdStream, TagNodeBase &tagNode) { auto barrierTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(tagNode); PipeControlArgs args(true); - if (this->activePartitions > 1 && this->staticWorkPartitioningEnabled) { + if (isMultiTileOperationEnabled()) { args.workloadPartitionOffset = true; ImplicitScalingDispatch::dispatchBarrierCommands(cmdStream, this->deviceBitfield, diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index b46ad34acf..5771b500f0 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -107,6 +107,13 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { return 0; } + void programComputeBarrierCommand(LinearStream &cmdStream) override { + programComputeBarrierCommandCalled = true; + } + size_t getCmdsSizeForComputeBarrierCommand() const override { + return 0; + } + bool createPreemptionAllocation() override { if (createPreemptionAllocationParentCall) { return CommandStreamReceiver::createPreemptionAllocation(); @@ -139,6 +146,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { bool programHardwareContextCalled = false; bool createPreemptionAllocationReturn = true; bool createPreemptionAllocationParentCall = false; + bool programComputeBarrierCommandCalled = false; }; class MockCommandStreamReceiverWithFailingSubmitBatch : public 
MockCommandStreamReceiver {