// Review summary of the git diff below (five files under runtime/ and unit_tests/).
//
// runtime/command_stream/command_stream_receiver_hw.h
//   - programEnginePrologue() drops its `const DispatchFlags &` parameter and now takes
//     only the LinearStream.
// runtime/command_stream/command_stream_receiver_hw_base.inl
//   - flushTask() call site updated to the one-argument programEnginePrologue().
//   - blitBuffer() now calls programEnginePrologue(commandStream) before iterating
//     blitPropertiesContainer, calls HardwareCommandsHelper::programGlobalFence(commandStream)
//     before programMiFlushDw(), and calls makeResident(*globalFenceAllocation) when
//     globalFenceAllocation is non-null.
//   - The programEnginePrologue() definition shown in this file has an empty body.
// runtime/helpers/hardware_commands_helper.h / .inl
//   - Adds the static programGlobalFence(LinearStream &) declaration and a definition
//     with an empty body.
// unit_tests/command_stream/command_stream_receiver_hw_tests.cpp
//   - Adds BcsTests.givenLocalMemoryEnabledWhenBlitDispatchedThenMakeAllAllocationsResident:
//     sets the EnableLocalMemory debug flag, calls createGlobalFenceAllocation(), blits two
//     buffers, then expects both buffers, the tag allocation and globalFenceAllocation to
//     be resident, makeSurfacePackNonResidentCalled to go from 0 to 1, and
//     makeResidentAllocations to hold 6 entries.
//
// NOTE(review): the patch text appears to have been flattened (line breaks collapsed) and
// template argument lists look stripped (e.g. `template -inline void`,
// `reinterpret_cast(0x12340000)`, `std::make_unique>(`) - presumably an extraction
// artifact. Confirm against the original commit before applying or compiling anything
// taken from this text.
diff --git a/runtime/command_stream/command_stream_receiver_hw.h b/runtime/command_stream/command_stream_receiver_hw.h index f154230b49..5a74c36874 100644 --- a/runtime/command_stream/command_stream_receiver_hw.h +++ b/runtime/command_stream/command_stream_receiver_hw.h @@ -91,7 +91,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags); void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags); - void programEnginePrologue(LinearStream &csr, const DispatchFlags &dispatchFlags); + void programEnginePrologue(LinearStream &csr); size_t getCmdSizeForPrologue(const DispatchFlags &dispatchFlags) const; void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd); diff --git a/runtime/command_stream/command_stream_receiver_hw_base.inl b/runtime/command_stream/command_stream_receiver_hw_base.inl index 595e8c7073..03c1a5a3a0 100644 --- a/runtime/command_stream/command_stream_receiver_hw_base.inl +++ b/runtime/command_stream/command_stream_receiver_hw_base.inl @@ -261,7 +261,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (executionEnvironment.rootDeviceEnvironments[device.getRootDeviceIndex()]->pageTableManager.get() && !pageTableManagerInitialized) { pageTableManagerInitialized = executionEnvironment.rootDeviceEnvironments[device.getRootDeviceIndex()]->pageTableManager->initPageTableManagerRegisters(this); } - programEnginePrologue(commandStreamCSR, dispatchFlags); + programEnginePrologue(commandStreamCSR); programComputeMode(commandStreamCSR, dispatchFlags); programL3(commandStreamCSR, dispatchFlags, newL3Config); programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs); @@ -821,6 +821,8 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont auto newTaskCount = taskCount + 1; latestSentTaskCount = newTaskCount; + programEnginePrologue(commandStream); + for (auto &blitProperties : 
blitPropertiesContainer) { TimestampPacketHelper::programCsrDependencies(commandStream, blitProperties.csrDependencies); @@ -838,6 +840,8 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont makeResident(*blitProperties.dstAllocation); } + HardwareCommandsHelper::programGlobalFence(commandStream); + HardwareCommandsHelper::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount); auto batchBufferEnd = reinterpret_cast(commandStream.getSpace(sizeof(MI_BATCH_BUFFER_END))); @@ -846,6 +850,9 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont alignToCacheLine(commandStream); makeResident(*tagAllocation); + if (globalFenceAllocation) { + makeResident(*globalFenceAllocation); + } BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, commandStream.getUsed(), &commandStream}; @@ -888,7 +895,7 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForEpilogue(const Di return 0u; } template -inline void CommandStreamReceiverHw::programEnginePrologue(LinearStream &csr, const DispatchFlags &dispatchFlags) { +inline void CommandStreamReceiverHw::programEnginePrologue(LinearStream &csr) { } template diff --git a/runtime/helpers/hardware_commands_helper.h b/runtime/helpers/hardware_commands_helper.h index e4d60b29a4..a20c1393f3 100644 --- a/runtime/helpers/hardware_commands_helper.h +++ b/runtime/helpers/hardware_commands_helper.h @@ -149,6 +149,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode); + static void programGlobalFence(LinearStream &commandStream); static void programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData); static void appendMiFlushDw(typename GfxFamily::MI_FLUSH_DW *miFlushDwCmd); static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t 
writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize); diff --git a/runtime/helpers/hardware_commands_helper.inl b/runtime/helpers/hardware_commands_helper.inl index f28489c15d..823a47a29a 100644 --- a/runtime/helpers/hardware_commands_helper.inl +++ b/runtime/helpers/hardware_commands_helper.inl @@ -490,4 +490,8 @@ void HardwareCommandsHelper::programMiFlushDw(LinearStream &commandSt miFlushDwCmd->setImmediateData(immediateData); appendMiFlushDw(miFlushDwCmd); } + +template +void HardwareCommandsHelper::programGlobalFence(LinearStream &commandStream) { +} } // namespace NEO diff --git a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp index 535ff9b568..74ce9d4d70 100644 --- a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp @@ -612,6 +612,50 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations EXPECT_EQ(5u, csr.makeResidentAllocations.size()); } +HWTEST_F(BcsTests, givenLocalMemoryEnabledWhenBlitDispatchedThenMakeAllAllocationsResident) { + DebugManagerStateRestore restore; + DebugManager.flags.EnableLocalMemory.set(true); + + auto bcsOsContext = std::unique_ptr(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false)); + auto bcsCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex()); + bcsCsr->setupContext(*bcsOsContext); + bcsCsr->initializeTagAllocation(); + bcsCsr->createGlobalFenceAllocation(); + bcsCsr->storeMakeResidentAllocations = true; + + cl_int retVal = CL_SUCCESS; + auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + void *hostPtr1 = reinterpret_cast(0x12340000); + void *hostPtr2 = 
reinterpret_cast(0x43210000); + + EXPECT_EQ(0u, bcsCsr->makeSurfacePackNonResidentCalled); + + auto blitProperties1 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + *bcsCsr, buffer1->getGraphicsAllocation(), nullptr, hostPtr1, + buffer1->getGraphicsAllocation()->getGpuAddress(), 0, + 0, 0, 1); + + auto blitProperties2 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + *bcsCsr, buffer2->getGraphicsAllocation(), nullptr, hostPtr2, + buffer2->getGraphicsAllocation()->getGpuAddress(), 0, + 0, 0, 1); + + BlitPropertiesContainer blitPropertiesContainer; + blitPropertiesContainer.push_back(blitProperties1); + blitPropertiesContainer.push_back(blitProperties2); + + bcsCsr->blitBuffer(blitPropertiesContainer, false); + + EXPECT_TRUE(bcsCsr->isMadeResident(buffer1->getGraphicsAllocation())); + EXPECT_TRUE(bcsCsr->isMadeResident(buffer2->getGraphicsAllocation())); + EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->getTagAllocation())); + EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->globalFenceAllocation)); + EXPECT_EQ(1u, bcsCsr->makeSurfacePackNonResidentCalled); + + EXPECT_EQ(6u, bcsCsr->makeResidentAllocations.size()); +} + HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.recordFlusheBatchBuffer = true;