mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
Add cache flush for blit enqueues
Change-Id: I31dbeed9973c5077bf79ea7c7534b2430bca5083 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
ec647dabe9
commit
db012c9d5c
@@ -208,9 +208,13 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
nodesCount = estimateTimestampPacketNodesCount(multiDispatchInfo);
|
||||
}
|
||||
|
||||
if (blitEnqueue && !blockQueue && getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired()) {
|
||||
if (blitEnqueue) {
|
||||
auto allocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
|
||||
timestampPacketDependencies.barrierNodes.add(allocator->getTag());
|
||||
|
||||
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
||||
if (!blockQueue && getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired()) {
|
||||
timestampPacketDependencies.barrierNodes.add(allocator->getTag());
|
||||
}
|
||||
}
|
||||
|
||||
if (nodesCount > 0) {
|
||||
@@ -461,12 +465,21 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const Mu
|
||||
eventsRequest.fillCsrDependencies(blitProperties.csrDependencies, *blitCommandStreamReceiver,
|
||||
CsrDependencies::DependenciesType::All);
|
||||
|
||||
blitProperties.csrDependencies.push_back(&timestampPacketDependencies.cacheFlushNodes);
|
||||
blitProperties.csrDependencies.push_back(&timestampPacketDependencies.previousEnqueueNodes);
|
||||
blitProperties.csrDependencies.push_back(&timestampPacketDependencies.barrierNodes);
|
||||
}
|
||||
|
||||
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
|
||||
blitProperties.outputTimestampPacket = currentTimestampPacketNode;
|
||||
|
||||
auto cacheFlushTimestampPacketGpuAddress = timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]->getGpuAddress() +
|
||||
offsetof(TimestampPacketStorage, packets[0].contextEnd);
|
||||
|
||||
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||
commandStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
cacheFlushTimestampPacketGpuAddress, 0, true, device->getHardwareInfo());
|
||||
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(commandStream, *currentTimestampPacketNode);
|
||||
|
||||
return blitProperties;
|
||||
@@ -920,6 +933,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
if (timestampPacketContainer) {
|
||||
timestampPacketContainer->makeResident(getGpgpuCommandStreamReceiver());
|
||||
timestampPacketDependencies.previousEnqueueNodes.makeResident(getGpgpuCommandStreamReceiver());
|
||||
timestampPacketDependencies.cacheFlushNodes.makeResident(getGpgpuCommandStreamReceiver());
|
||||
}
|
||||
|
||||
for (auto surface : CreateRange(surfaces, surfaceCount)) {
|
||||
|
||||
@@ -210,7 +210,9 @@ void GpgpuWalkerHelper<GfxFamily>::adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxF
|
||||
template <typename GfxFamily>
|
||||
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
if (blitEnqueue) {
|
||||
return TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>();
|
||||
auto &hwInfo = commandQueue.getDevice().getHardwareInfo();
|
||||
return TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>() +
|
||||
MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
|
||||
}
|
||||
size_t expectedSizeCS = 0;
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
|
||||
@@ -285,6 +285,7 @@ void CommandWithoutKernel::dispatchBlitOperation() {
|
||||
UNRECOVERABLE_IF(kernelOperation->blitPropertiesContainer.size() != 1);
|
||||
auto &blitProperties = *kernelOperation->blitPropertiesContainer.begin();
|
||||
eventsRequest.fillCsrDependencies(blitProperties.csrDependencies, *bcsCsr, CsrDependencies::DependenciesType::All);
|
||||
blitProperties.csrDependencies.push_back(&timestampPacketDependencies->cacheFlushNodes);
|
||||
blitProperties.csrDependencies.push_back(&timestampPacketDependencies->previousEnqueueNodes);
|
||||
blitProperties.csrDependencies.push_back(&timestampPacketDependencies->barrierNodes);
|
||||
blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0];
|
||||
@@ -403,6 +404,7 @@ void Command::makeTimestampPacketsResident(CommandStreamReceiver &commandStreamR
|
||||
currentTimestampPacketNodes->makeResident(commandStreamReceiver);
|
||||
}
|
||||
if (timestampPacketDependencies) {
|
||||
timestampPacketDependencies->cacheFlushNodes.makeResident(commandStreamReceiver);
|
||||
timestampPacketDependencies->previousEnqueueNodes.makeResident(commandStreamReceiver);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -175,6 +175,8 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne
|
||||
multiDispatchInfo.setBuiltinOpParams(builtinOpParams);
|
||||
|
||||
mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true);
|
||||
|
||||
timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
|
||||
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies,
|
||||
eventsRequest, mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false);
|
||||
|
||||
@@ -214,6 +216,7 @@ HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowO
|
||||
multiDispatchInfo.setBuiltinOpParams(builtinOpParams);
|
||||
|
||||
mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true);
|
||||
timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
|
||||
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies,
|
||||
eventsRequest, mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false);
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
|
||||
@@ -992,6 +992,50 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandS
|
||||
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
|
||||
}
|
||||
|
||||
// Checks that, after a write-buffer enqueue on a queue that has a BCS command stream receiver,
// the queue's command stream contains a PIPE_CONTROL with DC flush + CS stall that writes
// immediate data 0 to a non-zero address, and that the first MI_SEMAPHORE_WAIT in the BCS
// command stream waits on that same address.
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlitEnqueueWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
// NOTE(review): MI_ATOMIC is not referenced anywhere else in this test body.
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||
|
||||
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->getBcsCommandStreamReceiver());
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
|
||||
// presumably keeps the transfer off the CPU-copy path so that commands get programmed — TODO confirm
buffer->forceDisallowCPUCopy = true;
|
||||
// Arbitrary non-null host pointer value; the test body itself never dereferences it.
void *hostPtr = reinterpret_cast<void *>(0x12340000);
|
||||
|
||||
// 1-byte write at offset 0; second argument is presumably the blocking flag — verify against the API.
cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
// Parse the queue's command stream and the BCS command stream separately.
HardwareParse hwParserGpGpu;
|
||||
HardwareParse hwParserBcs;
|
||||
hwParserGpGpu.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
|
||||
hwParserBcs.parseCommands<FamilyType>(bcsCsr->commandStream);
|
||||
|
||||
auto gpgpuPipeControls = findAll<PIPE_CONTROL *>(hwParserGpGpu.cmdList.begin(), hwParserGpGpu.cmdList.end());
|
||||
uint64_t cacheFlushWriteAddress = 0;
|
||||
|
||||
// Find the first PIPE_CONTROL whose 64-bit address (high << 32 | low) is non-zero and
// validate its flush/stall/immediate-data fields; later PIPE_CONTROLs are not inspected.
for (auto &pipeControl : gpgpuPipeControls) {
|
||||
auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*pipeControl);
|
||||
uint64_t addressHigh = static_cast<uint64_t>(pipeControlCmd->getAddressHigh()) << 32;
|
||||
uint64_t addressLow = pipeControlCmd->getAddress();
|
||||
cacheFlushWriteAddress = addressHigh | addressLow;
|
||||
if (cacheFlushWriteAddress != 0) {
|
||||
EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
|
||||
EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
|
||||
EXPECT_EQ(0u, pipeControlCmd->getImmediateData());
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Fails when no PIPE_CONTROL with a non-zero address was found.
EXPECT_NE(0u, cacheFlushWriteAddress);
|
||||
|
||||
auto bcsSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserBcs.cmdList.begin(), hwParserBcs.cmdList.end());
|
||||
// NOTE(review): bcsSemaphores[0] is accessed without first checking that the list is non-empty.
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*bcsSemaphores[0]);
|
||||
|
||||
// The first BCS semaphore must wait on the address written by the PIPE_CONTROL found above.
EXPECT_EQ(cacheFlushWriteAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnqueueThenWaitPipeControlOnBcsEngine) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
@@ -1033,8 +1077,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq
|
||||
bcsHwParser.parseCommands<FamilyType>(bcsCsr->commandStream);
|
||||
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 3u : 1u, semaphores.size());
|
||||
EXPECT_EQ(pipeControlWriteAddress, genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]))->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
|
||||
EXPECT_EQ(pipeControlWriteAddress, genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]))->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWhenReleasingMultipleBlockedEnqueuesThenProgramBarrierOnce) {
|
||||
@@ -1111,7 +1155,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlocked
|
||||
bcsHwParser.parseCommands<FamilyType>(bcsCsr->commandStream);
|
||||
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 3u : 1u, semaphores.size());
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
|
||||
|
||||
cmdQ->isQueueBlocked();
|
||||
}
|
||||
@@ -1121,13 +1165,16 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati
|
||||
CsrDependencies csrDependencies;
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
|
||||
auto &hwInfo = cmdQ->getDevice().getHardwareInfo();
|
||||
|
||||
auto readBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, csrDependencies, false, false,
|
||||
true, *cmdQ, multiDispatchInfo);
|
||||
auto writeBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, csrDependencies, false, false,
|
||||
true, *cmdQ, multiDispatchInfo);
|
||||
auto copyBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, csrDependencies, false, false,
|
||||
true, *cmdQ, multiDispatchInfo);
|
||||
auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>();
|
||||
auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>() +
|
||||
MemorySynchronizationCommands<FamilyType>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
|
||||
|
||||
EXPECT_EQ(expectedSize, readBufferCmdsSize);
|
||||
EXPECT_EQ(expectedSize, writeBufferCmdsSize);
|
||||
|
||||
@@ -95,6 +95,7 @@ class TimestampPacketContainer : public NonCopyableClass {
|
||||
};
|
||||
|
||||
struct TimestampPacketDependencies : public NonCopyableClass {
|
||||
TimestampPacketContainer cacheFlushNodes;
|
||||
TimestampPacketContainer previousEnqueueNodes;
|
||||
TimestampPacketContainer barrierNodes;
|
||||
TimestampPacketContainer auxToNonAuxNodes;
|
||||
|
||||
Reference in New Issue
Block a user