mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
Check if cache flush for BCS is required
Change-Id: Ia36856c46fe7da7a72dae14e2543456fb30ec409
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
baf80c28ec
commit
4c781c1b98
@@ -394,6 +394,8 @@ class CommandQueueHw : public CommandQueue {
|
||||
|
||||
bool isCacheFlushCommand(uint32_t commandType) const override;
|
||||
|
||||
// Tells callers whether a cache flush has to be programmed for BCS (blitter) enqueues.
// Declared MOCKABLE_VIRTUAL; the test mock in this change overrides it to force either answer.
MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const;
|
||||
|
||||
protected:
|
||||
// Hook with an intentionally empty body in this class; both arguments are ignored here.
// NOTE(review): presumably exists so test mocks can observe enqueues - confirm against the mock classes.
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){};
|
||||
size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image);
|
||||
|
||||
@@ -123,6 +123,11 @@ bool CommandQueueHw<Family>::forceStateless(size_t size) {
|
||||
return size >= 4ull * MemoryConstants::gigaByte;
|
||||
}
|
||||
|
||||
// Reports whether a cache flush is required for BCS (blitter) enqueues.
// This generic implementation requires it unconditionally.
template <typename Family>
|
||||
bool CommandQueueHw<Family>::isCacheFlushForBcsRequired() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void CommandQueueHw<Family>::setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {
|
||||
multiDispatchInfo.begin()->dispatchInitCommands.registerMethod(
|
||||
|
||||
@@ -211,7 +211,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
if (blitEnqueue) {
|
||||
auto allocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
|
||||
|
||||
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
||||
if (isCacheFlushForBcsRequired()) {
|
||||
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
||||
}
|
||||
if (!blockQueue && getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired()) {
|
||||
timestampPacketDependencies.barrierNodes.add(allocator->getTag());
|
||||
}
|
||||
@@ -473,12 +475,14 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const Mu
|
||||
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
|
||||
blitProperties.outputTimestampPacket = currentTimestampPacketNode;
|
||||
|
||||
auto cacheFlushTimestampPacketGpuAddress = timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]->getGpuAddress() +
|
||||
offsetof(TimestampPacketStorage, packets[0].contextEnd);
|
||||
if (isCacheFlushForBcsRequired()) {
|
||||
auto cacheFlushTimestampPacketGpuAddress = timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]->getGpuAddress() +
|
||||
offsetof(TimestampPacketStorage, packets[0].contextEnd);
|
||||
|
||||
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||
commandStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
cacheFlushTimestampPacketGpuAddress, 0, true, device->getHardwareInfo());
|
||||
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||
commandStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
cacheFlushTimestampPacketGpuAddress, 0, true, device->getHardwareInfo());
|
||||
}
|
||||
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(commandStream, *currentTimestampPacketNode);
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/command_queue/local_id_gen.h"
|
||||
#include "opencl/source/device/device_info.h"
|
||||
@@ -209,12 +210,20 @@ void GpgpuWalkerHelper<GfxFamily>::adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxF
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
size_t expectedSizeCS = 0;
|
||||
|
||||
if (blitEnqueue) {
|
||||
auto &hwInfo = commandQueue.getDevice().getHardwareInfo();
|
||||
return TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>() +
|
||||
MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
|
||||
auto &commandQueueHw = static_cast<CommandQueueHw<GfxFamily> &>(commandQueue);
|
||||
|
||||
size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>();
|
||||
if (commandQueueHw.isCacheFlushForBcsRequired()) {
|
||||
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
|
||||
}
|
||||
|
||||
return expectedSizeCS;
|
||||
}
|
||||
size_t expectedSizeCS = 0;
|
||||
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
|
||||
|
||||
@@ -99,6 +99,12 @@ HWTEST_F(CommandQueueHwTest, WhenEnqueuingBlockedMapUnmapOperationThenVirtualEve
|
||||
pHwQ->virtualEvent = nullptr;
|
||||
}
|
||||
|
||||
// Checks that the CommandQueueHw instance reports that a cache flush for BCS is required.
HWTEST_F(CommandQueueHwTest, givenCommandQueueWhenAskingForCacheFlushOnBcsThenReturnTrue) {
|
||||
// Downcast pCmdQ to the family-specific queue type to reach the queried method.
auto pHwQ = static_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
|
||||
|
||||
EXPECT_TRUE(pHwQ->isCacheFlushForBcsRequired());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueHwTest, givenBlockedMapBufferCallWhenMemObjectIsPassedToCommandThenItsRefCountIsBeingIncreased) {
|
||||
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
|
||||
MockBuffer buffer;
|
||||
|
||||
@@ -729,6 +729,9 @@ struct BcsBufferTests : public ::testing::Test {
|
||||
template <typename FamilyType>
|
||||
void TearDownT() {}
|
||||
|
||||
template <typename FamilyType>
|
||||
void waitForCacheFlushFromBcsTest(MockCommandQueueHw<FamilyType> &commandQueue);
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
std::unique_ptr<OsContext> bcsOsContext;
|
||||
@@ -992,25 +995,26 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandS
|
||||
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlitEnqueueWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) {
|
||||
template <typename FamilyType>
|
||||
void BcsBufferTests::waitForCacheFlushFromBcsTest(MockCommandQueueHw<FamilyType> &commandQueue) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||
bool isCacheFlushForBcsRequired = commandQueue.isCacheFlushForBcsRequired();
|
||||
|
||||
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->getBcsCommandStreamReceiver());
|
||||
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue.getBcsCommandStreamReceiver());
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
|
||||
buffer->forceDisallowCPUCopy = true;
|
||||
void *hostPtr = reinterpret_cast<void *>(0x12340000);
|
||||
|
||||
cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
commandQueue.enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
HardwareParse hwParserGpGpu;
|
||||
HardwareParse hwParserBcs;
|
||||
hwParserGpGpu.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
|
||||
hwParserGpGpu.parseCommands<FamilyType>(*commandQueue.peekCommandStream());
|
||||
hwParserBcs.parseCommands<FamilyType>(bcsCsr->commandStream);
|
||||
|
||||
auto gpgpuPipeControls = findAll<PIPE_CONTROL *>(hwParserGpGpu.cmdList.begin(), hwParserGpGpu.cmdList.end());
|
||||
@@ -1024,16 +1028,37 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlitEnqueueWhenProgrammingCmdBufferThenW
|
||||
if (cacheFlushWriteAddress != 0) {
|
||||
EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
|
||||
EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
|
||||
EXPECT_EQ(0u, pipeControlCmd->getImmediateData());
|
||||
EXPECT_EQ(isCacheFlushForBcsRequired, 0u == pipeControlCmd->getImmediateData());
|
||||
break;
|
||||
}
|
||||
}
|
||||
EXPECT_NE(0u, cacheFlushWriteAddress);
|
||||
|
||||
auto bcsSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserBcs.cmdList.begin(), hwParserBcs.cmdList.end());
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*bcsSemaphores[0]);
|
||||
size_t additionalSemaphores = UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 2 : 0;
|
||||
|
||||
EXPECT_EQ(cacheFlushWriteAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
if (isCacheFlushForBcsRequired) {
|
||||
EXPECT_NE(0u, cacheFlushWriteAddress);
|
||||
EXPECT_EQ(1u + additionalSemaphores, bcsSemaphores.size());
|
||||
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*bcsSemaphores[0]);
|
||||
EXPECT_EQ(cacheFlushWriteAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
} else {
|
||||
EXPECT_EQ(additionalSemaphores, bcsSemaphores.size());
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the shared waitForCacheFlushFromBcsTest scenario with the mock queue forced to
// report that a cache flush for BCS IS required.
HWTEST_TEMPLATED_F(BcsBufferTests, givenCommandQueueWithCacheFlushRequirementWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) {
|
||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||
// Enable the override so the mock returns returnValue instead of calling the base implementation.
cmdQ->overrideIsCacheFlushForBcsRequired.enabled = true;
|
||||
cmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true;
|
||||
waitForCacheFlushFromBcsTest<FamilyType>(*cmdQ);
|
||||
}
|
||||
|
||||
// Runs the shared waitForCacheFlushFromBcsTest scenario with the mock queue forced to
// report that a cache flush for BCS is NOT required.
// NOTE(review): the "ThenWaitForCacheFlushFromBcs" suffix looks misleading for this case -
// with the override returning false the shared helper takes its "not required" branch.
// Consider renaming (e.g. "...ThenDontWaitForCacheFlushFromBcs").
HWTEST_TEMPLATED_F(BcsBufferTests, givenCommandQueueWithoutCacheFlushRequirementWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) {
|
||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||
// Enable the override so the mock returns returnValue instead of calling the base implementation.
cmdQ->overrideIsCacheFlushForBcsRequired.enabled = true;
|
||||
cmdQ->overrideIsCacheFlushForBcsRequired.returnValue = false;
|
||||
waitForCacheFlushFromBcsTest<FamilyType>(*cmdQ);
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnqueueThenWaitPipeControlOnBcsEngine) {
|
||||
@@ -1075,10 +1100,15 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq
|
||||
|
||||
HardwareParse bcsHwParser;
|
||||
bcsHwParser.parseCommands<FamilyType>(bcsCsr->commandStream);
|
||||
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
|
||||
EXPECT_EQ(pipeControlWriteAddress, genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]))->getSemaphoreGraphicsAddress());
|
||||
|
||||
if (cmdQ->isCacheFlushForBcsRequired()) {
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
|
||||
EXPECT_EQ(pipeControlWriteAddress, genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]))->getSemaphoreGraphicsAddress());
|
||||
} else {
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 3u : 1u, semaphores.size());
|
||||
EXPECT_EQ(pipeControlWriteAddress, genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]))->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWhenReleasingMultipleBlockedEnqueuesThenProgramBarrierOnce) {
|
||||
@@ -1155,7 +1185,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlocked
|
||||
bcsHwParser.parseCommands<FamilyType>(bcsCsr->commandStream);
|
||||
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
|
||||
|
||||
if (cmdQ->isCacheFlushForBcsRequired()) {
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
|
||||
} else {
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 3u : 1u, semaphores.size());
|
||||
}
|
||||
|
||||
cmdQ->isQueueBlocked();
|
||||
}
|
||||
@@ -1173,8 +1208,11 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati
|
||||
true, *cmdQ, multiDispatchInfo);
|
||||
auto copyBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, csrDependencies, false, false,
|
||||
true, *cmdQ, multiDispatchInfo);
|
||||
auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>() +
|
||||
MemorySynchronizationCommands<FamilyType>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
|
||||
auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>();
|
||||
|
||||
if (cmdQ->isCacheFlushForBcsRequired()) {
|
||||
expectedSize += MemorySynchronizationCommands<FamilyType>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
|
||||
}
|
||||
|
||||
EXPECT_EQ(expectedSize, readBufferCmdsSize);
|
||||
EXPECT_EQ(expectedSize, writeBufferCmdsSize);
|
||||
|
||||
@@ -159,6 +159,13 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
return BaseClass::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep);
|
||||
}
|
||||
|
||||
// Test override: returns the value configured in overrideIsCacheFlushForBcsRequired when
// that override is enabled, otherwise defers to the real CommandQueueHw implementation.
bool isCacheFlushForBcsRequired() const override {
|
||||
if (overrideIsCacheFlushForBcsRequired.enabled) {
|
||||
return overrideIsCacheFlushForBcsRequired.returnValue;
|
||||
}
|
||||
// Override disabled: keep production behaviour.
return BaseClass::isCacheFlushForBcsRequired();
|
||||
}
|
||||
|
||||
unsigned int lastCommandType;
|
||||
std::vector<Kernel *> lastEnqueuedKernels;
|
||||
MultiDispatchInfo storedMultiDispatchInfo;
|
||||
@@ -169,6 +176,10 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
bool notifyEnqueueReadBufferCalled = false;
|
||||
bool notifyEnqueueReadImageCalled = false;
|
||||
bool cpuDataTransferHandlerCalled = false;
|
||||
// Switch plus payload used by tests to force the result of isCacheFlushForBcsRequired().
struct OverrideReturnValue {
|
||||
// When false (the default) the mock falls back to the base-class implementation.
bool enabled = false;
|
||||
// Value returned by the mock while 'enabled' is true.
bool returnValue = false;
|
||||
} overrideIsCacheFlushForBcsRequired;
|
||||
BuiltinOpParams kernelParams;
|
||||
std::atomic<uint32_t> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user