Check if cache flush for BCS is required

Change-Id: Ia36856c46fe7da7a72dae14e2543456fb30ec409
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2020-02-28 09:07:07 +01:00
committed by sys_ocldev
parent baf80c28ec
commit 4c781c1b98
7 changed files with 99 additions and 24 deletions

View File

@ -99,6 +99,12 @@ HWTEST_F(CommandQueueHwTest, WhenEnqueuingBlockedMapUnmapOperationThenVirtualEve
pHwQ->virtualEvent = nullptr;
}
HWTEST_F(CommandQueueHwTest, givenCommandQueueWhenAskingForCacheFlushOnBcsThenReturnTrue) {
auto pHwQ = static_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
EXPECT_TRUE(pHwQ->isCacheFlushForBcsRequired());
}
HWTEST_F(CommandQueueHwTest, givenBlockedMapBufferCallWhenMemObjectIsPassedToCommandThenItsRefCountIsBeingIncreased) {
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
MockBuffer buffer;

View File

@ -729,6 +729,9 @@ struct BcsBufferTests : public ::testing::Test {
template <typename FamilyType>
void TearDownT() {}
template <typename FamilyType>
void waitForCacheFlushFromBcsTest(MockCommandQueueHw<FamilyType> &commandQueue);
DebugManagerStateRestore restore;
std::unique_ptr<OsContext> bcsOsContext;
@ -992,25 +995,26 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandS
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlitEnqueueWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) {
template <typename FamilyType>
void BcsBufferTests::waitForCacheFlushFromBcsTest(MockCommandQueueHw<FamilyType> &commandQueue) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
bool isCacheFlushForBcsRequired = commandQueue.isCacheFlushForBcsRequired();
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->getBcsCommandStreamReceiver());
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue.getBcsCommandStreamReceiver());
cl_int retVal = CL_SUCCESS;
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
buffer->forceDisallowCPUCopy = true;
void *hostPtr = reinterpret_cast<void *>(0x12340000);
cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue.enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
HardwareParse hwParserGpGpu;
HardwareParse hwParserBcs;
hwParserGpGpu.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
hwParserGpGpu.parseCommands<FamilyType>(*commandQueue.peekCommandStream());
hwParserBcs.parseCommands<FamilyType>(bcsCsr->commandStream);
auto gpgpuPipeControls = findAll<PIPE_CONTROL *>(hwParserGpGpu.cmdList.begin(), hwParserGpGpu.cmdList.end());
@ -1024,16 +1028,37 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlitEnqueueWhenProgrammingCmdBufferThenW
if (cacheFlushWriteAddress != 0) {
EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
EXPECT_EQ(0u, pipeControlCmd->getImmediateData());
EXPECT_EQ(isCacheFlushForBcsRequired, 0u == pipeControlCmd->getImmediateData());
break;
}
}
EXPECT_NE(0u, cacheFlushWriteAddress);
auto bcsSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserBcs.cmdList.begin(), hwParserBcs.cmdList.end());
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*bcsSemaphores[0]);
size_t additionalSemaphores = UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 2 : 0;
EXPECT_EQ(cacheFlushWriteAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
if (isCacheFlushForBcsRequired) {
EXPECT_NE(0u, cacheFlushWriteAddress);
EXPECT_EQ(1u + additionalSemaphores, bcsSemaphores.size());
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*bcsSemaphores[0]);
EXPECT_EQ(cacheFlushWriteAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
} else {
EXPECT_EQ(additionalSemaphores, bcsSemaphores.size());
}
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenCommandQueueWithCacheFlushRequirementWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) {
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
cmdQ->overrideIsCacheFlushForBcsRequired.enabled = true;
cmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true;
waitForCacheFlushFromBcsTest<FamilyType>(*cmdQ);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenCommandQueueWithoutCacheFlushRequirementWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) {
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
cmdQ->overrideIsCacheFlushForBcsRequired.enabled = true;
cmdQ->overrideIsCacheFlushForBcsRequired.returnValue = false;
waitForCacheFlushFromBcsTest<FamilyType>(*cmdQ);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnqueueThenWaitPipeControlOnBcsEngine) {
@ -1075,10 +1100,15 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq
HardwareParse bcsHwParser;
bcsHwParser.parseCommands<FamilyType>(bcsCsr->commandStream);
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
EXPECT_EQ(pipeControlWriteAddress, genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]))->getSemaphoreGraphicsAddress());
if (cmdQ->isCacheFlushForBcsRequired()) {
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
EXPECT_EQ(pipeControlWriteAddress, genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]))->getSemaphoreGraphicsAddress());
} else {
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 3u : 1u, semaphores.size());
EXPECT_EQ(pipeControlWriteAddress, genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]))->getSemaphoreGraphicsAddress());
}
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWhenReleasingMultipleBlockedEnqueuesThenProgramBarrierOnce) {
@ -1155,7 +1185,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlocked
bcsHwParser.parseCommands<FamilyType>(bcsCsr->commandStream);
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
if (cmdQ->isCacheFlushForBcsRequired()) {
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
} else {
EXPECT_EQ(UnitTestHelper<FamilyType>::isSynchronizationWArequired(device->getHardwareInfo()) ? 3u : 1u, semaphores.size());
}
cmdQ->isQueueBlocked();
}
@ -1173,8 +1208,11 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati
true, *cmdQ, multiDispatchInfo);
auto copyBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, csrDependencies, false, false,
true, *cmdQ, multiDispatchInfo);
auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>() +
MemorySynchronizationCommands<FamilyType>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>();
if (cmdQ->isCacheFlushForBcsRequired()) {
expectedSize += MemorySynchronizationCommands<FamilyType>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
}
EXPECT_EQ(expectedSize, readBufferCmdsSize);
EXPECT_EQ(expectedSize, writeBufferCmdsSize);

View File

@ -159,6 +159,13 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
return BaseClass::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep);
}
bool isCacheFlushForBcsRequired() const override {
if (overrideIsCacheFlushForBcsRequired.enabled) {
return overrideIsCacheFlushForBcsRequired.returnValue;
}
return BaseClass::isCacheFlushForBcsRequired();
}
unsigned int lastCommandType;
std::vector<Kernel *> lastEnqueuedKernels;
MultiDispatchInfo storedMultiDispatchInfo;
@ -169,6 +176,10 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
bool notifyEnqueueReadBufferCalled = false;
bool notifyEnqueueReadImageCalled = false;
bool cpuDataTransferHandlerCalled = false;
struct OverrideReturnValue {
bool enabled = false;
bool returnValue = false;
} overrideIsCacheFlushForBcsRequired;
BuiltinOpParams kernelParams;
std::atomic<uint32_t> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};