Program Semaphore to keep dependency on previous enqueue

Change-Id: I511f39811769f1add179ea5d9cb331fa9c5ccec2
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2018-09-11 09:43:06 +02:00
committed by sys_ocldev
parent 581805cc88
commit 2b89486fb1
8 changed files with 132 additions and 19 deletions

View File

@ -241,10 +241,17 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
}
TimestampPacket *timestampPacket = nullptr;
TimestampPacket *currentTimestampPacket = nullptr;
TimestampPacket *previousTimestampPacket = nullptr;
if (device->getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto previousTimestampPacketNode = timestampPacketNode;
obtainNewTimestampPacketNode();
timestampPacket = timestampPacketNode->tag;
currentTimestampPacket = timestampPacketNode->tag;
if (previousTimestampPacketNode && !previousTimestampPacketNode->tag->canBeReleased()) {
// keep dependency on previous enqueue
previousTimestampPacket = previousTimestampPacketNode->tag;
}
}
if (eventBuilder.getEvent()) {
@ -281,7 +288,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
&blockedCommandsData,
hwTimeStamps,
hwPerfCounter,
timestampPacket,
previousTimestampPacket,
currentTimestampPacket,
preemption,
blockQueue,
commandType);

View File

@ -206,7 +206,8 @@ class GpgpuWalkerHelper {
KernelOperation **blockedCommandsData,
HwTimeStamps *hwTimeStamps,
OCLRT::HwPerfCounter *hwPerfCounter,
TimestampPacket *timestampPacket,
TimestampPacket *previousTimestampPacket,
TimestampPacket *currentTimestampPacket,
PreemptionMode preemptionMode,
bool blockQueue,
uint32_t commandType = 0);
@ -297,7 +298,7 @@ LinearStream &getCommandStream(CommandQueue &commandQueue, cl_uint numEventsInWa
}
if (commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
expectedSizeCS += numEventsInWaitList * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
expectedSizeCS += (numEventsInWaitList + 1) * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
}
return commandQueue.getCS(expectedSizeCS);
}

View File

@ -435,7 +435,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
KernelOperation **blockedCommandsData,
HwTimeStamps *hwTimeStamps,
OCLRT::HwPerfCounter *hwPerfCounter,
TimestampPacket *timestampPacket,
TimestampPacket *previousTimestampPacket,
TimestampPacket *currentTimestampPacket,
PreemptionMode preemptionMode,
bool blockQueue,
uint32_t commandType) {
@ -497,6 +498,10 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
if (commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
GpgpuWalkerHelper<GfxFamily>::dispatchOnDeviceWaitlistSemaphores(commandStream, commandQueue.getDevice(),
numEventsInWaitList, eventWaitList);
if (previousTimestampPacket) {
auto compareAddress = previousTimestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
KernelCommandsHelper<GfxFamily>::programMiSemaphoreWait(*commandStream, compareAddress, 1);
}
}
dsh->align(KernelCommandsHelper<GfxFamily>::alignInterfaceDescriptorData);
@ -590,9 +595,9 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
dispatchWorkarounds(commandStream, commandQueue, kernel, true);
bool setupTimestampPacket = timestampPacket && (currentDispatchIndex == multiDispatchInfo.size() - 1);
bool setupTimestampPacket = currentTimestampPacket && (currentDispatchIndex == multiDispatchInfo.size() - 1);
if (setupTimestampPacket) {
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, timestampPacket,
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, currentTimestampPacket,
TimestampPacket::WriteOperationType::BeforeWalker);
}
@ -601,7 +606,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
*pWalkerCmd = GfxFamily::cmdInitGpgpuWalker;
if (setupTimestampPacket) {
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, pWalkerCmd, timestampPacket,
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, pWalkerCmd, currentTimestampPacket,
TimestampPacket::WriteOperationType::AfterWalker);
}

View File

@ -152,6 +152,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, shouldntChangeCommandStreamMemor
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -200,6 +201,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, noLocalIdsShouldntCrash) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -230,6 +232,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm)
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -261,6 +264,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithSquaredLwsAlgorithm)
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
EXPECT_EQ(dimension, *kernel.workDim);
@ -290,6 +294,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithNDLwsAlgorithm) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
EXPECT_EQ(dimension, *kernel.workDim);
@ -320,6 +325,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithOldLwsAlgorithm) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
EXPECT_EQ(dimension, *kernel.workDim);
@ -350,6 +356,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNumWorkGroups) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -382,6 +389,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeND) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
EXPECT_EQ(2u, *kernel.localWorkSizeX);
@ -413,6 +421,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeND) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
EXPECT_EQ(2u, *kernel.localWorkSizeX);
@ -445,6 +454,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeSquared) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
EXPECT_EQ(2u, *kernel.localWorkSizeX);
@ -477,6 +487,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeSquaredAn
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
EXPECT_EQ(2u, *kernel.localWorkSizeX);
@ -507,6 +518,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSize) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
EXPECT_EQ(1u, *kernel.localWorkSizeX);
@ -540,6 +552,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizes) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
EXPECT_EQ(1u, *kernel.localWorkSizeX);
@ -577,6 +590,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizeForSplitKernel) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -628,6 +642,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizesForSplitWalker) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -683,6 +698,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerDoesntConsumeCommandStreamWhenQueueIs
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
blockQueue);
@ -723,6 +739,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
blockQueue);
@ -761,6 +778,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromMdiWhen
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
blockQueue);
@ -794,6 +812,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerWithMultipleDispatchInfo) {
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -836,6 +855,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -921,6 +941,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -967,6 +988,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -1018,6 +1040,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -1061,7 +1084,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, givenMultiDispatchWhenWhiteliste
DispatchInfo di2(&kernel, 1, Vec3<size_t>(1, 1, 1), Vec3<size_t>(1, 1, 1), Vec3<size_t>(0, 0, 0));
MockMultiDispatchInfo multiDispatchInfo(std::vector<DispatchInfo *>({&di1, &di2}));
GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ, multiDispatchInfo, 0, nullptr, nullptr, nullptr, nullptr, nullptr, pDevice->getPreemptionMode(), false);
GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ, multiDispatchInfo, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, pDevice->getPreemptionMode(), false);
hwParser.parseCommands<FamilyType>(cmdStream, 0);

View File

@ -65,6 +65,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDev
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -121,6 +122,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDef
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -147,6 +149,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSH
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -183,6 +186,7 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSiz
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
true);
ASSERT_NE(nullptr, blockedCommandsData);
@ -281,6 +285,7 @@ HWTEST_F(MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatched
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
true);
@ -315,6 +320,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispa
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -372,6 +378,7 @@ HWTEST_F(MockParentKernelDispatch, GivenUsedSSHHeapWhenParentKernelIsDispatchedT
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
@ -408,6 +415,7 @@ HWTEST_F(MockParentKernelDispatch, GivenNotUsedSSHHeapWhenParentKernelIsDispatch
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);

View File

@ -450,6 +450,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand
nullptr,
nullptr,
nullptr,
nullptr,
device->getPreemptionMode(),
true);

View File

@ -65,6 +65,12 @@ struct TimestampPacketTests : public ::testing::Test {
std::vector<NodeType *> releaseReferenceNodes;
std::vector<NodeType *> returnedToFreePoolNodes;
};
// Marks a timestamp packet as ready by zero-filling timestampDataSize bytes of its data,
// beginning at the address of the ContextStart slot.
void setTagToReadyState(TimestampPacket *tag) {
    auto firstDataAddress = tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextStart);
    void *dataBegin = reinterpret_cast<void *>(firstDataAddress);
    memset(dataBegin, 0, timestampDataSize);
}
// Number of bytes cleared by setTagToReadyState: one uint32_t for each of the
// TimestampPacket::DataIndex::Max data slots.
const size_t timestampDataSize = sizeof(uint32_t) * static_cast<size_t>(TimestampPacket::DataIndex::Max);
};
TEST_F(TimestampPacketTests, whenEndTagIsNotOneThenCanBeReleased) {
@ -142,7 +148,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabl
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, 0, false, false, multiDispatchInfo);
auto sizeWithEnabled = cmdQ.requestedCmdStreamSize;
EXPECT_EQ(sizeWithEnabled, sizeWithDisabled + (2 * sizeof(typename FamilyType::PIPE_CONTROL)));
auto extendedSize = sizeWithDisabled + (2 * sizeof(typename FamilyType::PIPE_CONTROL)) + sizeof(typename FamilyType::MI_SEMAPHORE_WAIT);
EXPECT_EQ(sizeWithEnabled, extendedSize);
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStreamSizeWithWaitlistThenAddSizeForSemaphores) {
@ -163,7 +171,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr
auto sizeWithEnabled = cmdQ.requestedCmdStreamSize;
size_t extendedSize = sizeWithDisabled + EnqueueOperation<FamilyType>::getSizeRequiredForTimestampPacketWrite() +
(numEventsOnWaitlist * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT));
((numEventsOnWaitlist + 1) * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT));
EXPECT_EQ(sizeWithEnabled, extendedSize);
}
@ -190,6 +198,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispat
nullptr,
nullptr,
nullptr,
nullptr,
&timestampPacket,
device->getPreemptionMode(),
false);
@ -267,10 +276,8 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
EXPECT_EQ(node1, mockTagAllocator->releaseReferenceNodes.at(0));
EXPECT_NE(node1, node2);
size_t dataSize = sizeof(uint32_t) * static_cast<size_t>(TimestampPacket::DataIndex::Max);
// mark nodes as ready
memset(reinterpret_cast<void *>(node1->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextStart)), 0, dataSize);
memset(reinterpret_cast<void *>(node2->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextStart)), 0, dataSize);
setTagToReadyState(node1->tag);
setTagToReadyState(node2->tag);
clReleaseEvent(event2);
EXPECT_EQ(0u, mockTagAllocator->returnedToFreePoolNodes.size()); // nothing returned. cmdQ owns node2
@ -446,6 +453,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh
nullptr,
nullptr,
nullptr,
nullptr,
device1->getPreemptionMode(),
false);
@ -493,12 +501,69 @@ TEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenObtainingThenGetNewBefo
auto firstNode = cmdQ.timestampPacketNode;
EXPECT_TRUE(mockTagAllocator->freeTags.peekIsEmpty());
// mark as ready to release
size_t dataSize = sizeof(uint32_t) * static_cast<size_t>(TimestampPacket::DataIndex::Max);
memset(reinterpret_cast<void *>(firstNode->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextStart)), 0, dataSize);
setTagToReadyState(firstNode->tag);
cmdQ.obtainNewTimestampPacketNode();
auto secondNode = cmdQ.timestampPacketNode;
EXPECT_FALSE(mockTagAllocator->freeTags.peekIsEmpty()); // new pool allocated for secondNode
EXPECT_NE(firstNode, secondNode);
}
// When the queue's previously obtained timestamp packet node has been put into the ready state,
// enqueueing a kernel must not program any MI_SEMAPHORE_WAIT into the command stream.
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenDontKeepDependencyOnPreviousNodeIfItsReady) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;

    MockContext context(device.get());
    MockKernelWithInternals kernel(*device);
    MockCommandQueueHw<FamilyType> cmdQ(&context, device.get(), nullptr);

    // Give the queue a node up front and mark it as ready before the enqueue.
    cmdQ.obtainNewTimestampPacketNode();
    auto previousNode = cmdQ.timestampPacketNode;
    setTagToReadyState(previousNode->tag);

    size_t globalWorkSize[] = {1, 1, 1};
    cmdQ.enqueueKernel(kernel.mockKernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);

    // Count every semaphore wait found in the parsed command list.
    uint32_t semaphoreCount = 0;
    for (auto parsedCmd : hwParser.cmdList) {
        if (genCmdCast<MI_SEMAPHORE_WAIT *>(parsedCmd) != nullptr) {
            ++semaphoreCount;
        }
    }
    EXPECT_EQ(0u, semaphoreCount);
}
// When the queue already holds a timestamp packet node that has not been put into the ready
// state, enqueueing a kernel must program exactly one MI_SEMAPHORE_WAIT, as the first command
// in the stream, that compares the previous node's ContextEnd address against 1 with the
// NOT_EQUAL operation.
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenKeepDependencyOnPreviousNodeIfItsNotReady) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;

    MockContext context(device.get());
    MockKernelWithInternals kernel(*device);
    MockCommandQueueHw<FamilyType> cmdQ(&context, device.get(), nullptr);

    // setTagToReadyState is intentionally not called on this node
    // (presumably leaving it in a state where it cannot be released yet).
    cmdQ.obtainNewTimestampPacketNode();
    auto firstNode = cmdQ.timestampPacketNode;

    size_t gws[] = {1, 1, 1};
    cmdQ.enqueueKernel(kernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);

    // Fatal checks: the first command is dereferenced below, so stop the test here
    // (instead of crashing) if nothing was parsed or the first command is not a semaphore.
    auto it = hwParser.cmdList.begin();
    ASSERT_TRUE(it != hwParser.cmdList.end());
    auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
    ASSERT_NE(nullptr, semaphoreCmd);

    EXPECT_EQ(firstNode->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd), semaphoreCmd->getSemaphoreGraphicsAddress());
    EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
    EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation());

    // No further semaphore may follow the first one.
    uint32_t semaphoresFound = 0;
    for (++it; it != hwParser.cmdList.end(); ++it) {
        if (genCmdCast<MI_SEMAPHORE_WAIT *>(*it)) {
            semaphoresFound++;
        }
    }
    EXPECT_EQ(0u, semaphoresFound);
}

View File

@ -78,7 +78,9 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
typedef CommandQueueHw<GfxFamily> BaseClass;
public:
using BaseClass::commandStream;
using BaseClass::createAllocationForHostSurface;
using BaseClass::obtainNewTimestampPacketNode;
using BaseClass::timestampPacketNode;
MockCommandQueueHw(Context *context,