Use Semaphore to wait for dependencies from a different device

Change-Id: I154f377c77847e93d5b188a5b2252e74d9d70b75
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2018-09-07 14:31:37 +02:00
parent 2315403542
commit 24136414e6
6 changed files with 116 additions and 0 deletions

View File

@@ -325,6 +325,7 @@ class CommandQueueHw : public CommandQueue {
size_t commandStreamStart,
bool &blocking,
const MultiDispatchInfo &multiDispatchInfo,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
uint32_t taskLevel,
bool slmUsed,

View File

@@ -347,6 +347,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
auto submissionRequired = isCommandWithoutKernel(commandType) ? false : true;
if (submissionRequired) {
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr);
completionStamp = enqueueNonBlocked<commandType>(
surfacesForResidency,
numSurfaceForResidency,
@@ -354,6 +356,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
commandStreamStart,
blocking,
multiDispatchInfo,
eventsRequest,
eventBuilder,
taskLevel,
slmUsed,
@@ -507,6 +510,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
size_t commandStreamStart,
bool &blocking,
const MultiDispatchInfo &multiDispatchInfo,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
uint32_t taskLevel,
bool slmUsed,
@@ -588,6 +592,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.flushStampReference = this->flushStamp->getStampReference();
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
dispatchFlags.outOfOrderExecutionAllowed = !eventBuilder.getEvent() || commandStreamReceiver.isNTo1SubmissionModelEnabled();
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
}
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

View File

@@ -92,6 +92,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config);
void programPipelineSelect(LinearStream &csr, DispatchFlags &dispatchFlags);
void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags);
void programOutOfDeviceWaitlistSemaphores(LinearStream &csr, DispatchFlags &dispatchFlags, Device &currentDevice);
virtual void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags);
virtual void initPageTableManagerRegisters(LinearStream &csr){};

View File

@@ -262,6 +262,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
auto commandStreamStartCSR = commandStreamCSR.getUsed();
if (dispatchFlags.outOfDeviceDependencies) {
programOutOfDeviceWaitlistSemaphores(commandStreamCSR, dispatchFlags, device);
}
initPageTableManagerRegisters(commandStreamCSR);
programPreemption(commandStreamCSR, device, dispatchFlags);
programCoherency(commandStreamCSR, dispatchFlags);
@@ -650,6 +653,9 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
if (experimentalCmdBuffer.get() != nullptr) {
size += experimentalCmdBuffer->getRequiredInjectionSize<GfxFamily>();
}
if (dispatchFlags.outOfDeviceDependencies) {
size += dispatchFlags.outOfDeviceDependencies->numEventsInWaitList * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
}
return size;
}
@@ -777,4 +783,25 @@ void CommandStreamReceiverHw<GfxFamily>::resetKmdNotifyHelper(KmdNotifyHelper *n
// Generic definition of the "clear SLM" workaround hook: the body is empty, so pCmd is
// left untouched here.
// NOTE(review): presumably specialized for the GfxFamily variants that actually need the
// workaround programmed into the PIPE_CONTROL - confirm against the per-family sources.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd) {
}
// Programs MI_SEMAPHORE_WAIT commands into `csr` for waitlist events that originate from a
// device other than `currentDevice`.
//
// csr           - stream that receives the semaphore commands.
// dispatchFlags - its outOfDeviceDependencies member is dereferenced unconditionally; the
//                 caller must make sure it is not null before calling.
// currentDevice - device the current submission targets; events whose command queue
//                 belongs to this device are skipped, as are user events.
//
// Each emitted semaphore uses COMPARE_OPERATION_SAD_NOT_EQUAL_SDD with a data dword of 1
// against the ContextEnd write address of the event's timestamp packet.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programOutOfDeviceWaitlistSemaphores(LinearStream &csr, DispatchFlags &dispatchFlags, Device &currentDevice) {
    using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;

    auto *dependencies = dispatchFlags.outOfDeviceDependencies;
    for (cl_uint i = 0; i < dependencies->numEventsInWaitList; i++) {
        auto event = castToObjectOrAbort<Event>(dependencies->eventWaitList[i]);

        // The short-circuit keeps getCommandQueue() from being called on user events.
        if (event->isUserEvent() || (&event->getCommandQueue()->getDevice() == &currentDevice)) {
            continue;
        }

        auto timestampPacket = event->getTimestampPacket();
        auto compareAddress = timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);

        // Write into the stream handed in by the caller instead of reaching for the
        // receiver's own commandStream member, so the `csr` parameter is honoured.
        auto miSemaphoreCmd = csr.getSpaceForCmd<MI_SEMAPHORE_WAIT>();
        *miSemaphoreCmd = MI_SEMAPHORE_WAIT::sInit();
        miSemaphoreCmd->setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
        miSemaphoreCmd->setSemaphoreDataDword(1);
        miSemaphoreCmd->setSemaphoreGraphicsAddress(compareAddress);
    }
}
} // namespace OCLRT

View File

@@ -57,6 +57,7 @@ struct DispatchFlags {
bool outOfOrderExecutionAllowed = false;
FlushStampTrackingObj *flushStampReference = nullptr;
PreemptionMode preemptionMode = PreemptionMode::Disabled;
EventsRequest *outOfDeviceDependencies = nullptr;
};
struct CsrSizeRequestFlags {

View File

@@ -319,6 +319,85 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabl
EXPECT_TRUE(walkerFound);
}
// Checks that the CSR stream size estimate grows by sizeof(MI_SEMAPHORE_WAIT) for every
// event in the waitlist once DispatchFlags::outOfDeviceDependencies is set.
HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForCsrThenAddSizeForSemaphores) {
    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));

    cl_uint waitlistEventCount = 5;
    EventsRequest request(waitlistEventCount, nullptr, nullptr);

    // Baseline: default flags carry no out-of-device dependencies.
    DispatchFlags dispatchFlags;
    auto baselineSize = mockDevice->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(dispatchFlags, *mockDevice);

    // Same query again, this time with the events request attached.
    dispatchFlags.outOfDeviceDependencies = &request;
    auto sizeWithDependencies = mockDevice->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(dispatchFlags, *mockDevice);

    size_t expectedSize = baselineSize + (waitlistEventCount * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT));
    EXPECT_EQ(sizeWithDependencies, expectedSize);
}
// Enqueues a kernel on a queue of device1 with a mixed waitlist (two user events, two
// events from device1's queue, two events from device2's queue) and checks that the CSR
// stream of device1 starts with exactly two MI_SEMAPHORE_WAIT commands - one per event
// from device2's queue - and contains no other semaphore afterwards.
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    // Both devices share a single execution environment.
    ExecutionEnvironment executionEnvironment;
    executionEnvironment.incRefInternal();
    std::unique_ptr<MockDevice> device1(Device::create<MockDevice>(nullptr, &executionEnvironment));
    std::unique_ptr<MockDevice> device2(Device::create<MockDevice>(nullptr, &executionEnvironment));
    device1->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;

    MockContext context1(device1.get());
    MockContext context2(device2.get());
    MockKernelWithInternals kernel(*device1, &context1);

    auto cmdQ1 = std::make_unique<MockCommandQueueHw<FamilyType>>(&context1, device1.get(), nullptr);
    auto cmdQ2 = std::make_unique<MockCommandQueueHw<FamilyType>>(&context2, device2.get(), nullptr);

    const cl_uint eventsOnWaitlist = 6;
    TagNode<TimestampPacket> *tagNodes[eventsOnWaitlist];
    for (auto &node : tagNodes) {
        node = executionEnvironment.memoryManager->getTimestampPacketAllocator()->getTag();
    }

    // Two already-completed user events.
    UserEvent userEventA;
    userEventA.setStatus(CL_COMPLETE);
    UserEvent userEventB;
    userEventB.setStatus(CL_COMPLETE);

    // Queue-bound events alternate between cmdQ1 (device1) and cmdQ2 (device2).
    Event sameDeviceEventA(cmdQ1.get(), 0, 0, 0);
    sameDeviceEventA.setTimestampPacketNode(tagNodes[2]);
    Event otherDeviceEventA(cmdQ2.get(), 0, 0, 0);
    otherDeviceEventA.setTimestampPacketNode(tagNodes[3]);
    Event sameDeviceEventB(cmdQ1.get(), 0, 0, 0);
    sameDeviceEventB.setTimestampPacketNode(tagNodes[4]);
    Event otherDeviceEventB(cmdQ2.get(), 0, 0, 0);
    otherDeviceEventB.setTimestampPacketNode(tagNodes[5]);

    cl_event waitlist[] = {&userEventA, &userEventB, &sameDeviceEventA, &otherDeviceEventA, &sameDeviceEventB, &otherDeviceEventB};
    size_t gws[] = {1, 1, 1};

    cmdQ1->enqueueKernel(kernel.mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);

    auto &csrStream = device1->getUltCommandStreamReceiver<FamilyType>().commandStream;
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(csrStream, 0);

    // A semaphore must compare "not equal" against 1 at the ContextEnd address of the
    // given event's timestamp packet.
    auto expectSemaphoreFor = [](MI_SEMAPHORE_WAIT *cmd, Event *sourceEvent) {
        EXPECT_NE(nullptr, cmd);
        EXPECT_EQ(cmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
        EXPECT_EQ(1u, cmd->getSemaphoreDataDword());
        EXPECT_EQ(sourceEvent->getTimestampPacket()->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd),
                  cmd->getSemaphoreGraphicsAddress());
    };

    // The first two parsed commands are the semaphores for the device2 events, in
    // waitlist order.
    auto cmdIt = hwParser.cmdList.begin();
    expectSemaphoreFor(genCmdCast<MI_SEMAPHORE_WAIT *>(*cmdIt), &otherDeviceEventA);
    ++cmdIt;
    expectSemaphoreFor(genCmdCast<MI_SEMAPHORE_WAIT *>(*cmdIt), &otherDeviceEventB);
    ++cmdIt;

    // Nothing after them may be a semaphore.
    for (; cmdIt != hwParser.cmdList.end(); ++cmdIt) {
        EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*cmdIt));
    }
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingThenProgramSemaphoresForWaitlist) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using WALKER = WALKER_TYPE<FamilyType>;