mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
Use Semaphore to wait for dependencies from different device
Change-Id: I154f377c77847e93d5b188a5b2252e74d9d70b75 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
@@ -325,6 +325,7 @@ class CommandQueueHw : public CommandQueue {
|
||||
size_t commandStreamStart,
|
||||
bool &blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
EventsRequest &eventsRequest,
|
||||
EventBuilder &eventBuilder,
|
||||
uint32_t taskLevel,
|
||||
bool slmUsed,
|
||||
|
||||
@@ -347,6 +347,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
auto submissionRequired = isCommandWithoutKernel(commandType) ? false : true;
|
||||
|
||||
if (submissionRequired) {
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr);
|
||||
|
||||
completionStamp = enqueueNonBlocked<commandType>(
|
||||
surfacesForResidency,
|
||||
numSurfaceForResidency,
|
||||
@@ -354,6 +356,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
commandStreamStart,
|
||||
blocking,
|
||||
multiDispatchInfo,
|
||||
eventsRequest,
|
||||
eventBuilder,
|
||||
taskLevel,
|
||||
slmUsed,
|
||||
@@ -507,6 +510,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
size_t commandStreamStart,
|
||||
bool &blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
EventsRequest &eventsRequest,
|
||||
EventBuilder &eventBuilder,
|
||||
uint32_t taskLevel,
|
||||
bool slmUsed,
|
||||
@@ -588,6 +592,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
dispatchFlags.flushStampReference = this->flushStamp->getStampReference();
|
||||
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
|
||||
dispatchFlags.outOfOrderExecutionAllowed = !eventBuilder.getEvent() || commandStreamReceiver.isNTo1SubmissionModelEnabled();
|
||||
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
|
||||
|
||||
@@ -92,6 +92,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
void programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config);
|
||||
void programPipelineSelect(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
void programOutOfDeviceWaitlistSemaphores(LinearStream &csr, DispatchFlags &dispatchFlags, Device ¤tDevice);
|
||||
virtual void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||
virtual void initPageTableManagerRegisters(LinearStream &csr){};
|
||||
|
||||
|
||||
@@ -262,6 +262,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
|
||||
auto commandStreamStartCSR = commandStreamCSR.getUsed();
|
||||
|
||||
if (dispatchFlags.outOfDeviceDependencies) {
|
||||
programOutOfDeviceWaitlistSemaphores(commandStreamCSR, dispatchFlags, device);
|
||||
}
|
||||
initPageTableManagerRegisters(commandStreamCSR);
|
||||
programPreemption(commandStreamCSR, device, dispatchFlags);
|
||||
programCoherency(commandStreamCSR, dispatchFlags);
|
||||
@@ -650,6 +653,9 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
||||
if (experimentalCmdBuffer.get() != nullptr) {
|
||||
size += experimentalCmdBuffer->getRequiredInjectionSize<GfxFamily>();
|
||||
}
|
||||
if (dispatchFlags.outOfDeviceDependencies) {
|
||||
size += dispatchFlags.outOfDeviceDependencies->numEventsInWaitList * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
@@ -777,4 +783,25 @@ void CommandStreamReceiverHw<GfxFamily>::resetKmdNotifyHelper(KmdNotifyHelper *n
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd) {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programOutOfDeviceWaitlistSemaphores(LinearStream &csr, DispatchFlags &dispatchFlags, Device ¤tDevice) {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
|
||||
for (cl_uint i = 0; i < dispatchFlags.outOfDeviceDependencies->numEventsInWaitList; i++) {
|
||||
auto event = castToObjectOrAbort<Event>(dispatchFlags.outOfDeviceDependencies->eventWaitList[i]);
|
||||
if (event->isUserEvent() || (&event->getCommandQueue()->getDevice() == ¤tDevice)) {
|
||||
continue;
|
||||
}
|
||||
auto timestampPacket = event->getTimestampPacket();
|
||||
|
||||
auto compareAddress = timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
|
||||
|
||||
auto miSemaphoreCmd = commandStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();
|
||||
*miSemaphoreCmd = MI_SEMAPHORE_WAIT::sInit();
|
||||
miSemaphoreCmd->setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
||||
miSemaphoreCmd->setSemaphoreDataDword(1);
|
||||
miSemaphoreCmd->setSemaphoreGraphicsAddress(compareAddress);
|
||||
}
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -57,6 +57,7 @@ struct DispatchFlags {
|
||||
bool outOfOrderExecutionAllowed = false;
|
||||
FlushStampTrackingObj *flushStampReference = nullptr;
|
||||
PreemptionMode preemptionMode = PreemptionMode::Disabled;
|
||||
EventsRequest *outOfDeviceDependencies = nullptr;
|
||||
};
|
||||
|
||||
struct CsrSizeRequestFlags {
|
||||
|
||||
@@ -319,6 +319,85 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabl
|
||||
EXPECT_TRUE(walkerFound);
|
||||
}
|
||||
|
||||
// Checks that attaching an EventsRequest to DispatchFlags grows the CSR size estimate by
// exactly one MI_SEMAPHORE_WAIT per event in the wait list.
HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForCsrThenAddSizeForSemaphores) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();

    cl_uint numEventsOnWaitlist = 5;
    // Only the event count matters for the estimate, so no actual event list is supplied.
    EventsRequest eventsRequest(numEventsOnWaitlist, nullptr, nullptr);

    // Baseline: flags without any out-of-device dependencies.
    DispatchFlags flags;
    auto sizeWithoutEvents = ultCsr.getRequiredCmdStreamSize(flags, *device);

    // Same query again with the dependencies attached.
    flags.outOfDeviceDependencies = &eventsRequest;
    auto sizeWithEvents = ultCsr.getRequiredCmdStreamSize(flags, *device);

    size_t semaphoresSize = numEventsOnWaitlist * sizeof(MI_SEMAPHORE_WAIT);
    size_t extendedSize = sizeWithoutEvents + semaphoresSize;

    EXPECT_EQ(sizeWithEvents, extendedSize);
}
|
||||
|
||||
// Enqueues a kernel on a device1 queue with a wait list that mixes user events, events
// from device1 queues and events from device2 queues. The CSR stream must then begin with
// exactly two semaphores, one for each device2 event (event4, event6), and contain no
// further MI_SEMAPHORE_WAIT commands.
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    // Both devices share one execution environment.
    // NOTE(review): incRefInternal() presumably keeps the stack-allocated environment
    // from being released when the devices drop their references — confirm.
    ExecutionEnvironment executionEnvironment;
    executionEnvironment.incRefInternal();
    auto device1 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment));
    auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment));

    device1->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
    MockContext context1(device1.get());
    MockContext context2(device2.get());

    MockKernelWithInternals kernel(*device1, &context1);

    auto cmdQ1 = std::make_unique<MockCommandQueueHw<FamilyType>>(&context1, device1.get(), nullptr);
    auto cmdQ2 = std::make_unique<MockCommandQueueHw<FamilyType>>(&context2, device2.get(), nullptr);

    const cl_uint eventsOnWaitlist = 6;
    TagNode<TimestampPacket> *tagNodes[eventsOnWaitlist];
    for (size_t i = 0; i < eventsOnWaitlist; i++) {
        tagNodes[i] = executionEnvironment.memoryManager->getTimestampPacketAllocator()->getTag();
    }

    // Wait list layout: two user events, then alternating device1 / device2 events.
    UserEvent event1;
    event1.setStatus(CL_COMPLETE);
    UserEvent event2;
    event2.setStatus(CL_COMPLETE);
    Event event3(cmdQ1.get(), 0, 0, 0);
    event3.setTimestampPacketNode(tagNodes[2]);
    Event event4(cmdQ2.get(), 0, 0, 0);
    event4.setTimestampPacketNode(tagNodes[3]);
    Event event5(cmdQ1.get(), 0, 0, 0);
    event5.setTimestampPacketNode(tagNodes[4]);
    Event event6(cmdQ2.get(), 0, 0, 0);
    event6.setTimestampPacketNode(tagNodes[5]);

    cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};

    size_t gws[] = {1, 1, 1};
    cmdQ1->enqueueKernel(kernel.mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
    auto &cmdStream = device1->getUltCommandStreamReceiver<FamilyType>().commandStream;

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);

    auto verifySemaphore = [](MI_SEMAPHORE_WAIT *semaphoreCmd, Event *compareEvent) {
        // Fatal assertion: the checks below dereference semaphoreCmd, so a failed cast
        // must stop the helper instead of crashing the test binary.
        ASSERT_NE(nullptr, semaphoreCmd);
        EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
        EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
        EXPECT_EQ(compareEvent->getTimestampPacket()->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd),
                  semaphoreCmd->getSemaphoreGraphicsAddress());
    };

    // The first two parsed commands must be the semaphores; guard every dereference so a
    // short command list fails the test instead of reading past the end.
    auto it = hwParser.cmdList.begin();
    ASSERT_TRUE(it != hwParser.cmdList.end());
    verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &event4);
    ASSERT_TRUE(it != hwParser.cmdList.end());
    verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &event6);

    // No semaphore may appear for user events or for events from device1.
    while (it != hwParser.cmdList.end()) {
        EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
        ++it;
    }
}
|
||||
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingThenProgramSemaphoresForWaitlist) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using WALKER = WALKER_TYPE<FamilyType>;
|
||||
|
||||
Reference in New Issue
Block a user