mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Use Semaphore to wait for dependencies on the same device
Change-Id: Ib04c960c50183c080d02753815ece80b58d1980e Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
393ce116e7
commit
d04614dce3
@ -206,7 +206,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
auto taskLevel = 0u;
|
||||
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType);
|
||||
|
||||
auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, profilingRequired, perfCountersRequired, multiDispatchInfo);
|
||||
auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, numEventsInWaitList, profilingRequired, perfCountersRequired, multiDispatchInfo);
|
||||
auto commandStreamStart = commandStream.getUsed();
|
||||
|
||||
DBG_LOG(EventsDebugEnable, "blockQueue", blockQueue, "virtualEvent", virtualEvent, "taskLevel", taskLevel);
|
||||
|
@ -261,6 +261,9 @@ class GpgpuWalkerHelper {
|
||||
SchedulerKernel &scheduler,
|
||||
IndirectHeap *ssh,
|
||||
IndirectHeap *dsh);
|
||||
|
||||
static void dispatchOnDeviceWaitlistSemaphores(LinearStream *commandStream, Device &currentDevice,
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
@ -282,7 +285,7 @@ LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfiling
|
||||
}
|
||||
|
||||
template <typename GfxFamily, uint32_t eventType>
|
||||
LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
LinearStream &getCommandStream(CommandQueue &commandQueue, cl_uint numEventsInWaitList, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
size_t expectedSizeCS = 0;
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
@ -294,6 +297,7 @@ LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfiling
|
||||
}
|
||||
if (commandQueue.getDevice().peekCommandStreamReceiver()->peekTimestampPacketWriteEnabled()) {
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
|
||||
expectedSizeCS += numEventsInWaitList * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
}
|
||||
return commandQueue.getCS(expectedSizeCS);
|
||||
}
|
||||
|
@ -494,6 +494,11 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
|
||||
ssh = &getIndirectHeap<GfxFamily, IndirectHeap::SURFACE_STATE>(commandQueue, multiDispatchInfo);
|
||||
}
|
||||
|
||||
if (commandQueue.getDevice().peekCommandStreamReceiver()->peekTimestampPacketWriteEnabled()) {
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchOnDeviceWaitlistSemaphores(commandStream, commandQueue.getDevice(),
|
||||
numEventsInWaitList, eventWaitList);
|
||||
}
|
||||
|
||||
dsh->align(KernelCommandsHelper<GfxFamily>::alignInterfaceDescriptorData);
|
||||
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
@ -645,6 +650,28 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
|
||||
dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void GpgpuWalkerHelper<GfxFamily>::dispatchOnDeviceWaitlistSemaphores(LinearStream *commandStream, Device &currentDevice,
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList) {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
|
||||
for (cl_uint i = 0; i < numEventsInWaitList; i++) {
|
||||
auto event = castToObjectOrAbort<Event>(eventWaitList[i]);
|
||||
if (event->isUserEvent() || (&event->getCommandQueue()->getDevice() != &currentDevice)) {
|
||||
continue;
|
||||
}
|
||||
auto timestampPacket = event->getTimestampPacket();
|
||||
|
||||
auto compareAddress = timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
|
||||
|
||||
auto miSemaphoreCmd = commandStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
|
||||
*miSemaphoreCmd = MI_SEMAPHORE_WAIT::sInit();
|
||||
miSemaphoreCmd->setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
||||
miSemaphoreCmd->setSemaphoreDataDword(1);
|
||||
miSemaphoreCmd->setSemaphoreGraphicsAddress(compareAddress);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void GpgpuWalkerHelper<GfxFamily>::getDefaultDshSpace(
|
||||
const size_t &offsetInterfaceDescriptorTable,
|
||||
|
@ -719,4 +719,5 @@ void Event::setTimestampPacketNode(TagNode<TimestampPacket> *node) {
|
||||
timestampPacketNode = node;
|
||||
}
|
||||
|
||||
TimestampPacket *Event::getTimestampPacket() const { return timestampPacketNode->tag; }
|
||||
} // namespace OCLRT
|
||||
|
@ -127,6 +127,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||
GraphicsAllocation *getHwTimeStampAllocation();
|
||||
|
||||
void setTimestampPacketNode(TagNode<TimestampPacket> *node);
|
||||
TimestampPacket *getTimestampPacket() const;
|
||||
|
||||
bool isPerfCountersEnabled() {
|
||||
return perfCountersEnabled;
|
||||
|
@ -241,7 +241,7 @@ HWTEST_F(ParentKernelCommandStreamFixture, GivenDispatchInfoWithParentKernelWhen
|
||||
|
||||
size_t totalKernelSize = alignUp(numOfKernels * size, MemoryConstants::pageSize);
|
||||
|
||||
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, false, false, multiDispatchInfo);
|
||||
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, 0, false, false, multiDispatchInfo);
|
||||
|
||||
EXPECT_LT(totalKernelSize, commandStream.getMaxAvailableSpace());
|
||||
|
||||
|
@ -134,16 +134,39 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabl
|
||||
MockMultiDispatchInfo multiDispatchInfo(std::vector<Kernel *>({kernel1.mockKernel, kernel2.mockKernel}));
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, false, false, multiDispatchInfo);
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, 0, false, false, multiDispatchInfo);
|
||||
auto sizeWithDisabled = cmdQ.requestedCmdStreamSize;
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, false, false, multiDispatchInfo);
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, 0, false, false, multiDispatchInfo);
|
||||
auto sizeWithEnabled = cmdQ.requestedCmdStreamSize;
|
||||
|
||||
EXPECT_EQ(sizeWithEnabled, sizeWithDisabled + (2 * sizeof(typename FamilyType::PIPE_CONTROL)));
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStreamSizeWithWaitlistThenAddSizeForSemaphores) {
|
||||
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
|
||||
MockCommandQueue cmdQ(nullptr, device.get(), nullptr);
|
||||
MockKernelWithInternals kernel1(*device);
|
||||
MockKernelWithInternals kernel2(*device);
|
||||
MockMultiDispatchInfo multiDispatchInfo(std::vector<Kernel *>({kernel1.mockKernel, kernel2.mockKernel}));
|
||||
|
||||
cl_uint numEventsOnWaitlist = 5;
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, numEventsOnWaitlist, false, false, multiDispatchInfo);
|
||||
auto sizeWithDisabled = cmdQ.requestedCmdStreamSize;
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, numEventsOnWaitlist, false, false, multiDispatchInfo);
|
||||
auto sizeWithEnabled = cmdQ.requestedCmdStreamSize;
|
||||
|
||||
size_t extendedSize = sizeWithDisabled + (2 * sizeof(typename FamilyType::PIPE_CONTROL)) +
|
||||
(numEventsOnWaitlist * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT));
|
||||
|
||||
EXPECT_EQ(sizeWithEnabled, extendedSize);
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispatchingGpuWalkerThenAddTwoPcForLastWalker) {
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
@ -295,3 +318,85 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabl
|
||||
}
|
||||
EXPECT_TRUE(walkerFound);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingThenProgramSemaphoresForWaitlist) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using WALKER = WALKER_TYPE<FamilyType>;
|
||||
ExecutionEnvironment executionEnvironment;
|
||||
executionEnvironment.incRefInternal();
|
||||
auto device1 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment));
|
||||
auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment));
|
||||
MockKernelWithInternals kernel1(*device1);
|
||||
device1->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
MockContext context1(device1.get());
|
||||
MockContext context2(device2.get());
|
||||
|
||||
MockMultiDispatchInfo
|
||||
multiDispatchInfo(std::vector<Kernel *>({kernel1.mockKernel}));
|
||||
|
||||
MockCommandQueue cmdQ1(&context1, device1.get(), nullptr);
|
||||
MockCommandQueue cmdQ2(&context2, device2.get(), nullptr);
|
||||
auto &cmdStream = cmdQ1.getCS(0);
|
||||
|
||||
const cl_uint eventsOnWaitlist = 6;
|
||||
TagNode<TimestampPacket> *tagNodes[eventsOnWaitlist];
|
||||
for (size_t i = 0; i < eventsOnWaitlist; i++) {
|
||||
tagNodes[i] = executionEnvironment.memoryManager->getTimestampPacketAllocator()->getTag();
|
||||
}
|
||||
|
||||
UserEvent event1;
|
||||
UserEvent event2;
|
||||
Event event3(&cmdQ1, 0, 0, 0);
|
||||
event3.setTimestampPacketNode(tagNodes[2]);
|
||||
Event event4(&cmdQ2, 0, 0, 0);
|
||||
event4.setTimestampPacketNode(tagNodes[3]);
|
||||
Event event5(&cmdQ1, 0, 0, 0);
|
||||
event5.setTimestampPacketNode(tagNodes[4]);
|
||||
Event event6(&cmdQ2, 0, 0, 0);
|
||||
event6.setTimestampPacketNode(tagNodes[5]);
|
||||
|
||||
cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};
|
||||
|
||||
GpgpuWalkerHelper<FamilyType>::dispatchWalker(
|
||||
cmdQ1,
|
||||
multiDispatchInfo,
|
||||
eventsOnWaitlist,
|
||||
waitlist,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
device1->getPreemptionMode(),
|
||||
false);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
|
||||
auto verifySemaphore = [](MI_SEMAPHORE_WAIT *semaphoreCmd, Event *compareEvent) {
|
||||
EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
||||
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
|
||||
EXPECT_EQ(compareEvent->getTimestampPacket()->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd),
|
||||
semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
};
|
||||
|
||||
uint32_t semaphoresFound = 0;
|
||||
uint32_t walkersFound = 0;
|
||||
|
||||
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
|
||||
if (semaphoreCmd) {
|
||||
semaphoresFound++;
|
||||
if (semaphoresFound == 1) {
|
||||
verifySemaphore(semaphoreCmd, &event3);
|
||||
} else if (semaphoresFound == 2) {
|
||||
verifySemaphore(semaphoreCmd, &event5);
|
||||
}
|
||||
}
|
||||
if (genCmdCast<WALKER *>(*it)) {
|
||||
walkersFound++;
|
||||
EXPECT_EQ(2u, semaphoresFound); // semaphores from events programmed before walker
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(1u, walkersFound);
|
||||
EXPECT_EQ(2u, semaphoresFound); // total number of semaphores found in cmdList
|
||||
}
|
||||
|
@ -343,6 +343,21 @@ struct GENX {
|
||||
}
|
||||
} STATE_SIP;
|
||||
|
||||
typedef struct tagMI_SEMAPHORE_WAIT {
|
||||
typedef enum tagCOMPARE_OPERATION {
|
||||
COMPARE_OPERATION_SAD_NOT_EQUAL_SDD = 0x5,
|
||||
} COMPARE_OPERATION;
|
||||
|
||||
static tagMI_SEMAPHORE_WAIT sInit(void) {
|
||||
MI_SEMAPHORE_WAIT state;
|
||||
return state;
|
||||
}
|
||||
|
||||
inline void setSemaphoreDataDword(uint32_t value) {}
|
||||
inline void setSemaphoreGraphicsAddress(uint64_t value) {}
|
||||
inline void setCompareOperation(COMPARE_OPERATION value) {}
|
||||
} MI_SEMAPHORE_WAIT;
|
||||
|
||||
typedef GPGPU_WALKER WALKER_TYPE;
|
||||
static GPGPU_WALKER cmdInitGpgpuWalker;
|
||||
static INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;
|
||||
|
Reference in New Issue
Block a user