Extend semaphore synchronization for different command stream receivers.

Change-Id: Ic904b8c1e052adbb7b2ef82a6dec74ec69837f9f
This commit is contained in:
Venevtsev, Igor
2019-01-25 10:20:32 +01:00
committed by sys_ocldev
parent 311de0c644
commit 303014582a
7 changed files with 234 additions and 15 deletions

View File

@@ -201,7 +201,7 @@ class GpgpuWalkerHelper {
IndirectHeap *ssh,
IndirectHeap *dsh);
static void dispatchOnDeviceWaitlistSemaphores(LinearStream *commandStream, Device &currentDevice,
static void dispatchOnCsrWaitlistSemaphores(LinearStream *linearStream, CommandStreamReceiver &currentCsr,
cl_uint numEventsInWaitList, const cl_event *eventWaitList);
static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxFamily> *storeCmd);

View File

@@ -362,16 +362,16 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsEnd(
}
template <typename GfxFamily>
inline void GpgpuWalkerHelper<GfxFamily>::dispatchOnDeviceWaitlistSemaphores(LinearStream *commandStream, Device &currentDevice,
inline void GpgpuWalkerHelper<GfxFamily>::dispatchOnCsrWaitlistSemaphores(LinearStream *linearStream, CommandStreamReceiver &currentCsr,
cl_uint numEventsInWaitList, const cl_event *eventWaitList) {
for (cl_uint i = 0; i < numEventsInWaitList; i++) {
auto event = castToObjectOrAbort<Event>(eventWaitList[i]);
if (event->isUserEvent() || (&event->getCommandQueue()->getDevice() != &currentDevice)) {
if (event->isUserEvent() || (&event->getCommandQueue()->getCommandStreamReceiver() != &currentCsr)) {
continue;
}
for (auto &node : event->getTimestampPacketNodes()->peekNodes()) {
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(*commandStream, *node->tag);
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(*linearStream, *node->tag);
}
}
}

View File

@@ -91,7 +91,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
}
if (commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
GpgpuWalkerHelper<GfxFamily>::dispatchOnDeviceWaitlistSemaphores(commandStream, commandQueue.getDevice(),
GpgpuWalkerHelper<GfxFamily>::dispatchOnCsrWaitlistSemaphores(commandStream, commandQueue.getCommandStreamReceiver(),
numEventsInWaitList, eventWaitList);
if (previousTimestampPacketNodes) {
for (auto &node : previousTimestampPacketNodes->peekNodes()) {

View File

@@ -80,7 +80,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programPipelineSelect(LinearStream &csr, DispatchFlags &dispatchFlags);
void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags);
void programStateSip(LinearStream &cmdStream, Device &device);
void handleEventsTimestampPacketTags(LinearStream &csr, DispatchFlags &dispatchFlags, Device &currentDevice);
void handleEventsTimestampPacketTags(LinearStream &linearStream, DispatchFlags &dispatchFlags, CommandStreamReceiver &currentCsr);
void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags);
virtual void initPageTableManagerRegisters(LinearStream &csr){};

View File

@@ -260,7 +260,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
auto commandStreamStartCSR = commandStreamCSR.getUsed();
if (dispatchFlags.outOfDeviceDependencies) {
handleEventsTimestampPacketTags(commandStreamCSR, dispatchFlags, device);
handleEventsTimestampPacketTags(commandStreamCSR, dispatchFlags, *this);
}
if (stallingPipeControlOnNextFlushRequired) {
stallingPipeControlOnNextFlushRequired = false;
@@ -784,7 +784,7 @@ void CommandStreamReceiverHw<GfxFamily>::addClearSLMWorkAround(typename GfxFamil
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleEventsTimestampPacketTags(LinearStream &csr, DispatchFlags &dispatchFlags, Device &currentDevice) {
void CommandStreamReceiverHw<GfxFamily>::handleEventsTimestampPacketTags(LinearStream &linearStream, DispatchFlags &dispatchFlags, CommandStreamReceiver &currentCsr) {
for (cl_uint i = 0; i < dispatchFlags.outOfDeviceDependencies->numEventsInWaitList; i++) {
auto event = castToObjectOrAbort<Event>(dispatchFlags.outOfDeviceDependencies->eventWaitList[i]);
if (event->isUserEvent()) {
@@ -792,11 +792,11 @@ void CommandStreamReceiverHw<GfxFamily>::handleEventsTimestampPacketTags(LinearS
}
auto timestampPacketContainer = event->getTimestampPacketNodes();
timestampPacketContainer->makeResident(*this);
timestampPacketContainer->makeResident(currentCsr);
if (&event->getCommandQueue()->getDevice() != &currentDevice) {
if (&event->getCommandQueue()->getCommandStreamReceiver() != &currentCsr) {
for (auto &node : timestampPacketContainer->peekNodes()) {
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(csr, *node->tag);
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(linearStream, *node->tag);
}
}
}

View File

@@ -487,6 +487,62 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
}
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto cmdQ1 = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
// Create second (LOW_PRIORITY) queue on the same device
cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0};
auto cmdQ2 = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), props);
cmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;
const cl_uint eventsOnWaitlist = 6;
MockTimestampPacketContainer timestamp3(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);
MockTimestampPacketContainer timestamp4(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);
MockTimestampPacketContainer timestamp5(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);
MockTimestampPacketContainer timestamp6(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 2);
UserEvent event1;
event1.setStatus(CL_COMPLETE);
UserEvent event2;
event2.setStatus(CL_COMPLETE);
Event event3(cmdQ1.get(), 0, 0, 0);
event3.addTimestampPacketNodes(timestamp3);
Event event4(cmdQ2.get(), 0, 0, 0);
event4.addTimestampPacketNodes(timestamp4);
Event event5(cmdQ1.get(), 0, 0, 0);
event5.addTimestampPacketNodes(timestamp5);
Event event6(cmdQ2.get(), 0, 0, 0);
event6.addTimestampPacketNodes(timestamp6);
cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream;
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp4.getNode(0)->tag);
verifyMiAtomic(genCmdCast<MI_ATOMIC *>(*it++), timestamp4.getNode(0)->tag);
verifyDependencyCounterValues(event4.getTimestampPacketNodes(), 1);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(0)->tag);
verifyMiAtomic(genCmdCast<MI_ATOMIC *>(*it++), timestamp6.getNode(0)->tag);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(1)->tag);
verifyMiAtomic(genCmdCast<MI_ATOMIC *>(*it++), timestamp6.getNode(1)->tag);
verifyDependencyCounterValues(event6.getTimestampPacketNodes(), 1);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
it++;
}
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenProgramSemaphoresOnCsrStreamOnFlush) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment, 1u));
@@ -527,6 +583,48 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlo
}
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingBlockedThenProgramSemaphoresOnCsrStreamOnFlush) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment, 1u));
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto cmdQ1 = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
// Create second (LOW_PRIORITY) queue on the same device
cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0};
auto cmdQ2 = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), props);
cmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;
MockTimestampPacketContainer timestamp0(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);
MockTimestampPacketContainer timestamp1(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);
UserEvent userEvent;
Event event0(cmdQ1.get(), 0, 0, 0);
event0.addTimestampPacketNodes(timestamp0);
Event event1(cmdQ2.get(), 0, 0, 0);
event1.addTimestampPacketNodes(timestamp1);
cl_event waitlist[] = {&userEvent, &event0, &event1};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr);
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream;
EXPECT_EQ(0u, cmdStream.getUsed());
userEvent.setStatus(CL_COMPLETE);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp1.getNode(0)->tag);
verifyMiAtomic(genCmdCast<typename FamilyType::MI_ATOMIC *>(*it++), timestamp1.getNode(0)->tag);
verifyDependencyCounterValues(event1.getTimestampPacketNodes(), 1);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
it++;
}
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingThenProgramSemaphoresForWaitlist) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using WALKER = WALKER_TYPE<FamilyType>;
@@ -606,6 +704,86 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh
EXPECT_EQ(3u, semaphoresFound); // total number of semaphores found in cmdList
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenDispatchingThenProgramSemaphoresForWaitlist) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using WALKER = WALKER_TYPE<FamilyType>;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
MockMultiDispatchInfo multiDispatchInfo(std::vector<Kernel *>({kernel->mockKernel}));
// Create second (LOW_PRIORITY) queue on the same device
cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0};
auto mockCmdQ2 = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), props);
mockCmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;
auto &cmdStream = mockCmdQ->getCS(0);
const cl_uint eventsOnWaitlist = 6;
MockTimestampPacketContainer timestamp3(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);
MockTimestampPacketContainer timestamp4(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);
MockTimestampPacketContainer timestamp5(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 2);
MockTimestampPacketContainer timestamp6(*device->getCommandStreamReceiver().getTimestampPacketAllocator(), 1);
UserEvent event1;
UserEvent event2;
Event event3(mockCmdQ.get(), 0, 0, 0);
event3.addTimestampPacketNodes(timestamp3);
Event event4(mockCmdQ2.get(), 0, 0, 0);
event4.addTimestampPacketNodes(timestamp4);
Event event5(mockCmdQ.get(), 0, 0, 0);
event5.addTimestampPacketNodes(timestamp5);
Event event6(mockCmdQ2.get(), 0, 0, 0);
event6.addTimestampPacketNodes(timestamp6);
cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};
HardwareInterface<FamilyType>::dispatchWalker(
*mockCmdQ,
multiDispatchInfo,
eventsOnWaitlist,
waitlist,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
device->getPreemptionMode(),
false);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
uint32_t semaphoresFound = 0;
uint32_t walkersFound = 0;
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
if (semaphoreCmd) {
semaphoresFound++;
if (semaphoresFound == 1) {
verifySemaphore(semaphoreCmd, timestamp3.getNode(0)->tag);
verifyMiAtomic(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp3.getNode(0)->tag);
verifyDependencyCounterValues(event3.getTimestampPacketNodes(), 1);
} else if (semaphoresFound == 2) {
verifySemaphore(semaphoreCmd, timestamp5.getNode(0)->tag);
verifyMiAtomic(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp5.getNode(0)->tag);
verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
} else if (semaphoresFound == 3) {
verifySemaphore(semaphoreCmd, timestamp5.getNode(1)->tag);
verifyMiAtomic(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp5.getNode(1)->tag);
verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
}
}
if (genCmdCast<WALKER *>(*it)) {
walkersFound++;
EXPECT_EQ(3u, semaphoresFound); // semaphores from events programmed before walker
}
}
EXPECT_EQ(1u, walkersFound);
EXPECT_EQ(3u, semaphoresFound); // total number of semaphores found in cmdList
}
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingNonBlockedThenMakeItResident) {
auto mockTagAllocator = new MockTagAllocator<>(executionEnvironment.memoryManager.get(), 1);
@@ -779,6 +957,42 @@ HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentDevicesWhenEnqueu
EXPECT_TRUE(ultCsr.isMadeResident(tagNode2->getGraphicsAllocation()));
}
HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentCSRsWhenEnqueueingThenMakeAllTimestampsResident) {
TagAllocator<TimestampPacket> tagAllocator(executionEnvironment.memoryManager.get(), 1, 1);
auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
ultCsr.timestampPacketWriteEnabled = true;
ultCsr.storeMakeResidentAllocations = true;
auto cmdQ1 = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
// Create second (LOW_PRIORITY) queue on the same device
cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0};
auto cmdQ2 = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), props);
cmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;
MockTimestampPacketContainer node1(*ultCsr.getTimestampPacketAllocator(), 0);
MockTimestampPacketContainer node2(*ultCsr.getTimestampPacketAllocator(), 0);
auto tagNode1 = tagAllocator.getTag();
node1.add(tagNode1);
auto tagNode2 = tagAllocator.getTag();
node2.add(tagNode2);
Event event0(cmdQ1.get(), 0, 0, 0);
event0.addTimestampPacketNodes(node1);
Event event1(cmdQ2.get(), 0, 0, 0);
event1.addTimestampPacketNodes(node2);
cl_event waitlist[] = {&event0, &event1};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr);
EXPECT_NE(tagNode1->getGraphicsAllocation(), tagNode2->getGraphicsAllocation());
EXPECT_TRUE(ultCsr.isMadeResident(tagNode1->getGraphicsAllocation()));
EXPECT_TRUE(ultCsr.isMadeResident(tagNode2->getGraphicsAllocation()));
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingNonBlockedThenMakeItResident) {
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -7,6 +7,7 @@
#pragma once
#include "runtime/command_queue/command_queue_hw.h"
#include "unit_tests/libult/ult_command_stream_receiver.h"
////////////////////////////////////////////////////////////////////////////////
// MockCommandQueue - Core implementation
@@ -76,6 +77,10 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
cl_queue_properties *properties) : BaseClass(context, device, properties) {
}
UltCommandStreamReceiver<GfxFamily> &getUltCommandStreamReceiver() {
return reinterpret_cast<UltCommandStreamReceiver<GfxFamily> &>(*BaseClass::engine->commandStreamReceiver);
}
cl_int enqueueWriteImage(Image *dstImage,
cl_bool blockingWrite,
const size_t *origin,