mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Correct semaphore wait programming for cross device dependencies
When an event is blocked by a blocked user event, program the semaphore wait when that user event is unblocked. Related-To: NEO-3691 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
7a2d3d6369
commit
e027178c37
@ -84,7 +84,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
|
||||
}
|
||||
if (bcsAllowed) {
|
||||
auto &selectorCopyEngine = device->getDeviceById(0)->getSelectorCopyEngine();
|
||||
bcsEngine = &device->getDeviceById(0)->getEngine(EngineHelpers::getBcsEngineType(hwInfo, selectorCopyEngine), EngineUsage::Regular);
|
||||
bcsEngine = device->getDeviceById(0)->getDevice().tryGetEngine(EngineHelpers::getBcsEngineType(hwInfo, selectorCopyEngine), EngineUsage::Regular);
|
||||
}
|
||||
}
|
||||
|
||||
@ -783,13 +783,18 @@ bool CommandQueue::isBlockedCommandStreamRequired(uint32_t commandType, const Ev
|
||||
return true;
|
||||
}
|
||||
|
||||
if ((CL_COMMAND_BARRIER == commandType || CL_COMMAND_MARKER == commandType) &&
|
||||
getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
if (CL_COMMAND_BARRIER == commandType || CL_COMMAND_MARKER == commandType) {
|
||||
auto timestampPacketWriteEnabled = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled();
|
||||
if (timestampPacketWriteEnabled || context->getRootDeviceIndices().size() > 1) {
|
||||
|
||||
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
|
||||
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
|
||||
if (waitlistEvent->getTimestampPacketNodes()) {
|
||||
return true;
|
||||
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
|
||||
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
|
||||
if (timestampPacketWriteEnabled && waitlistEvent->getTimestampPacketNodes()) {
|
||||
return true;
|
||||
}
|
||||
if (waitlistEvent->getCommandQueue() && waitlistEvent->getCommandQueue()->getDevice().getRootDeviceIndex() != this->getDevice().getRootDeviceIndex()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -982,13 +982,12 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
(uint32_t)multiDispatchInfo.size());
|
||||
}
|
||||
if (storeTimestampPackets) {
|
||||
for (cl_uint i = 0; i < eventsRequest.numEventsInWaitList; i++) {
|
||||
auto event = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
|
||||
event->incRefInternal();
|
||||
}
|
||||
command->setTimestampPacketNode(*timestampPacketContainer, std::move(timestampPacketDependencies));
|
||||
command->setEventsRequest(eventsRequest);
|
||||
} else if (this->context->getRootDeviceIndices().size() > 1) {
|
||||
command->setEventsRequest(eventsRequest);
|
||||
}
|
||||
|
||||
outEvent->setCommand(std::move(command));
|
||||
|
||||
eventBuilder->addParentEvents(ArrayRef<const cl_event>(eventsRequest.eventWaitList, eventsRequest.numEventsInWaitList));
|
||||
|
@ -48,7 +48,7 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
|
||||
void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const {
|
||||
for (cl_uint i = 0; i < this->numEventsInWaitList; i++) {
|
||||
auto event = castToObjectOrAbort<Event>(this->eventWaitList[i]);
|
||||
if (event->isUserEvent()) {
|
||||
if (event->isUserEvent() || CompletionStamp::notReady == event->peekTaskCount()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -245,6 +245,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
|
||||
kernel->areMultipleSubDevicesInContext()); //areMultipleSubDevicesInContext
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||
}
|
||||
|
||||
if (timestampPacketDependencies) {
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
||||
dispatchFlags.barrierTimestampPacketNodes = &timestampPacketDependencies->barrierNodes;
|
||||
@ -309,6 +313,10 @@ void CommandWithoutKernel::dispatchBlitOperation() {
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->barrierNodes);
|
||||
blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0];
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(blitProperties.csrDependencies, *bcsCsr);
|
||||
}
|
||||
|
||||
auto bcsTaskCount = bcsCsr->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled());
|
||||
|
||||
commandQueue.updateBcsTaskCount(bcsTaskCount);
|
||||
@ -329,6 +337,7 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
auto barrierNodes = timestampPacketDependencies ? &timestampPacketDependencies->barrierNodes : nullptr;
|
||||
auto lockCSR = commandStreamReceiver.obtainUniqueOwnership();
|
||||
|
||||
auto enqueueOperationType = EnqueueProperties::Operation::DependencyResolveOnGpu;
|
||||
@ -336,14 +345,15 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||
if (kernelOperation->blitEnqueue) {
|
||||
enqueueOperationType = EnqueueProperties::Operation::Blit;
|
||||
|
||||
UNRECOVERABLE_IF(!barrierNodes);
|
||||
if (commandStreamReceiver.isStallingPipeControlOnNextFlushRequired()) {
|
||||
timestampPacketDependencies->barrierNodes.add(commandStreamReceiver.getTimestampPacketAllocator()->getTag());
|
||||
barrierNodes->add(commandStreamReceiver.getTimestampPacketAllocator()->getTag());
|
||||
}
|
||||
}
|
||||
|
||||
DispatchFlags dispatchFlags(
|
||||
{}, //csrDependencies
|
||||
&timestampPacketDependencies->barrierNodes, //barrierTimestampPacketNodes
|
||||
barrierNodes, //barrierTimestampPacketNodes
|
||||
{}, //pipelineSelectArgs
|
||||
commandQueue.flushStamp->getStampReference(), //flushStampReference
|
||||
commandQueue.getThrottle(), //throttle
|
||||
@ -370,7 +380,11 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||
false, //useGlobalAtomics
|
||||
1u); //numDevicesInContext
|
||||
|
||||
UNRECOVERABLE_IF(!kernelOperation->blitEnqueue && !commandStreamReceiver.peekTimestampPacketWriteEnabled());
|
||||
UNRECOVERABLE_IF(!kernelOperation->blitEnqueue && !commandStreamReceiver.peekTimestampPacketWriteEnabled() && commandQueue.getContext().getRootDeviceIndices().size() == 1);
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||
}
|
||||
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
||||
makeTimestampPacketsResident(commandStreamReceiver);
|
||||
@ -402,6 +416,10 @@ void Command::setEventsRequest(EventsRequest &eventsRequest) {
|
||||
auto size = eventsRequest.numEventsInWaitList * sizeof(cl_event);
|
||||
memcpy_s(&eventsWaitlist[0], size, eventsRequest.eventWaitList, size);
|
||||
this->eventsRequest.eventWaitList = &eventsWaitlist[0];
|
||||
for (cl_uint i = 0; i < eventsRequest.numEventsInWaitList; i++) {
|
||||
auto event = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
|
||||
event->incRefInternal();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -414,12 +432,9 @@ void Command::setTimestampPacketNode(TimestampPacketContainer &current, Timestam
|
||||
}
|
||||
|
||||
Command::~Command() {
|
||||
auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
|
||||
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
for (cl_event &eventFromWaitList : eventsWaitlist) {
|
||||
auto event = castToObjectOrAbort<Event>(eventFromWaitList);
|
||||
event->decRefInternal();
|
||||
}
|
||||
for (cl_event &eventFromWaitList : eventsWaitlist) {
|
||||
auto event = castToObjectOrAbort<Event>(eventFromWaitList);
|
||||
event->decRefInternal();
|
||||
}
|
||||
}
|
||||
|
||||
@ -427,7 +442,7 @@ void Command::makeTimestampPacketsResident(CommandStreamReceiver &commandStreamR
|
||||
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
for (cl_event &eventFromWaitList : eventsWaitlist) {
|
||||
auto event = castToObjectOrAbort<Event>(eventFromWaitList);
|
||||
if (event->getTimestampPacketNodes()) {
|
||||
if (event->getTimestampPacketNodes() && event->getCommandQueue()->getClDevice().getRootDeviceIndex() == commandStreamReceiver.getRootDeviceIndex()) {
|
||||
event->getTimestampPacketNodes()->makeResident(commandStreamReceiver);
|
||||
}
|
||||
}
|
||||
|
@ -1196,9 +1196,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenBlockingWh
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelRequiringDCFlushWhenUnblockedThenDCFlushIsAdded) {
|
||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||
MockContext ctx(pClDevice);
|
||||
CommandQueueHw<FamilyType> commandQueue(&ctx, pClDevice, 0, false);
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.timestampPacketWriteEnabled = false;
|
||||
CommandQueueHw<FamilyType> commandQueue(&ctx, pClDevice, 0, false);
|
||||
cl_event blockingEvent;
|
||||
MockEvent<UserEvent> mockEvent(&ctx);
|
||||
blockingEvent = &mockEvent;
|
||||
|
@ -37,9 +37,9 @@ typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTaskTests;
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelNotRequiringDCFlushWhenUnblockedThenDCFlushIsNotAdded) {
|
||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||
MockContext ctx(pClDevice);
|
||||
CommandQueueHw<FamilyType> commandQueue(&ctx, pClDevice, 0, false);
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.timestampPacketWriteEnabled = false;
|
||||
CommandQueueHw<FamilyType> commandQueue(&ctx, pClDevice, 0, false);
|
||||
cl_event blockingEvent;
|
||||
MockEvent<UserEvent> mockEvent(&ctx);
|
||||
blockingEvent = &mockEvent;
|
||||
|
@ -250,32 +250,35 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyArePassedToMarkerThenMiSemaphoreWaitCommandSizeIsIncluded) {
|
||||
struct CrossDeviceDependenciesTests : public ::testing::Test {
|
||||
|
||||
void SetUp() override {
|
||||
|
||||
deviceFactory = std::make_unique<UltClDeviceFactory>(3, 0);
|
||||
auto device1 = deviceFactory->rootDevices[1];
|
||||
auto device2 = deviceFactory->rootDevices[2];
|
||||
|
||||
cl_device_id devices[] = {device1, device2};
|
||||
|
||||
context = std::make_unique<MockContext>(ClDeviceVector(devices, 2), false);
|
||||
|
||||
pCmdQ1 = context.get()->getSpecialQueue(1u);
|
||||
pCmdQ2 = context.get()->getSpecialQueue(2u);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
}
|
||||
|
||||
std::unique_ptr<UltClDeviceFactory> deviceFactory;
|
||||
std::unique_ptr<MockContext> context;
|
||||
|
||||
CommandQueue *pCmdQ1 = nullptr;
|
||||
CommandQueue *pCmdQ2 = nullptr;
|
||||
};
|
||||
|
||||
HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyArePassedToMarkerThenMiSemaphoreWaitCommandSizeIsIncluded) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
auto deviceFactory = std::make_unique<UltClDeviceFactory>(3, 0);
|
||||
auto device1 = deviceFactory->rootDevices[1];
|
||||
auto device2 = deviceFactory->rootDevices[2];
|
||||
|
||||
auto mockCsr1 = new MockCommandStreamReceiver(*device1->executionEnvironment, device1->getRootDeviceIndex(), device1->getDeviceBitfield());
|
||||
auto mockCsr2 = new MockCommandStreamReceiver(*device2->executionEnvironment, device2->getRootDeviceIndex(), device2->getDeviceBitfield());
|
||||
|
||||
device1->resetCommandStreamReceiver(mockCsr1);
|
||||
device2->resetCommandStreamReceiver(mockCsr2);
|
||||
|
||||
cl_device_id devices[] = {device1, device2};
|
||||
|
||||
auto context = std::make_unique<MockContext>(ClDeviceVector(devices, 2), false);
|
||||
|
||||
auto pCmdQ1 = context.get()->getSpecialQueue(1u);
|
||||
auto pCmdQ2 = context.get()->getSpecialQueue(2u);
|
||||
|
||||
MockKernelWithInternals mockKernel(ClDeviceVector(devices, 2));
|
||||
DispatchInfo dispatchInfo;
|
||||
MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel);
|
||||
dispatchInfo.setKernel(mockKernel.mockKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
|
||||
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
|
||||
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
|
||||
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6);
|
||||
@ -309,11 +312,7 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ1->getCommandStreamReceiver(false));
|
||||
|
||||
HardwareParse csHwParser;
|
||||
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
|
||||
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());
|
||||
|
||||
EXPECT_EQ(0u, semaphores.size());
|
||||
EXPECT_EQ(0u, csrDeps.taskCountContainer.size());
|
||||
EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<FamilyType>(csrDeps));
|
||||
}
|
||||
|
||||
@ -339,13 +338,301 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ2->getCommandStreamReceiver(false));
|
||||
|
||||
EXPECT_EQ(3u, csrDeps.taskCountContainer.size());
|
||||
EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<FamilyType>(csrDeps));
|
||||
}
|
||||
}
|
||||
|
||||
// Chain under test: userEvent1 -> marker on pCmdQ1 (event1) -> read buffer on pCmdQ2 (event2)
// -> marker on pCmdQ1. While userEvent1 is not complete, no MI_SEMAPHORE_WAIT may appear in
// the queues' command streams; after it is completed and both queues are finished, each GPGPU
// CSR stream must contain exactly one semaphore that targets the other queue's CSR tag address.
HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventWhenProgrammingCrossDeviceDependenciesForGpgpuCsrThenProgramSemaphoreWaitOnUnblockingEvent) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    UserEvent userEvent1(&pCmdQ1->getContext());

    cl_event outputEvent1{};
    cl_event inputEvent1 = &userEvent1;

    pCmdQ1->enqueueMarkerWithWaitList(
        1,
        &inputEvent1,
        &outputEvent1);

    auto event1 = castToObject<Event>(outputEvent1);

    ASSERT_NE(nullptr, event1);
    EXPECT_EQ(CompletionStamp::notReady, event1->peekTaskCount());

    cl_int retVal = CL_INVALID_PLATFORM;
    auto buffer = Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Fatal check: buffer is dereferenced by the enqueue and by release() below.
    ASSERT_NE(nullptr, buffer);

    char hostPtr[MemoryConstants::pageSize]{};

    cl_event outputEvent2{};

    pCmdQ2->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr,
                              1,
                              &outputEvent1,
                              &outputEvent2);
    {
        // The read is blocked on event1, so nothing may be programmed into pCmdQ2's stream yet.
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ2->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        EXPECT_EQ(0u, semaphores.size());
    }

    auto event2 = castToObject<Event>(outputEvent2);

    ASSERT_NE(nullptr, event2);
    EXPECT_EQ(CompletionStamp::notReady, event2->peekTaskCount());

    pCmdQ1->enqueueMarkerWithWaitList(
        1,
        &outputEvent2,
        nullptr);
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        EXPECT_EQ(0u, semaphores.size());
    }

    // Completing the user event unblocks the whole chain.
    userEvent1.setStatus(CL_COMPLETE);
    event1->release();
    event2->release();
    pCmdQ1->finish();
    pCmdQ2->finish();
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ1->getGpgpuCommandStreamReceiver().getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        // Fatal check: semaphores[0] is indexed right below.
        ASSERT_EQ(1u, semaphores.size());
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
        EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress());
    }
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ2->getGpgpuCommandStreamReceiver().getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        // Fatal check: semaphores[0] is indexed right below.
        ASSERT_EQ(1u, semaphores.size());
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
        EXPECT_EQ(0u, semaphoreCmd->getSemaphoreDataDword());
        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress());
    }
    buffer->release();
}
|
||||
|
||||
// Same dependency chain as the cross-device variant, but every enqueue goes to pCmdQ1.
// With all events coming from a single queue, no MI_SEMAPHORE_WAIT is expected, either
// while the user event is pending or after it is completed and the queue is finished.
HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventWhenProgrammingSingleDeviceDependenciesForGpgpuCsrThenNoSemaphoreWaitIsProgrammed) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    UserEvent userEvent1(&pCmdQ1->getContext());

    cl_event outputEvent1{};
    cl_event inputEvent1 = &userEvent1;

    pCmdQ1->enqueueMarkerWithWaitList(
        1,
        &inputEvent1,
        &outputEvent1);

    auto event1 = castToObject<Event>(outputEvent1);

    ASSERT_NE(nullptr, event1);
    EXPECT_EQ(CompletionStamp::notReady, event1->peekTaskCount());

    cl_int retVal = CL_INVALID_PLATFORM;
    auto buffer = Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Fatal check: buffer is dereferenced by the enqueue and by release() below.
    ASSERT_NE(nullptr, buffer);

    char hostPtr[MemoryConstants::pageSize]{};

    cl_event outputEvent2{};

    pCmdQ1->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr,
                              1,
                              &outputEvent1,
                              &outputEvent2);
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        EXPECT_EQ(0u, semaphores.size());
    }

    auto event2 = castToObject<Event>(outputEvent2);

    ASSERT_NE(nullptr, event2);
    EXPECT_EQ(CompletionStamp::notReady, event2->peekTaskCount());

    pCmdQ1->enqueueMarkerWithWaitList(
        1,
        &outputEvent2,
        nullptr);
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        EXPECT_EQ(0u, semaphores.size());
    }

    // Completing the user event unblocks the whole chain.
    userEvent1.setStatus(CL_COMPLETE);
    event1->release();
    event2->release();
    pCmdQ1->finish();
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ1->getGpgpuCommandStreamReceiver().getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        EXPECT_EQ(0u, semaphores.size());
    }
    buffer->release();
}
|
||||
|
||||
// Blitter variant of the cross-device test. Chain: userEvent1 -> marker on pCmdQ1 (event1)
// -> read buffer on pCmdQ2 (event2) -> read buffer on pCmdQ1 (event3) -> marker on pCmdQ2.
// Nothing may be programmed while userEvent1 is pending; after it is completed the GPGPU and
// BCS command streams of both queues are inspected for MI_SEMAPHORE_WAIT commands.
// The fixture's pCmdQ1/pCmdQ2 are replaced here by queues created through the CL API, and
// those queues are released at the end of the test.
HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventWhenProgrammingCrossDeviceDependenciesForBlitCsrThenProgramSemaphoreWaitOnUnblockingEvent) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableBlitterForEnqueueOperations.set(true);

    for (auto &rootDeviceEnvironment : deviceFactory->rootDevices[0]->getExecutionEnvironment()->rootDeviceEnvironments) {
        rootDeviceEnvironment->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
    }

    auto clCmdQ1 = clCreateCommandQueue(context.get(), deviceFactory->rootDevices[1], {}, nullptr);
    auto clCmdQ2 = clCreateCommandQueue(context.get(), deviceFactory->rootDevices[2], {}, nullptr);

    pCmdQ1 = castToObject<CommandQueue>(clCmdQ1);
    pCmdQ2 = castToObject<CommandQueue>(clCmdQ2);
    ASSERT_NE(nullptr, pCmdQ1);
    ASSERT_NE(nullptr, pCmdQ2);

    // Both queues' BCS receivers are dereferenced at the end of the test, so skip when either is missing.
    if (!pCmdQ1->getBcsCommandStreamReceiver() || !pCmdQ2->getBcsCommandStreamReceiver()) {
        pCmdQ1->release();
        pCmdQ2->release();
        GTEST_SKIP();
    }

    UserEvent userEvent1(&pCmdQ1->getContext());

    cl_event outputEvent1{};
    cl_event inputEvent1 = &userEvent1;

    pCmdQ1->enqueueMarkerWithWaitList(
        1,
        &inputEvent1,
        &outputEvent1);

    auto event1 = castToObject<Event>(outputEvent1);

    ASSERT_NE(nullptr, event1);
    EXPECT_EQ(CompletionStamp::notReady, event1->peekTaskCount());

    cl_int retVal = CL_INVALID_PLATFORM;
    auto buffer = Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Fatal check: buffer is dereferenced by the enqueues and by release() below.
    ASSERT_NE(nullptr, buffer);

    char hostPtr[MemoryConstants::pageSize]{};

    cl_event outputEvent2{};

    pCmdQ2->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr,
                              1,
                              &outputEvent1,
                              &outputEvent2);

    auto event2 = castToObject<Event>(outputEvent2);

    ASSERT_NE(nullptr, event2);
    EXPECT_EQ(CompletionStamp::notReady, event2->peekTaskCount());
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ2->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        EXPECT_EQ(0u, semaphores.size());
    }

    cl_event outputEvent3{};
    pCmdQ1->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr,
                              1,
                              &outputEvent2,
                              &outputEvent3);

    auto event3 = castToObject<Event>(outputEvent3);

    ASSERT_NE(nullptr, event3);
    EXPECT_EQ(CompletionStamp::notReady, event3->peekTaskCount());
    {
        // NOTE(review): this inspects pCmdQ2's stream although the preceding enqueue went to
        // pCmdQ1 - possibly intentional, possibly a copy-paste; confirm with the author.
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ2->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        EXPECT_EQ(0u, semaphores.size());
    }

    pCmdQ2->enqueueMarkerWithWaitList(
        1,
        &outputEvent3,
        nullptr);
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ2->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        EXPECT_EQ(0u, semaphores.size());
    }

    // Completing the user event unblocks the whole chain.
    userEvent1.setStatus(CL_COMPLETE);
    event1->release();
    event2->release();
    event3->release();
    pCmdQ1->finish();
    pCmdQ2->finish();

    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ1->getGpgpuCommandStreamReceiver().getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        // Fatal check: semaphores[0] is indexed right below.
        ASSERT_EQ(1u, semaphores.size());
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
        EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress());
    }
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ1->getBcsCommandStreamReceiver()->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        // Only a lower bound is asserted for the blitter stream.
        EXPECT_LE(1u, semaphores.size());
    }
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ2->getGpgpuCommandStreamReceiver().getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        // Fatal check: semaphores[0] is indexed right below.
        ASSERT_EQ(2u, semaphores.size());
        auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
        EXPECT_EQ(0u, semaphoreCmd0->getSemaphoreDataDword());
        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
    }
    {
        HardwareParse csHwParser;
        csHwParser.parseCommands<FamilyType>(pCmdQ2->getBcsCommandStreamReceiver()->getCS(0));
        auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());

        // Only a lower bound is asserted for the blitter stream.
        EXPECT_LE(1u, semaphores.size());
    }
    buffer->release();
    pCmdQ1->release();
    pCmdQ2->release();
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStaticPartitioningEnabledWhenFlushingTaskThenWorkPartitionAllocationIsMadeResident) {
|
||||
|
@ -317,6 +317,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
auto commandStreamStartCSR = commandStreamCSR.getUsed();
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
|
||||
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
|
||||
|
||||
if (stallingPipeControlOnNextFlushRequired) {
|
||||
programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags);
|
||||
@ -795,6 +796,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
||||
}
|
||||
|
||||
size += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(dispatchFlags.csrDependencies);
|
||||
size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(dispatchFlags.csrDependencies);
|
||||
|
||||
if (stallingPipeControlOnNextFlushRequired) {
|
||||
auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes;
|
||||
@ -1010,6 +1012,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
||||
|
||||
for (auto &blitProperties : blitPropertiesContainer) {
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
|
||||
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
|
||||
|
||||
if (blitProperties.outputTimestampPacket && profilingEnabled) {
|
||||
BlitCommandsHelper<GfxFamily>::encodeProfilingStartMmios(commandStream, *blitProperties.outputTimestampPacket);
|
||||
|
@ -118,7 +118,11 @@ size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const Vec3<size_t
|
||||
|
||||
auto sizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize());
|
||||
|
||||
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) + (sizePerBlit * nBlits) + timestampCmdSize + estimatePreBlitCommandSize();
|
||||
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
|
||||
TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(csrDependencies) +
|
||||
(sizePerBlit * nBlits) +
|
||||
timestampCmdSize +
|
||||
estimatePreBlitCommandSize();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
Reference in New Issue
Block a user