Optimize number of dispatched pipe control commands

Related-To: NEO-6871

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-05-13 00:04:35 +00:00
committed by Compute-Runtime-Automation
parent a5d8a22bac
commit 24669e0219
10 changed files with 360 additions and 110 deletions

View File

@@ -249,6 +249,7 @@ struct CommandListCoreFamily : CommandListImp {
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
void addFlushRequiredCommand(bool flushOperationRequired, ze_event_handle_t signalEvent);
size_t cmdListCurrentStartOffset = 0;
bool containsAnyKernel = false;

View File

@@ -348,7 +348,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
if (appendPipeControlWithPostSync) {
NEO::PipeControlArgs args;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(!!event->signalScope, hwInfo);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(),
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
@@ -637,17 +637,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs,
hEvent, numWaitEvents, phWaitEvents);
auto event = Event::fromHandle(hEvent);
if (event) {
allocationStruct.needsFlush &= !event->signalScope;
}
if (allocationStruct.needsFlush) {
const auto &hwInfo = this->device->getHwInfo();
NEO::PipeControlArgs args;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
addFlushRequiredCommand(allocationStruct.needsFlush, hEvent);
return ret;
}
@@ -1152,20 +1142,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
}
appendEventForProfilingAllWalkers(hSignalEvent, false);
const auto &hwInfo = this->device->getHwInfo();
if (NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo)) {
auto event = Event::fromHandle(hSignalEvent);
if (event) {
dstAllocationStruct.needsFlush &= !event->signalScope;
}
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
NEO::PipeControlArgs args;
args.dcFlushEnable = true;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}
addFlushRequiredCommand(dstAllocationStruct.needsFlush, hSignalEvent);
if (NEO::DebugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
@@ -1249,19 +1226,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
return result;
}
const auto &hwInfo = this->device->getHwInfo();
if (NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo)) {
auto event = Event::fromHandle(hSignalEvent);
if (event) {
dstAllocationStruct.needsFlush &= !event->signalScope;
}
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
NEO::PipeControlArgs args;
args.dcFlushEnable = true;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}
addFlushRequiredCommand(dstAllocationStruct.needsFlush, hSignalEvent);
if (NEO::DebugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
@@ -1589,20 +1554,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
appendEventForProfilingAllWalkers(hSignalEvent, false);
const auto &hwInfo = this->device->getHwInfo();
if (NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo)) {
auto event = Event::fromHandle(hSignalEvent);
if (event) {
hostPointerNeedsFlush &= !event->signalScope;
}
if (hostPointerNeedsFlush) {
NEO::PipeControlArgs args;
args.dcFlushEnable = true;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}
addFlushRequiredCommand(hostPointerNeedsFlush, hSignalEvent);
if (NEO::DebugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
@@ -1687,7 +1639,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_
args, hwInfo);
} else {
NEO::PipeControlArgs args;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(!!event->signalScope, hwInfo);
if (this->partitionCount > 1) {
args.workloadPartitionOffset = true;
event->setPacketsInUse(this->partitionCount);
@@ -1846,7 +1798,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
Event::STATE_SIGNALED, args, hwInfo);
} else {
NEO::PipeControlArgs args;
bool applyScope = event->signalScope;
bool applyScope = !!event->signalScope;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(applyScope, hwInfo);
if (this->partitionCount > 1) {
event->setPacketsInUse(this->partitionCount);
@@ -2015,7 +1967,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
} else {
const auto &hwInfo = this->device->getHwInfo();
NEO::PipeControlArgs args;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(!!event->signalScope, hwInfo);
NEO::MemorySynchronizationCommands<GfxFamily>::setPostSyncExtraProperties(args,
hwInfo);
@@ -2316,4 +2268,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
return ZE_RESULT_SUCCESS;
}
// Appends a standalone pipe control with DC flush enabled when a flush is still needed
// after an appended operation.
//
// flushOperationRequired - caller's request for a flush after the operation.
// signalEvent            - optional signal event handle; when it resolves to an event whose
//                          signalScope is non-zero, the request is dropped here
//                          (NOTE(review): presumably because the event's own signaling
//                          pipe control already carries the DC flush - confirm).
//
// Nothing is emitted for copy-only command lists, or when getDcFlushEnable() returns false
// for the resulting request.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::addFlushRequiredCommand(bool flushOperationRequired, ze_event_handle_t signalEvent) {
    if (isCopyOnly()) {
        return;
    }

    auto signalEventObject = Event::fromHandle(signalEvent);
    if (signalEventObject && signalEventObject->signalScope) {
        // A scoped signal event cancels the separate flush request.
        flushOperationRequired = false;
    }

    const auto &hwInfo = this->device->getHwInfo();
    const bool dcFlushNeeded = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(flushOperationRequired, hwInfo);
    if (!dcFlushNeeded) {
        return;
    }

    NEO::PipeControlArgs pipeControlArgs;
    pipeControlArgs.dcFlushEnable = true;
    NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), pipeControlArgs);
}
} // namespace L0

View File

@@ -161,7 +161,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
auto event = Event::fromHandle(hEvent);
eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);
L3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
L3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(!!event->signalScope, hwInfo);
isTimestampEvent = event->isUsingContextEndOffset();
eventAddress = event->getPacketAddress(this->device);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -15,9 +15,9 @@ namespace L0 {
namespace ult {
using CommandListAppendLaunchKernel = Test<ModuleFixture>;
using IsSKLOrKBL = IsWithinProducts<IGFX_SKYLAKE, IGFX_KABYLAKE>;
using IsGen9Core = IsWithinProducts<IGFX_SKYLAKE, IGFX_COFFEELAKE>;
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSLMThenL3IsProgrammedWithSLMValue, IsSKLOrKBL) {
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSLMThenL3IsProgrammedWithSLMValue, IsGen9Core) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
createKernel();
ze_result_t returnValue;

View File

@@ -28,6 +28,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
: public ::L0::CommandListCoreFamily<gfxCoreFamily> {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using BaseClass = ::L0::CommandListCoreFamily<gfxCoreFamily>;
using BaseClass::addFlushRequiredCommand;
using BaseClass::appendBlitFill;
using BaseClass::appendCopyImageBlit;
using BaseClass::appendEventForProfiling;

View File

@@ -564,6 +564,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_SUBDEVICE;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, event.get(), 0u, nullptr);
@@ -580,16 +581,12 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) &&
(cmd->getImmediateData() == Event::STATE_SIGNALED) &&
(!cmd->getDcFlushEnable())) {
(cmd->getDcFlushEnable())) {
postSyncFound++;
}
}
EXPECT_EQ(1u, postSyncFound);
auto it = *(iterator.end() - 1);
auto cmd1 = genCmdCast<PIPE_CONTROL *>(*it);
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd1->getDcFlushEnable());
}
using ImageSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;

View File

@@ -245,13 +245,16 @@ HWTEST2_F(CommandListCreate,
device->getNEODevice()->getMemoryManager()->freeSystemMemory(cmdListHostBuffer);
}
using PlatformSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_ICELAKE>;
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingHostMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsFound, PlatformSupport) {
using PlatformSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;
HWTEST2_F(CommandListCreate,
givenCommandListWhenMemoryCopyRegionHavingHostMemoryWithSignalAndWaitScopeEventsUsingRenderEngineThenPipeControlsWithDcFlushIsFound,
PlatformSupport) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
ze_result_t result = ZE_RESULT_SUCCESS;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
auto &commandContainer = commandList->commandContainer;
commandContainer.slmSize = 0;
void *srcBuffer = reinterpret_cast<void *>(0x1234);
void *dstBuffer = reinterpret_cast<void *>(0x2345);
@@ -267,6 +270,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingHostMemor
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
events.push_back(event.get());
eventDesc.index = 1;
@@ -275,24 +279,39 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingHostMemor
ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U};
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U};
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, events[0], 1u, &events[1]);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
cmdList,
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
usedAfter - usedBefore));
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(7u, allPcCommands.size());
uint32_t dcFlushPipeControls = 0;
for (auto it : allPcCommands) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getDcFlushEnable()) {
dcFlushPipeControls++;
}
}
EXPECT_EQ(2u, dcFlushPipeControls);
}
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingDeviceMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsNotFound, PlatformSupport) {
HWTEST2_F(CommandListCreate,
givenCommandListWhenMemoryCopyRegionHavingDeviceMemoryWithNoSignalAndWaitScopeEventsUsingRenderEngineThenPipeControlWithDcFlushIsFound,
PlatformSupport) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
ze_result_t result = ZE_RESULT_SUCCESS;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
auto &commandContainer = commandList->commandContainer;
commandContainer.slmSize = 0;
void *srcBuffer = nullptr;
void *dstBuffer = nullptr;
@@ -322,27 +341,41 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingDeviceMem
ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U};
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U};
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, events[0], 1u, &events[1]);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
cmdList,
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
usedAfter - usedBefore));
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(6u, allPcCommands.size());
uint32_t dcFlushPipeControls = 0;
for (auto it : allPcCommands) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getDcFlushEnable()) {
dcFlushPipeControls++;
}
}
EXPECT_EQ(1u, dcFlushPipeControls);
context->freeMem(srcBuffer);
context->freeMem(dstBuffer);
}
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingDeviceMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsNotFound, PlatformSupport) {
HWTEST2_F(CommandListCreate,
givenCommandListWhenMemoryFillHavingDeviceMemoryWithSignalAndNoWaitScopeEventsUsingRenderEngineThenPipeControlWithDcFlushIsFound,
PlatformSupport) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
ze_result_t result = ZE_RESULT_SUCCESS;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
auto &commandContainer = commandList->commandContainer;
commandContainer.slmSize = 0;
void *dstBuffer = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
@@ -357,7 +390,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingDeviceMemoryWit
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
events.push_back(event.get());
eventDesc.index = 1;
@@ -365,26 +398,40 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingDeviceMemoryWit
events.push_back(event1.get());
int one = 1;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4096u,
events[0], 1u, &events[1]);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
cmdList,
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
usedAfter - usedBefore));
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(6u, allPcCommands.size());
uint32_t dcFlushPipeControls = 0;
for (auto it : allPcCommands) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getDcFlushEnable()) {
dcFlushPipeControls++;
}
}
EXPECT_EQ(1u, dcFlushPipeControls);
context->freeMem(dstBuffer);
}
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingSharedMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsFound, PlatformSupport) {
HWTEST2_F(CommandListCreate,
givenCommandListWhenMemoryFillHavingSharedMemoryWithSignalAndWaitScopeEventsUsingRenderEngineThenPipeControlsWithDcFlushIsFound,
PlatformSupport) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
ze_result_t result = ZE_RESULT_SUCCESS;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
auto &commandContainer = commandList->commandContainer;
commandContainer.slmSize = 0;
void *dstBuffer = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
@@ -401,6 +448,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingSharedMemoryWit
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
events.push_back(event.get());
eventDesc.index = 1;
@@ -408,26 +456,39 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingSharedMemoryWit
events.push_back(event1.get());
int one = 1;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4096u,
events[0], 1u, &events[1]);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
cmdList,
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
usedAfter - usedBefore));
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(7u, allPcCommands.size());
uint32_t dcFlushPipeControls = 0;
for (auto it : allPcCommands) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getDcFlushEnable()) {
dcFlushPipeControls++;
}
}
EXPECT_EQ(2u, dcFlushPipeControls);
context->freeMem(dstBuffer);
}
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingHostMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsFound, PlatformSupport) {
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingHostMemoryWithSignalAndWaitScopeEventsUsingRenderEngineThenPipeControlWithDcFlushIsFound, PlatformSupport) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
ze_result_t result = ZE_RESULT_SUCCESS;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
auto &commandContainer = commandList->commandContainer;
commandContainer.slmSize = 0;
void *dstBuffer = nullptr;
ze_host_mem_alloc_desc_t hostDesc = {};
@@ -451,16 +512,26 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingHostMemoryWithS
events.push_back(event1.get());
int one = 1;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4090u,
events[0], 1u, &events[1]);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
cmdList,
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
usedAfter - usedBefore));
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(6u, allPcCommands.size());
uint32_t dcFlushPipeControls = 0;
for (auto it : allPcCommands) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getDcFlushEnable()) {
dcFlushPipeControls++;
}
}
EXPECT_EQ(2u, dcFlushPipeControls);
context->freeMem(dstBuffer);
}
@@ -472,6 +543,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic
ze_result_t result = ZE_RESULT_SUCCESS;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
auto &commandContainer = commandList->commandContainer;
commandContainer.slmSize = 0;
void *dstBuffer = nullptr;
ze_host_mem_alloc_desc_t hostDesc = {};
@@ -495,31 +567,42 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic
events.push_back(event1.get());
int one = 1;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4090u,
events[0], 1u, &events[1]);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
cmdList,
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
usedAfter - usedBefore));
auto itor = find<SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(6u, allPcCommands.size());
auto cmd = genCmdCast<PIPE_CONTROL *>(*allPcCommands.back());
EXPECT_TRUE(cmd->getDcFlushEnable());
uint32_t dcFlushPipeControls = 0;
for (auto it : allPcCommands) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getDcFlushEnable()) {
dcFlushPipeControls++;
}
}
EXPECT_EQ(2u, dcFlushPipeControls);
context->freeMem(dstBuffer);
}
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDeviceScopeThenPCDueToWaitEventIsNotAddedAndPCDueToSignalEventIsAddedWithOutDCFlush, PlatformSupport) {
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDeviceScopeThenPCDueToWaitEventIsNotAddedAndPCDueToSignalEventIsAddedWithDCFlush, PlatformSupport) {
using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
ze_result_t result = ZE_RESULT_SUCCESS;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
auto &commandContainer = commandList->commandContainer;
commandContainer.slmSize = 0;
void *dstBuffer = nullptr;
ze_host_mem_alloc_desc_t hostDesc = {};
@@ -543,30 +626,27 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic
events.push_back(event1.get());
int one = 1;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4090u,
events[0], 1u, &events[1]);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
cmdList,
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
usedAfter - usedBefore));
auto itor = find<SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
EXPECT_FALSE(cmd->getDcFlushEnable());
auto allPcCommands = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
uint32_t dcFlushPipeControls = 0;
for (auto it : allPcCommands) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getDcFlushEnable()) {
dcFlushPipeControls++;
}
}
EXPECT_EQ(1u, dcFlushPipeControls);
context->freeMem(dstBuffer);
}

View File

@@ -106,5 +106,134 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
EXPECT_FALSE(commandListImmediate.containsAnyKernel);
}
// Fixture alias for the addFlushRequiredCommand tests below.
using CommandListTest = Test<DeviceFixture>;
// Matcher limiting those tests to gfx cores in the IGFX_GEN9_CORE..IGFX_XE_HP_CORE range;
// each test additionally checks that getDcFlushEnable(true, hwInfo) is true before proceeding.
using IsDcFlushSupportedPlatform = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_XE_HP_CORE>;
// A command list initialized on the Copy engine group must not consume any command stream
// space when a flush is requested through addFlushRequiredCommand.
HWTEST2_F(CommandListTest, givenCopyCommandListWhenRequiredFlushOperationThenExpectNoPipeControl, IsDcFlushSupportedPlatform) {
    // Precondition: DC flush is enabled for this device.
    EXPECT_TRUE(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo()));

    auto copyCommandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, copyCommandList);
    EXPECT_EQ(ZE_RESULT_SUCCESS, copyCommandList->initialize(device, NEO::EngineGroupType::Copy, 0u));

    auto &container = copyCommandList->commandContainer;
    auto streamUsed = [&container]() -> size_t { return container.getCommandStream()->getUsed(); };

    const size_t offsetBeforeFlush = streamUsed();
    copyCommandList->addFlushRequiredCommand(true, nullptr);
    const size_t offsetAfterFlush = streamUsed();

    // Stream offset unchanged: nothing was programmed.
    EXPECT_EQ(offsetBeforeFlush, offsetAfterFlush);
}
// A compute command list with a flush request and no signal event must program exactly one
// PIPE_CONTROL, and that command must have DC flush enabled.
HWTEST2_F(CommandListTest, givenComputeCommandListWhenRequiredFlushOperationThenExpectPipeControlWithDcFlush, IsDcFlushSupportedPlatform) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Precondition: DC flush is enabled for this device.
    EXPECT_TRUE(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo()));

    auto computeCommandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, computeCommandList);
    EXPECT_EQ(ZE_RESULT_SUCCESS, computeCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u));

    auto &container = computeCommandList->commandContainer;

    const size_t offsetBeforeFlush = container.getCommandStream()->getUsed();
    computeCommandList->addFlushRequiredCommand(true, nullptr);
    const size_t offsetAfterFlush = container.getCommandStream()->getUsed();

    // Exactly one PIPE_CONTROL worth of bytes was added.
    const size_t programmedBytes = offsetAfterFlush - offsetBeforeFlush;
    EXPECT_EQ(sizeof(PIPE_CONTROL), programmedBytes);

    // Parse only the bytes appended by the call under test.
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands,
        ptrOffset(container.getCommandStream()->getCpuBase(), offsetBeforeFlush),
        programmedBytes));

    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(pipeControlIt, parsedCommands.end());

    auto programmedPipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
    EXPECT_TRUE(programmedPipeControl->getDcFlushEnable());
}
// A compute command list must not program anything when the caller does not request a flush.
HWTEST2_F(CommandListTest, givenComputeCommandListWhenNoRequiredFlushOperationThenExpectNoPipeControl, IsDcFlushSupportedPlatform) {
    // Precondition: DC flush is enabled for this device, so a missing pipe control
    // is attributable to the "not required" argument rather than to the hardware.
    EXPECT_TRUE(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo()));

    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto &commandContainer = commandList->commandContainer;

    size_t usedBefore = commandContainer.getCommandStream()->getUsed();
    commandList->addFlushRequiredCommand(false, nullptr);
    size_t usedAfter = commandContainer.getCommandStream()->getUsed();

    // Stream offset unchanged: nothing was programmed.
    EXPECT_EQ(usedBefore, usedAfter);
}
// With a flush request and a signal event whose descriptor leaves the signal scope at zero,
// a compute command list must program exactly one PIPE_CONTROL with DC flush enabled.
HWTEST2_F(CommandListTest, givenComputeCommandListWhenRequiredFlushOperationAndNoSignalScopeEventThenExpectPipeControlWithDcFlush, IsDcFlushSupportedPlatform) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Precondition: DC flush is enabled for this device.
    EXPECT_TRUE(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo()));

    // Initialized so the check below never reads an indeterminate value.
    ze_result_t result = ZE_RESULT_SUCCESS;
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;

    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_NE(nullptr, eventPool);

    // eventDesc.signal stays zero-initialized: the event has no signal scope.
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    // Stop here instead of dereferencing a null event below.
    ASSERT_NE(nullptr, event);
    ze_event_handle_t eventHandle = event->toHandle();

    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto &commandContainer = commandList->commandContainer;

    size_t usedBefore = commandContainer.getCommandStream()->getUsed();
    commandList->addFlushRequiredCommand(true, eventHandle);
    size_t usedAfter = commandContainer.getCommandStream()->getUsed();

    // Exactly one PIPE_CONTROL worth of bytes was added.
    EXPECT_EQ(sizeof(PIPE_CONTROL), usedAfter - usedBefore);

    // Parse only the bytes appended by the call under test.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
        usedAfter - usedBefore));

    auto pipeControl = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(pipeControl, cmdList.end());
    auto cmdPipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControl);
    EXPECT_TRUE(cmdPipeControl->getDcFlushEnable());
}
// With a flush request and a signal event that has a host signal scope, a compute command
// list must not program any additional command through addFlushRequiredCommand.
HWTEST2_F(CommandListTest, givenComputeCommandListWhenRequiredFlushOperationAndSignalScopeEventThenExpectNoPipeControl, IsDcFlushSupportedPlatform) {
    // Precondition: DC flush is enabled for this device.
    EXPECT_TRUE(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo()));

    // Initialized so the check below never reads an indeterminate value.
    ze_result_t result = ZE_RESULT_SUCCESS;
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;

    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_NE(nullptr, eventPool);

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    // Non-zero signal scope is what suppresses the standalone flush.
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    // Stop here instead of dereferencing a null event below.
    ASSERT_NE(nullptr, event);
    ze_event_handle_t eventHandle = event->toHandle();

    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto &commandContainer = commandList->commandContainer;

    size_t usedBefore = commandContainer.getCommandStream()->getUsed();
    commandList->addFlushRequiredCommand(true, eventHandle);
    size_t usedAfter = commandContainer.getCommandStream()->getUsed();

    // Stream offset unchanged: nothing was programmed.
    EXPECT_EQ(usedBefore, usedAfter);
}
} // namespace ult
} // namespace L0

View File

@@ -209,20 +209,31 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledT
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
auto &commandContainer = cmdList.commandContainer;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
GenCmdList genCmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
genCmdList,
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore),
usedAfter - usedBefore));
auto itor = find<PIPE_CONTROL *>(genCmdList.begin(), genCmdList.end());
ASSERT_NE(genCmdList.end(), itor);
PIPE_CONTROL *cmd = nullptr;
uint32_t dcFlushPipeControl = 0;
while (itor != genCmdList.end()) {
cmd = genCmdCast<PIPE_CONTROL *>(*itor);
itor = find<PIPE_CONTROL *>(++itor, genCmdList.end());
if (cmd->getDcFlushEnable()) {
dcFlushPipeControl++;
}
itor++;
}
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); // NOLINT(clang-analyzer-core.CallAndMessage)
uint32_t expectedDcFlushPipeControl =
NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo()) ? 1 : 0;
EXPECT_EQ(expectedDcFlushPipeControl, dcFlushPipeControl);
}
HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, IsAtLeastSkl) {

View File

@@ -7,6 +7,7 @@
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/test_macros/test.h"
@@ -568,5 +569,65 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLater,
EXPECT_EQ(3u, postSyncPipeControls);
}
// Appends a memory copy between two host pointers with a signal event scoped to the
// sub-device, then inspects only the commands added by that call. Expects two PIPE_CONTROLs
// that write Event::STATE_SIGNALED as immediate data, and two PIPE_CONTROLs with DC flush
// enabled (three when isPipeControlPriorToNonPipelinedStateCommandsWARequired reports true).
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenCommandListWhenMemoryCopyWithSignalEventScopeSetToSubDeviceThenB2BPipeControlIsAddedWithDcFlushForLastPC, isXeHpOrXeHpgCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    ze_result_t result = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
    auto &container = commandList->commandContainer;

    void *copySource = reinterpret_cast<void *>(0x1234);
    void *copyDestination = reinterpret_cast<void *>(0x2345);

    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;
    signalEventDesc.signal = ZE_EVENT_SCOPE_FLAG_SUBDEVICE;
    auto signalEvent = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &signalEventDesc, device));

    const size_t offsetBeforeCopy = container.getCommandStream()->getUsed();
    result = commandList->appendMemoryCopy(copyDestination, copySource, 0x1001, signalEvent.get(), 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    const size_t offsetAfterCopy = container.getCommandStream()->getUsed();

    // Parse only the bytes appended by appendMemoryCopy.
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands,
        ptrOffset(container.getCommandStream()->getCpuBase(), offsetBeforeCopy),
        offsetAfterCopy - offsetBeforeCopy));

    auto pipeControlIterators = findAll<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(0u, pipeControlIterators.size());

    uint32_t signalingPipeControls = 0;
    uint32_t dcFlushPipeControls = 0;
    for (auto pipeControlIt : pipeControlIterators) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);

        const bool writesImmediateData =
            pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA;
        if (writesImmediateData && pipeControl->getImmediateData() == Event::STATE_SIGNALED) {
            signalingPipeControls++;
        }
        if (pipeControl->getDcFlushEnable()) {
            dcFlushPipeControls++;
        }
    }

    // One extra DC-flush pipe control is expected when the workaround is reported as required.
    auto &hwInfo = device->getHwInfo();
    auto &hwInfoConfig = (*NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily));
    const auto waPair = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, true);
    const uint32_t expectedDcFlushPipeControls = waPair.first ? 3u : 2u;

    EXPECT_EQ(2u, signalingPipeControls);
    EXPECT_EQ(expectedDcFlushPipeControls, dcFlushPipeControls);
}
} // namespace ult
} // namespace L0