Events workaround for L3Flush issue

Related-To: LOCI-2361

Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
Signed-off-by: Vinod Tipparaju <vinod.tipparaju@intel.com>
This commit is contained in:
Aravind Gopalakrishnan
2021-09-11 07:19:55 +00:00
committed by Compute-Runtime-Automation
parent 522d2550ee
commit 0c2800d37f
5 changed files with 259 additions and 70 deletions

View File

@@ -110,6 +110,43 @@ void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t num
}
}
/**
 * Appends a PIPE_CONTROL with data-cache flush enabled whose post-sync operation
 * writes Event::STATE_SIGNALED into additional packet(s) of the given event, and
 * grows the event's packets-in-use count so that those packets are accounted for.
 *
 * @param hEvent            handle of the event to signal; the caller must pass a non-null handle
 * @param device            device used to resolve the event's packet address
 * @param partitionCount    number of walker partitions used by the preceding kernel launch
 * @param commandContainer  container whose command stream receives the commands
 */
template <GFXCORE_FAMILY gfxCoreFamily>
void programEventL3Flush(ze_event_handle_t hEvent,
                         Device *device,
                         uint32_t partitionCount,
                         NEO::CommandContainer &commandContainer) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;

    auto event = Event::fromHandle(hEvent);
    const bool isPartitioned = partitionCount > 1;

    // One flush packet is written per partition (a single one when not partitioned).
    const uint32_t flushPacketCount = isPartitioned ? partitionCount : 1u;

    // The flush packets must start right after the packets claimed by the kernel
    // launch. The caller sets the packets-in-use to partitionCount when partitioned,
    // so skipping just one packet would make the flush writes overlap the kernel's
    // packets and leave the last accounted packet unwritten.
    uint64_t eventAddress = event->getPacketAddress(device) +
                            static_cast<uint64_t>(flushPacketCount) * event->getSinglePacketSize();
    if (event->isEventTimestampFlagSet()) {
        // Timestamp events are signaled through the context-end field of the packet.
        eventAddress += event->getContextEndOffset();
    }

    // NOTE(review): getPacketsInUse() appears to sum the packets of all kernels of the
    // event while setPacketsInUse() updates only the current kernel's entry; confirm this
    // is correct for events that are shared by more than one kernel.
    event->setPacketsInUse(event->getPacketsInUse() + flushPacketCount);

    auto *commandStream = commandContainer.getCommandStream();

    NEO::PipeControlArgs args;
    args.dcFlushEnable = true;
    if (isPartitioned) {
        // Program the per-partition address offset register with the packet size so that,
        // presumably, every partition's post-sync write lands in its own packet.
        args.workloadPartitionOffset = true;
        NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(*commandStream,
                                                 NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
                                                 static_cast<uint32_t>(event->getSinglePacketSize()),
                                                 true);
    }

    NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
        *commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
        eventAddress, Event::STATE_SIGNALED,
        commandContainer.getDevice()->getHardwareInfo(),
        args);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
const ze_group_count_t *pThreadGroupDimensions,
@@ -228,9 +265,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
this->partitionCount = std::max(partitionCount, this->partitionCount);
if (hEvent) {
auto event = Event::fromHandle(hEvent);
if (isTimestampEvent && partitionCount > 1) {
if (partitionCount > 1) {
event->setPacketsInUse(partitionCount);
}
if (L3FlushEnable) {
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);
}
}
if (neoDevice->getDebugger()) {

View File

@@ -95,7 +95,7 @@ struct Event : _ze_event_handle_t {
};
template <typename TagSizeT>
class KernelTimestampsData : public NEO::TimestampPackets<TagSizeT> {
class KernelEventCompletionData : public NEO::TimestampPackets<TagSizeT> {
public:
uint32_t getPacketsUsed() const { return packetsUsed; }
void setPacketsUsed(uint32_t value) { packetsUsed = value; }
@@ -139,7 +139,7 @@ struct EventImp : public Event {
size_t getSinglePacketSize() const override { return NEO::TimestampPackets<TagSizeT>::getSinglePacketSize(); };
ze_result_t hostEventSetValue(uint32_t eventValue) override;
std::unique_ptr<KernelTimestampsData<TagSizeT>[]> kernelTimestampsData;
std::unique_ptr<KernelEventCompletionData<TagSizeT>[]> kernelEventCompletionData;
Device *device;
int index;
@@ -148,8 +148,9 @@ struct EventImp : public Event {
protected:
ze_result_t calculateProfilingData();
ze_result_t queryStatusKernelTimestamp();
ze_result_t queryStatusNonTimestamp();
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
void assignTimestampData(void *address);
void assignKernelEventCompletionData(void *address);
};
struct EventPool : _ze_event_pool_handle_t {

View File

@@ -15,8 +15,8 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
if (eventPool->isEventPoolTimestampFlagSet()) {
event->setEventTimestampFlag(true);
event->kernelTimestampsData = std::make_unique<KernelTimestampsData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
}
event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
@@ -49,24 +49,24 @@ NEO::GraphicsAllocation &EventImp<TagSizeT>::getAllocation(Device *device) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
globalStartTS = kernelTimestampsData[0].getGlobalStartValue(0);
globalEndTS = kernelTimestampsData[0].getGlobalEndValue(0);
contextStartTS = kernelTimestampsData[0].getContextStartValue(0);
contextEndTS = kernelTimestampsData[0].getContextEndValue(0);
globalStartTS = kernelEventCompletionData[0].getGlobalStartValue(0);
globalEndTS = kernelEventCompletionData[0].getGlobalEndValue(0);
contextStartTS = kernelEventCompletionData[0].getContextStartValue(0);
contextEndTS = kernelEventCompletionData[0].getContextEndValue(0);
for (uint32_t i = 0; i < kernelCount; i++) {
for (auto packetId = 0u; packetId < kernelTimestampsData[i].getPacketsUsed(); packetId++) {
if (globalStartTS > kernelTimestampsData[i].getGlobalStartValue(packetId)) {
globalStartTS = kernelTimestampsData[i].getGlobalStartValue(packetId);
for (auto packetId = 0u; packetId < kernelEventCompletionData[i].getPacketsUsed(); packetId++) {
if (globalStartTS > kernelEventCompletionData[i].getGlobalStartValue(packetId)) {
globalStartTS = kernelEventCompletionData[i].getGlobalStartValue(packetId);
}
if (contextStartTS > kernelTimestampsData[i].getContextStartValue(packetId)) {
contextStartTS = kernelTimestampsData[i].getContextStartValue(packetId);
if (contextStartTS > kernelEventCompletionData[i].getContextStartValue(packetId)) {
contextStartTS = kernelEventCompletionData[i].getContextStartValue(packetId);
}
if (contextEndTS < kernelTimestampsData[i].getContextEndValue(packetId)) {
contextEndTS = kernelTimestampsData[i].getContextEndValue(packetId);
if (contextEndTS < kernelEventCompletionData[i].getContextEndValue(packetId)) {
contextEndTS = kernelEventCompletionData[i].getContextEndValue(packetId);
}
if (globalEndTS < kernelTimestampsData[i].getGlobalEndValue(packetId)) {
globalEndTS = kernelTimestampsData[i].getGlobalEndValue(packetId);
if (globalEndTS < kernelEventCompletionData[i].getGlobalEndValue(packetId)) {
globalEndTS = kernelEventCompletionData[i].getGlobalEndValue(packetId);
}
}
}
@@ -75,11 +75,12 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
}
template <typename TagSizeT>
void EventImp<TagSizeT>::assignTimestampData(void *address) {
void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToCopy = kernelTimestampsData[i].getPacketsUsed();
uint32_t packetsToCopy = 0;
packetsToCopy = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t packetId = 0; packetId < packetsToCopy; packetId++) {
kernelTimestampsData[i].assignDataToAllTimestamps(packetId, address);
kernelEventCompletionData[i].assignDataToAllTimestamps(packetId, address);
address = ptrOffset(address, NEO::TimestampPackets<TagSizeT>::getSinglePacketSize());
}
}
@@ -87,11 +88,27 @@ void EventImp<TagSizeT>::assignTimestampData(void *address) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
assignTimestampData(hostAddress);
assignKernelEventCompletionData(hostAddress);
uint32_t queryVal = Event::STATE_CLEARED;
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToCheck = kernelTimestampsData[i].getPacketsUsed();
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
if (kernelTimestampsData[i].getContextEndValue(packetId) == Event::STATE_CLEARED) {
if (kernelEventCompletionData[i].getContextEndValue(packetId) == queryVal) {
return ZE_RESULT_NOT_READY;
}
}
}
return ZE_RESULT_SUCCESS;
}
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
assignKernelEventCompletionData(hostAddress);
uint32_t queryVal = Event::STATE_CLEARED;
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
if (kernelEventCompletionData[i].getContextStartValue(packetId) == queryVal) {
return ZE_RESULT_NOT_READY;
}
}
@@ -102,7 +119,6 @@ ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatus() {
uint64_t *hostAddr = static_cast<uint64_t *>(hostAddress);
uint32_t queryVal = Event::STATE_CLEARED;
if (metricStreamer != nullptr) {
*hostAddr = metricStreamer->getNotificationState();
@@ -110,9 +126,9 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
this->csr->downloadAllocations();
if (isEventTimestampFlagSet()) {
return queryStatusKernelTimestamp();
} else {
return queryStatusNonTimestamp();
}
memcpy_s(static_cast<void *>(&queryVal), sizeof(uint32_t), static_cast<void *>(hostAddr), sizeof(uint32_t));
return (queryVal == Event::STATE_CLEARED) ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
}
template <typename TagSizeT>
@@ -130,7 +146,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
}
};
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToSet = kernelTimestampsData[i].getPacketsUsed();
uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t j = 0; j < packetsToSet; j++) {
eventTsSetFunc(baseAddr + NEO::TimestampPackets<TagSizeT>::getContextStartOffset());
eventTsSetFunc(baseAddr + NEO::TimestampPackets<TagSizeT>::getGlobalStartOffset());
@@ -139,7 +155,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
baseAddr += NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
}
}
assignTimestampData(hostAddress);
assignKernelEventCompletionData(hostAddress);
return ZE_RESULT_SUCCESS;
}
@@ -208,14 +224,12 @@ ze_result_t EventImp<TagSizeT>::reset() {
if (isEventTimestampFlagSet()) {
kernelCount = EventPacketsCount::maxKernelSplit;
for (uint32_t i = 0; i < kernelCount; i++) {
kernelTimestampsData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
}
hostEventSetValue(Event::STATE_INITIAL);
resetPackets();
return ZE_RESULT_SUCCESS;
} else {
return hostEventSetValue(Event::STATE_INITIAL);
}
hostEventSetValue(Event::STATE_INITIAL);
resetPackets();
return ZE_RESULT_SUCCESS;
}
template <typename TagSizeT>
@@ -227,7 +241,7 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestamp(ze_kernel_timestamp_result_
return ZE_RESULT_NOT_READY;
}
assignTimestampData(hostAddress);
assignKernelEventCompletionData(hostAddress);
calculateProfilingData();
auto eventTsSetFunc = [&](uint64_t &timestampFieldToCopy, uint64_t &timestampFieldForWriting) {
@@ -288,10 +302,10 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
packetId = static_cast<NEO::SubDevice *>(deviceImp->neoDevice)->getSubDeviceIndex();
}
globalStartTs = kernelTimestampsData[timestampPacket].getGlobalStartValue(packetId);
contextStartTs = kernelTimestampsData[timestampPacket].getContextStartValue(packetId);
contextEndTs = kernelTimestampsData[timestampPacket].getContextEndValue(packetId);
globalEndTs = kernelTimestampsData[timestampPacket].getGlobalEndValue(packetId);
globalStartTs = kernelEventCompletionData[timestampPacket].getGlobalStartValue(packetId);
contextStartTs = kernelEventCompletionData[timestampPacket].getContextStartValue(packetId);
contextEndTs = kernelEventCompletionData[timestampPacket].getContextEndValue(packetId);
globalEndTs = kernelEventCompletionData[timestampPacket].getGlobalEndValue(packetId);
queryTsEventAssignFunc(result.global.kernelStart, globalStartTs);
queryTsEventAssignFunc(result.context.kernelStart, contextStartTs);
@@ -305,37 +319,31 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
template <typename TagSizeT>
void EventImp<TagSizeT>::resetPackets() {
for (uint32_t i = 0; i < kernelCount; i++) {
kernelTimestampsData[i].setPacketsUsed(1);
kernelEventCompletionData[i].setPacketsUsed(1);
}
kernelCount = 1;
}
template <typename TagSizeT>
uint32_t EventImp<TagSizeT>::getPacketsInUse() {
if (isEventTimestampFlagSet()) {
uint32_t packetsInUse = 0;
for (uint32_t i = 0; i < kernelCount; i++) {
packetsInUse += kernelTimestampsData[i].getPacketsUsed();
};
return packetsInUse;
} else {
return 1;
uint32_t packetsInUse = 0;
for (uint32_t i = 0; i < kernelCount; i++) {
packetsInUse += kernelEventCompletionData[i].getPacketsUsed();
}
return packetsInUse;
}
template <typename TagSizeT>
void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
kernelTimestampsData[getCurrKernelDataIndex()].setPacketsUsed(value);
};
kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value);
}
template <typename TagSizeT>
uint64_t EventImp<TagSizeT>::getPacketAddress(Device *device) {
uint64_t address = getGpuAddress(device);
if (isEventTimestampFlagSet() && kernelCount > 1) {
for (uint32_t i = 0; i < kernelCount - 1; i++) {
address += kernelTimestampsData[i].getPacketsUsed() *
NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
}
for (uint32_t i = 0; i < kernelCount - 1; i++) {
address += kernelEventCompletionData[i].getPacketsUsed() *
NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
}
return address;
}

View File

@@ -640,15 +640,15 @@ TEST_F(TimestampEventCreate, givenEventCreatedWithTimestampThenIsTimestampEventF
}
TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCorrectDataAreSet) {
EXPECT_NE(nullptr, event->kernelTimestampsData);
EXPECT_NE(nullptr, event->kernelEventCompletionData);
for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) {
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getContextStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getGlobalStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getContextEndValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getGlobalEndValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalStartValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextEndValue(i));
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalEndValue(i));
}
EXPECT_EQ(1u, event->kernelTimestampsData[j].getPacketsUsed());
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
}
EXPECT_EQ(1u, event->kernelCount);
@@ -692,7 +692,7 @@ TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectO
}
TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAreSet) {
EXPECT_NE(nullptr, event->kernelTimestampsData);
EXPECT_NE(nullptr, event->kernelEventCompletionData);
event->hostSignal();
ze_result_t result = event->queryStatus();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -702,12 +702,12 @@ TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrec
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) {
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getContextStartValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getGlobalStartValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getContextEndValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getGlobalEndValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextStartValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalStartValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextEndValue(i));
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalEndValue(i));
}
EXPECT_EQ(1u, event->kernelTimestampsData[j].getPacketsUsed());
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
}
EXPECT_EQ(1u, event->kernelCount);
}
@@ -799,7 +799,7 @@ TEST_F(EventQueryTimestampExpWithSubDevice, givenEventWhenQuerytimestampExpWithS
uint32_t numPackets = 2;
for (uint32_t packetId = 0; packetId < numPackets; packetId++) {
event->kernelTimestampsData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets<uint32_t>::getSinglePacketSize());
}
uint32_t pCount = 0;
@@ -865,7 +865,7 @@ TEST_F(TimestampEventCreate, givenEventWhenQueryingTimestampExpThenCorrectDataSe
uint32_t pCount = 2;
for (uint32_t packetId = 0; packetId < pCount; packetId++) {
event->kernelTimestampsData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets<uint32_t>::getSinglePacketSize());
}

View File

@@ -143,6 +143,146 @@ HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithGlobalAtomics
EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics);
}
using CommandListAppendLaunchKernelL3Flush = Test<ModuleFixture>;
// Launches a kernel with a regular (non-timestamp) host-visible event while walker
// partitioning is enabled, then checks that the command stream contains an
// MI_LOAD_REGISTER_IMM and at least one PIPE_CONTROL with an immediate-data post-sync write.
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableWalkerPartition.set(1);

    // Kernel backed by a mock module, dispatched as 8 groups of a single work item.
    Mock<::L0::Kernel> kernel;
    std::unique_ptr<Module> pMockModule(new Mock<Module>(device, nullptr));
    kernel.module = pMockModule.get();
    kernel.setGroupSize(1, 1, 1);
    ze_group_count_t groupCount{8, 1, 1};

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Host-visible pool without the kernel-timestamp flag.
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    std::unique_ptr<L0::EventPool> eventPool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc));
    std::unique_ptr<L0::Event> event(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), false, false, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto *commandStream = pCommandList->commandContainer.getCommandStream();
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandStream->getCpuBase(), 0), commandStream->getUsed()));

    EXPECT_LT(1u, pCommandList->partitionCount);

    auto lriIt = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), lriIt);

    auto pipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, pipeControls.size());

    // Count the PIPE_CONTROLs that write immediate data as their post-sync operation.
    uint32_t postSyncCount = 0u;
    for (auto pcIt : pipeControls) {
        auto *pipeControl = genCmdCast<PIPE_CONTROL *>(*pcIt);
        const bool writesImmediateData =
            pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA;
        if (writesImmediateData) {
            ++postSyncCount;
        }
    }
    ASSERT_LE(1u, postSyncCount);
}
// Launches a kernel with a kernel-timestamp event while walker partitioning is enabled,
// then checks that the command stream contains an MI_LOAD_REGISTER_IMM and at least one
// PIPE_CONTROL with an immediate-data post-sync write.
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableWalkerPartition.set(1);

    // Kernel backed by a mock module, dispatched as 8 groups of a single work item.
    Mock<::L0::Kernel> kernel;
    std::unique_ptr<Module> pMockModule(new Mock<Module>(device, nullptr));
    kernel.module = pMockModule.get();
    kernel.setGroupSize(1, 1, 1);
    ze_group_count_t groupCount{8, 1, 1};

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Host-visible pool that also carries the kernel-timestamp flag.
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    std::unique_ptr<L0::EventPool> eventPool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc));
    std::unique_ptr<L0::Event> event(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), false, false, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto *commandStream = pCommandList->commandContainer.getCommandStream();
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandStream->getCpuBase(), 0), commandStream->getUsed()));

    EXPECT_LT(1u, pCommandList->partitionCount);

    auto lriIt = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), lriIt);

    auto pipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, pipeControls.size());

    // Count the PIPE_CONTROLs that write immediate data as their post-sync operation.
    uint32_t postSyncCount = 0u;
    for (auto pcIt : pipeControls) {
        auto *pipeControl = genCmdCast<PIPE_CONTROL *>(*pcIt);
        const bool writesImmediateData =
            pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA;
        if (writesImmediateData) {
            ++postSyncCount;
        }
    }
    ASSERT_LE(1u, postSyncCount);
}
// Launches a kernel with a regular host-visible event while walker partitioning is
// disabled, then checks that a single partition is used and that no
// MI_LOAD_REGISTER_IMM is present in the command stream.
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventAndWithoutWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableWalkerPartition.set(0);

    // Kernel backed by a mock module, dispatched as 8 groups of a single work item.
    Mock<::L0::Kernel> kernel;
    std::unique_ptr<Module> pMockModule(new Mock<Module>(device, nullptr));
    kernel.module = pMockModule.get();
    kernel.setGroupSize(1, 1, 1);
    ze_group_count_t groupCount{8, 1, 1};

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Host-visible pool without the kernel-timestamp flag; event scopes are left at their zero defaults.
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    std::unique_ptr<L0::EventPool> eventPool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc));
    std::unique_ptr<L0::Event> event(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), false, false, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto *commandStream = pCommandList->commandContainer.getCommandStream();
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandStream->getCpuBase(), 0), commandStream->getUsed()));

    EXPECT_EQ(1u, pCommandList->partitionCount);

    auto lriIt = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(cmdList.end(), lriIt);
}
HWTEST2_F(CommandListCreate, WhenCreatingCommandListThenBindingTablePoolAllocAddedToBatchBuffer, IsXeHpCore) {
using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC;