Use postsync for copy and fill

Related-To: NEO-5968

Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
This commit is contained in:
Aravind Gopalakrishnan
2022-04-19 07:00:53 +00:00
committed by Compute-Runtime-Automation
parent f1574bebb4
commit cffe7f158a
9 changed files with 411 additions and 31 deletions

View File

@@ -235,6 +235,11 @@ struct CommandListCoreFamily : CommandListImp {
void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker);
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);
void appendSignalEventPostWalker(ze_event_handle_t hEvent, bool workloadPartition);
void programEventL3Flush(ze_event_handle_t hEvent,
Device *device,
uint32_t partitionCount,
NEO::CommandContainer &commandContainer);
void adjustEventKernelCount(ze_event_handle_t hEvent);
void programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired);
void appendComputeBarrierCommand();
NEO::PipeControlArgs createBarrierFlags();

View File

@@ -224,8 +224,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
return ret;
}
return appendLaunchKernelWithParams(hKernel, pLaunchFuncArgs,
hSignalEvent, false, false, true);
ret = appendLaunchKernelWithParams(hKernel, pLaunchFuncArgs,
hSignalEvent, false, false, true);
if (ret) {
return ret;
}
if (hSignalEvent) {
programEventL3Flush(hSignalEvent, this->device, this->partitionCount, commandContainer);
}
return ret;
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -242,6 +251,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
appendEventForProfiling(hEvent, true, false);
ret = appendLaunchKernelWithParams(hKernel, pDispatchArgumentsBuffer,
nullptr, true, false, false);
if (ret) {
return ret;
}
if (hEvent) {
programEventL3Flush(hEvent, this->device, this->partitionCount, commandContainer);
}
appendSignalEventPostWalker(hEvent, false);
return ret;
@@ -276,7 +291,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
return ret;
}
}
if (hEvent) {
programEventL3Flush(hEvent, this->device, this->partitionCount, commandContainer);
}
appendSignalEventPostWalker(hEvent, false);
return ret;
@@ -800,22 +817,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemAdvise(ze_device_hand
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
// Forwards the launch to appendLaunchKernelWithParams with a null event handle
// and all three boolean flags set to false. hEvent is accepted but not used by
// this definition.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
const ze_group_count_t *pThreadGroupDimensions,
ze_event_handle_t hEvent) {
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
}
// Dispatches on beforeWalker: when true, calls appendEventForProfiling(hEvent, true, false);
// otherwise calls appendSignalEventPostWalker(hEvent, false).
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
if (beforeWalker) {
appendEventForProfiling(hEvent, true, false);
} else {
appendSignalEventPostWalker(hEvent, false);
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(void *dstPtr,
NEO::GraphicsAllocation *dstPtrAlloc,
@@ -1069,6 +1070,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
}
appendEventForProfilingAllWalkers(hSignalEvent, true);
adjustEventKernelCount(hSignalEvent);
if (ret == ZE_RESULT_SUCCESS && leftSize) {
Builtin func = Builtin::CopyBufferToBufferSide;
@@ -1128,6 +1130,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
isStateless);
}
if (hSignalEvent) {
programEventL3Flush(hSignalEvent, this->device, this->partitionCount, commandContainer);
}
appendEventForProfilingAllWalkers(hSignalEvent, false);
const auto &hwInfo = this->device->getHwInfo();
@@ -1135,9 +1140,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
auto event = Event::fromHandle(hSignalEvent);
if (event) {
dstAllocationStruct.needsFlush &= !event->signalScope;
dstAllocationStruct.needsFlush &= !event->l3FlushWaApplied;
}
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
dstAllocationStruct.needsFlush &= !isCopyOnly();
if (dstAllocationStruct.needsFlush) {
NEO::PipeControlArgs args;
args.dcFlushEnable = true;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
@@ -1452,6 +1460,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
builtinFunction->setArgumentValue(2, sizeof(value), &value);
appendEventForProfilingAllWalkers(hSignalEvent, true);
adjustEventKernelCount(hSignalEvent);
uint32_t groups = static_cast<uint32_t>(size) / groupSizeX;
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
@@ -1526,6 +1535,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
builtinFunction->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls);
appendEventForProfilingAllWalkers(hSignalEvent, true);
adjustEventKernelCount(hSignalEvent);
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent);
@@ -1564,6 +1574,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
}
if (hSignalEvent) {
programEventL3Flush(hSignalEvent, this->device, this->partitionCount, commandContainer);
}
appendEventForProfilingAllWalkers(hSignalEvent, false);
const auto &hwInfo = this->device->getHwInfo();
@@ -1571,8 +1584,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
auto event = Event::fromHandle(hSignalEvent);
if (event) {
hostPointerNeedsFlush &= !event->signalScope;
hostPointerNeedsFlush &= !event->l3FlushWaApplied;
}
hostPointerNeedsFlush &= !isCopyOnly();
if (hostPointerNeedsFlush) {
NEO::PipeControlArgs args;
args.dcFlushEnable = true;

View File

@@ -32,6 +32,26 @@ size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
return helper.getRenderSurfaceStateSize();
}
// Empty implementation of programEventL3Flush: the body contains no statements,
// so none of the arguments are read and nothing is added to commandContainer.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(ze_event_handle_t hEvent,
Device *device,
uint32_t partitionCount,
NEO::CommandContainer &commandContainer) {
}
// Empty implementation of adjustEventKernelCount: hEvent is ignored and no
// event state is modified by this definition.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::adjustEventKernelCount(ze_event_handle_t hEvent) {
}
// Brackets a group of walkers with event commands.
//   beforeWalker == true  -> appendEventForProfiling(hEvent, true, false)
//   beforeWalker == false -> appendSignalEventPostWalker(hEvent, false)
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
    if (!beforeWalker) {
        // "After" phase.
        appendSignalEventPostWalker(hEvent, false);
        return;
    }
    // "Before" phase.
    appendEventForProfiling(hEvent, true, false);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
const ze_group_count_t *pThreadGroupDimensions,
@@ -174,6 +194,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
return ZE_RESULT_SUCCESS;
}
// Launches one kernel on behalf of a multi-kernel operation.
// When an event handle is supplied, its kernelCount is set to 1. The launch
// itself is forwarded to appendLaunchKernelWithParams with a null event handle.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
                                                                          const ze_group_count_t *pThreadGroupDimensions,
                                                                          ze_event_handle_t hEvent) {
    if (hEvent) {
        Event::fromHandle(hEvent)->kernelCount = 1;
    }

    // Named flags for the forwarded call; all three are disabled.
    const bool isIndirect = false;
    const bool isPredicate = false;
    const bool isCooperative = false;
    return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, isIndirect, isPredicate, isCooperative);
}
// Empty implementation: partitionDataSize is ignored and the body performs no work.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {}

View File

@@ -84,14 +84,20 @@ void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t num
}
template <GFXCORE_FAMILY gfxCoreFamily>
void programEventL3Flush(ze_event_handle_t hEvent,
Device *device,
uint32_t partitionCount,
NEO::CommandContainer &commandContainer) {
void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(ze_event_handle_t hEvent,
Device *device,
uint32_t partitionCount,
NEO::CommandContainer &commandContainer) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
auto event = Event::fromHandle(hEvent);
const auto &hwInfo = this->device->getHwInfo();
bool L3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
if (!L3FlushEnable || isCopyOnly()) {
return;
}
auto eventPartitionOffset = (partitionCount > 1) ? (partitionCount * event->getSinglePacketSize())
: event->getSinglePacketSize();
uint64_t eventAddress = event->getPacketAddress(device) + eventPartitionOffset;
@@ -121,6 +127,13 @@ void programEventL3Flush(ze_event_handle_t hEvent,
args);
}
// Brackets copy/fill work with event commands on copy-only command lists.
//
// Nothing is emitted when hEvent is null or when the list is not copy-only.
// Otherwise the call dispatches on beforeWalker:
//   beforeWalker == true  -> appendEventForProfiling(hEvent, true, false)
//   beforeWalker == false -> appendSignalEventPostWalker(hEvent, false)
//
// The previous body ignored beforeWalker and called appendSignalEventPostWalker
// in both phases, so the post-walker signal was also appended ahead of the
// work it is meant to follow.
// NOTE(review): the "before" call mirrors the generic implementation of this
// method; confirm appendEventForProfiling is the intended pre-work hook here.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
    if (!hEvent || !isCopyOnly()) {
        return;
    }
    if (beforeWalker) {
        appendEventForProfiling(hEvent, true, false);
    } else {
        appendSignalEventPostWalker(hEvent, false);
    }
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
const ze_group_count_t *pThreadGroupDimensions,
@@ -165,6 +178,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
commandContainer.addToResidencyContainer(eventAlloc);
L3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
isTimestampEvent = event->isUsingContextEndOffset();
eventAddress = event->getPacketAddress(this->device);
}
@@ -238,9 +252,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
if (partitionCount > 1) {
event->setPacketsInUse(partitionCount);
}
if (L3FlushEnable) {
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);
}
programEventL3Flush(hEvent, this->device, this->partitionCount, commandContainer);
}
if (neoDevice->getDebugger()) {
@@ -292,6 +304,27 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
return ZE_RESULT_SUCCESS;
}
// Launches one kernel on behalf of a multi-kernel operation.
// When an event handle is supplied, its kernelCount is incremented by one and
// the handle is passed through to appendLaunchKernelWithParams.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
                                                                          const ze_group_count_t *pThreadGroupDimensions,
                                                                          ze_event_handle_t hEvent) {
    if (hEvent) {
        auto signalEvent = Event::fromHandle(hEvent);
        signalEvent->kernelCount = signalEvent->kernelCount + 1;
    }

    // Named flags for the forwarded call; all three are disabled.
    const bool isIndirect = false;
    const bool isPredicate = false;
    const bool isCooperative = false;
    return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, isIndirect, isPredicate, isCooperative);
}
// Resets the kernelCount of the event behind hEvent to zero, unless the
// command list is copy-only. A null handle is a no-op.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::adjustEventKernelCount(ze_event_handle_t hEvent) {
    if (!hEvent) {
        return;
    }
    auto signalEvent = Event::fromHandle(hEvent);
    if (isCopyOnly()) {
        return;
    }
    signalEvent->kernelCount = 0u;
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),

View File

@@ -80,8 +80,8 @@ ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uin
eventSize = static_cast<uint32_t>(alignUp(EventPacketsCount::eventPackets * hwHelper.getSingleTimestampPacketSize(), eventAlignment));
size_t alignedSize = alignUp<size_t>(numEvents * eventSize, MemoryConstants::pageSize64k);
NEO::AllocationType allocationType = isEventPoolTimestampFlagSet() ? NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER
: NEO::AllocationType::BUFFER_HOST_MEMORY;
NEO::AllocationType allocationType = NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
if (this->devices.size() > 1) {
useDeviceAlloc = false;
}

View File

@@ -393,7 +393,8 @@ uint32_t EventImp<TagSizeT>::getPacketsUsedInLastKernel() {
template <typename TagSizeT>
void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value);
auto kernelIndex = getCurrKernelDataIndex();
kernelEventCompletionData[kernelIndex].setPacketsUsed(value);
}
template <typename TagSizeT>

View File

@@ -855,6 +855,39 @@ HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWit
context->freeMem(alloc);
}
// An indirect kernel launch that signals a host-scope event must be appended successfully.
HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithEventThenSuccessIsReturned) {
    Mock<::L0::Kernel> kernel;
    kernel.groupSize[0] = 2;
    kernel.descriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 2;
    kernel.descriptor.payloadMappings.dispatchTraits.globalWorkSize[0] = 2;
    kernel.descriptor.payloadMappings.dispatchTraits.workDim = 4;

    ze_result_t status;
    std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, status));

    // Host-visible pool holding a single event.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    poolDesc.count = 1;

    // Event with host signal scope at index 0.
    ze_event_desc_t signalDesc = {};
    signalDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    signalDesc.index = 0;

    std::unique_ptr<EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    std::unique_ptr<Event> event(Event::create<uint32_t>(eventPool.get(), &signalDesc, device));
    ze_event_handle_t signalHandle = event->toHandle();

    // Device buffer used as the indirect dispatch-arguments buffer.
    void *indirectArgs = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &indirectArgs);
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);

    auto *groupCounts = static_cast<ze_group_count_t *>(indirectArgs);
    result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), groupCounts, signalHandle, 0, nullptr);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    context->freeMem(indirectArgs);
}
HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWhenAppendingThenWorkGroupCountAndGlobalWorkSizeAndWorkDimIsSetInCrossThreadData) {
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
@@ -1202,6 +1235,37 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMult
context->freeMem(reinterpret_cast<void *>(numLaunchArgs));
}
// Launching multiple kernels indirectly with a host-scope signal event must succeed.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMultipleKernelsWithEventThenSuccessIsReturned) {
    createKernel();

    ze_result_t returnValue;
    auto commandList = std::unique_ptr<L0::CommandList>(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

    const ze_kernel_handle_t launchFn[3] = {kernel->toHandle(), kernel->toHandle(), kernel->toHandle()};
    const uint32_t numKernels = 3;

    // Device buffer passed as the kernel-count argument of the indirect launch.
    uint32_t *numLaunchArgs = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(
        device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
    // The allocation is a precondition of the test; stop here if it failed.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {};
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.index = 0;

    std::unique_ptr<EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    std::unique_ptr<Event> event(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ze_event_handle_t hEventHandle = event->toHandle();

    result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchFn, numLaunchArgs, nullptr, hEventHandle, 0, nullptr);
    // Non-fatal check so the device allocation below is always released.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    context->freeMem(reinterpret_cast<void *>(numLaunchArgs));
}
HWTEST_F(CommandListAppendLaunchKernel, givenInvalidEventListWhenAppendLaunchCooperativeKernelIsCalledThenErrorIsReturned) {
createKernel();
@@ -1213,6 +1277,55 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidEventListWhenAppendLaunchCoo
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
}
// A cooperative launch that signals a host-scope timestamp event must be appended successfully.
HWTEST_F(CommandListAppendLaunchKernel, givenAppendLaunchCooperativeKernelIsCalledWithEventWithHostScopeThenSuccessIsReturned) {
    createKernel();

    ze_group_count_t launchDims{1, 1, 1};
    ze_result_t status;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, status));

    // Kernel-timestamp pool holding a single event.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Event at index 0 with host signal scope and no wait scope.
    ze_event_desc_t signalDesc = {};
    signalDesc.index = 0;
    signalDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    signalDesc.wait = 0;

    std::unique_ptr<L0::EventPool> eventPool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    std::unique_ptr<L0::Event> event(L0::Event::create<uint32_t>(eventPool.get(), &signalDesc, device));

    status = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &launchDims, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}
// A cooperative launch with an event that has no signal scope must succeed
// and must leave the event's l3FlushWaApplied flag unset.
HWTEST2_F(CommandListAppendLaunchKernel, givenAppendLaunchCooperativeKernelIsCalledWithNoEventScopeThenSuccessIsReturnedAndL3WaNotApplied, IsXeHpCore) {
    createKernel();

    ze_group_count_t launchDims{1, 1, 1};
    ze_result_t status;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, status));

    // Kernel-timestamp pool holding a single event.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Only the index is set; signal and wait scopes stay zero-initialized.
    ze_event_desc_t unscopedDesc = {};
    unscopedDesc.index = 0;

    std::unique_ptr<L0::EventPool> eventPool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    std::unique_ptr<L0::Event> event(L0::Event::create<uint32_t>(eventPool.get(), &unscopedDesc, device));

    status = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &launchDims, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_FALSE(event->l3FlushWaApplied);
}
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLaunchCooperativeKernelIsCalledThenCorrectValueIsReturned, IsAtLeastSkl) {
Mock<::L0::Kernel> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -320,6 +320,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenA
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
EXPECT_EQ(1u, event->getPacketsInUse());
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalled, 0u);
EXPECT_EQ(commandList.appendMemoryCopyBlitCalled, 0u);

View File

@@ -404,6 +404,124 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventHostScopeWit
EXPECT_EQ(data[0].globalEnd, tsResult.global.kernelEnd);
}
// On a copy-engine command list, appendMemoryCopy with a host-scope timestamp
// event must succeed and must not mark the event as l3FlushWaApplied.
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenCopyCommandListWithAppendMemoryCopyThenL3FlushWaNotApplied, IsXeHpCore) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    // Level Zero requires the alignment to be a power of two; the previous
    // value 4090u was not.
    void *dstBuffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocHostMem(&hostDesc, 16384u, 4096u, &dstBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    ze_device_mem_alloc_desc_t deviceDesc = {};
    void *srcBuffer = nullptr;
    result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &srcBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    result = commandList->appendMemoryCopy(dstBuffer, srcBuffer, 16384u, event->toHandle(), 0, nullptr);
    // Non-fatal check so both buffers below are always released.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_FALSE(event->l3FlushWaApplied);

    context->freeMem(dstBuffer);
    context->freeMem(srcBuffer);
}
// On a copy-engine command list, appendMemoryFill with a host-scope timestamp
// event must succeed and must not mark the event as l3FlushWaApplied.
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenCopyCommandListWithAppendMemoryFillThenL3FlushWaNotApplied, IsXeHpCore) {
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);

    // Level Zero requires the alignment to be a power of two; the previous
    // value 4090u was not.
    void *dstBuffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocHostMem(&hostDesc, 16384u, 4096u, &dstBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Fill 4096 bytes of the destination with a 4-byte pattern.
    int one = 1;
    result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4096u,
                                           event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_FALSE(event->l3FlushWaApplied);

    context->freeMem(dstBuffer);
}
// Test double whose appendLaunchKernelWithParams override always fails with
// ZE_RESULT_ERROR_UNKNOWN, regardless of its arguments.
template <GFXCORE_FAMILY gfxCoreFamily>
class MockCommandListKernelLaunchError : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
    using BaseClass = WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>;

  public:
    MockCommandListKernelLaunchError() : BaseClass() {}

    // Ignores every argument and reports a generic failure.
    ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
                                             const ze_group_count_t *pThreadGroupDimensions,
                                             ze_event_handle_t hEvent,
                                             bool isIndirect,
                                             bool isPredicate,
                                             bool isCooperative) override {
        const ze_result_t forcedFailure = ZE_RESULT_ERROR_UNKNOWN;
        return forcedFailure;
    }
};
// When appendLaunchKernelWithParams fails, appendLaunchKernelIndirect must
// return that error to the caller.
HWTEST2_F(CommandListCreate, whenReturningErrorFromLaunchKernelWithParamsThenLaunchIndirectIsUnsuccessful, IsXeHpCore) {
    Mock<::L0::Kernel> kernel;
    kernel.groupSize[0] = 2;
    kernel.descriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 2;
    kernel.descriptor.payloadMappings.dispatchTraits.globalWorkSize[0] = 2;
    kernel.descriptor.payloadMappings.dispatchTraits.workDim = 4;

    // Command list whose appendLaunchKernelWithParams always returns ZE_RESULT_ERROR_UNKNOWN.
    auto commandList = std::make_unique<WhiteBox<MockCommandListKernelLaunchError<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    // Device buffer used as the indirect dispatch-arguments buffer.
    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc);
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);

    result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
                                                     static_cast<ze_group_count_t *>(alloc),
                                                     nullptr, 0, nullptr);
    EXPECT_EQ(result, ZE_RESULT_ERROR_UNKNOWN);

    context->freeMem(alloc);
}
// When appendLaunchKernelWithParams fails, appendLaunchCooperativeKernel must
// return that error to the caller.
HWTEST2_F(CommandListCreate, whenReturningErrorFromLaunchKernelWithParamsThenLaunchiCooperativeKernelIsUnsuccessful, IsXeHpCore) {
    Mock<::L0::Kernel> kernel;
    kernel.groupSize[0] = 2;
    kernel.descriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 2;
    kernel.descriptor.payloadMappings.dispatchTraits.globalWorkSize[0] = 2;
    kernel.descriptor.payloadMappings.dispatchTraits.workDim = 4;

    ze_group_count_t groupCount{1, 1, 1};
    ze_result_t returnValue;

    // Command list whose appendLaunchKernelWithParams always returns ZE_RESULT_ERROR_UNKNOWN.
    auto commandList = std::make_unique<WhiteBox<MockCommandListKernelLaunchError<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    returnValue = commandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr);
    EXPECT_EQ(returnValue, ZE_RESULT_ERROR_UNKNOWN);
}
HWTEST2_F(CommandListCreate, WhenCreatingCommandListThenBindingTablePoolAllocAddedToBatchBuffer, IsXeHpCore) {
using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC;
@@ -520,6 +638,68 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
}
// On a compute command list, appendMemoryFill with a host-scope timestamp
// event must succeed and must mark the event as l3FlushWaApplied.
HWTEST2_F(CommandListCreate, givenCommandListWhenAppendMemoryFillWithSignalEventThenL3FlushWaApplied, IsXeHpCore) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    // Level Zero requires the alignment to be a power of two; the previous
    // value 4090u was not.
    void *dstBuffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocHostMem(&hostDesc, 16384u, 4096u, &dstBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Fill 4096 bytes of the destination with a 4-byte pattern.
    int one = 1;
    result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4096u,
                                           event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_TRUE(event->l3FlushWaApplied);

    context->freeMem(dstBuffer);
}
// On a compute command list, appendMemoryCopy with a host-scope timestamp
// event must succeed and must mark the event as l3FlushWaApplied.
HWTEST2_F(CommandListCreate, givenCommandListWhenAppendMemoryCopyWithSignalEventThenL3FlushWaApplied, IsXeHpCore) {
    using CommandListType = WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>;
    auto commandList = std::make_unique<CommandListType>();
    commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);

    // Kernel-timestamp pool holding a single event.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    // Event at index 0 with host signal scope and no wait scope.
    ze_event_desc_t signalDesc = {};
    signalDesc.index = 0;
    signalDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    signalDesc.wait = 0;

    ze_result_t status = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::EventPool> eventPool(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, status));
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    std::unique_ptr<L0::Event> event(L0::Event::create<uint32_t>(eventPool.get(), &signalDesc, device));

    // Both ends of the copy are device allocations of 16384 bytes.
    ze_device_mem_alloc_desc_t deviceDesc = {};
    void *dstBuffer = nullptr;
    status = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &dstBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    void *srcBuffer = nullptr;
    status = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &srcBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    status = commandList->appendMemoryCopy(dstBuffer, srcBuffer, 16384u, event->toHandle(), 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_TRUE(event->l3FlushWaApplied);

    context->freeMem(dstBuffer);
    context->freeMem(srcBuffer);
}
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegCommandIsAdded, IsXeHpCore) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;