mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-12 09:30:36 +08:00
Use postsync for copy and fill
Related-To: NEO-5968
Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
f1574bebb4
commit
cffe7f158a
@@ -235,6 +235,11 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker);
|
||||
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);
|
||||
void appendSignalEventPostWalker(ze_event_handle_t hEvent, bool workloadPartition);
|
||||
void programEventL3Flush(ze_event_handle_t hEvent,
|
||||
Device *device,
|
||||
uint32_t partitionCount,
|
||||
NEO::CommandContainer &commandContainer);
|
||||
void adjustEventKernelCount(ze_event_handle_t hEvent);
|
||||
void programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired);
|
||||
void appendComputeBarrierCommand();
|
||||
NEO::PipeControlArgs createBarrierFlags();
|
||||
|
||||
@@ -224,8 +224,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
|
||||
return ret;
|
||||
}
|
||||
|
||||
return appendLaunchKernelWithParams(hKernel, pLaunchFuncArgs,
|
||||
hSignalEvent, false, false, true);
|
||||
ret = appendLaunchKernelWithParams(hKernel, pLaunchFuncArgs,
|
||||
hSignalEvent, false, false, true);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (hSignalEvent) {
|
||||
programEventL3Flush(hSignalEvent, this->device, this->partitionCount, commandContainer);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -242,6 +251,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||
appendEventForProfiling(hEvent, true, false);
|
||||
ret = appendLaunchKernelWithParams(hKernel, pDispatchArgumentsBuffer,
|
||||
nullptr, true, false, false);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
if (hEvent) {
|
||||
programEventL3Flush(hEvent, this->device, this->partitionCount, commandContainer);
|
||||
}
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
|
||||
return ret;
|
||||
@@ -276,7 +291,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (hEvent) {
|
||||
programEventL3Flush(hEvent, this->device, this->partitionCount, commandContainer);
|
||||
}
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
|
||||
return ret;
|
||||
@@ -800,22 +817,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemAdvise(ze_device_hand
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
// Forwards a split-kernel launch to appendLaunchKernelWithParams with a null
// event and all three boolean flags set to false. hEvent is accepted but is
// not used by this version.
// NOTE(review): the enclosing hunk header (-800,22 +817,6) shrinks the range,
// so this definition is presumably the pre-change copy removed by the commit
// - confirm against the repository.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent) {
|
||||
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
|
||||
}
|
||||
|
||||
// Event hook called around a group of walkers: with beforeWalker set it calls
// appendEventForProfiling(hEvent, true, false); otherwise it calls
// appendSignalEventPostWalker(hEvent, false).
// NOTE(review): the enclosing hunk header (-800,22 +817,6) shrinks the range,
// so this definition is presumably the pre-change copy removed by the commit
// - confirm against the repository.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(hEvent, true, false);
|
||||
} else {
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(void *dstPtr,
|
||||
NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
@@ -1069,6 +1070,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
}
|
||||
|
||||
appendEventForProfilingAllWalkers(hSignalEvent, true);
|
||||
adjustEventKernelCount(hSignalEvent);
|
||||
|
||||
if (ret == ZE_RESULT_SUCCESS && leftSize) {
|
||||
Builtin func = Builtin::CopyBufferToBufferSide;
|
||||
@@ -1128,6 +1130,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
isStateless);
|
||||
}
|
||||
|
||||
if (hSignalEvent) {
|
||||
programEventL3Flush(hSignalEvent, this->device, this->partitionCount, commandContainer);
|
||||
}
|
||||
appendEventForProfilingAllWalkers(hSignalEvent, false);
|
||||
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
@@ -1135,9 +1140,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
auto event = Event::fromHandle(hSignalEvent);
|
||||
if (event) {
|
||||
dstAllocationStruct.needsFlush &= !event->signalScope;
|
||||
dstAllocationStruct.needsFlush &= !event->l3FlushWaApplied;
|
||||
}
|
||||
|
||||
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
|
||||
dstAllocationStruct.needsFlush &= !isCopyOnly();
|
||||
|
||||
if (dstAllocationStruct.needsFlush) {
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = true;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
|
||||
@@ -1452,6 +1460,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
||||
|
||||
appendEventForProfilingAllWalkers(hSignalEvent, true);
|
||||
adjustEventKernelCount(hSignalEvent);
|
||||
|
||||
uint32_t groups = static_cast<uint32_t>(size) / groupSizeX;
|
||||
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
|
||||
@@ -1526,6 +1535,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
builtinFunction->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls);
|
||||
|
||||
appendEventForProfilingAllWalkers(hSignalEvent, true);
|
||||
adjustEventKernelCount(hSignalEvent);
|
||||
|
||||
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
|
||||
res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent);
|
||||
@@ -1564,6 +1574,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
}
|
||||
}
|
||||
|
||||
if (hSignalEvent) {
|
||||
programEventL3Flush(hSignalEvent, this->device, this->partitionCount, commandContainer);
|
||||
}
|
||||
appendEventForProfilingAllWalkers(hSignalEvent, false);
|
||||
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
@@ -1571,8 +1584,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
auto event = Event::fromHandle(hSignalEvent);
|
||||
if (event) {
|
||||
hostPointerNeedsFlush &= !event->signalScope;
|
||||
hostPointerNeedsFlush &= !event->l3FlushWaApplied;
|
||||
}
|
||||
|
||||
hostPointerNeedsFlush &= !isCopyOnly();
|
||||
|
||||
if (hostPointerNeedsFlush) {
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = true;
|
||||
|
||||
@@ -32,6 +32,26 @@ size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
|
||||
return helper.getRenderSurfaceStateSize();
|
||||
}
|
||||
|
||||
// Empty implementation of the programEventL3Flush member: none of the
// arguments are read and nothing is written to commandContainer.
// NOTE(review): a non-empty definition of the same member appears in another
// file of this diff; presumably this is the variant for hardware families
// that do not need the event L3 flush - confirm.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(ze_event_handle_t hEvent,
|
||||
Device *device,
|
||||
uint32_t partitionCount,
|
||||
NEO::CommandContainer &commandContainer) {
|
||||
}
|
||||
|
||||
// Empty implementation of adjustEventKernelCount: the event behind hEvent is
// left untouched.
// NOTE(review): another file in this diff defines a variant that resets
// kernelCount; presumably this one serves families where no adjustment is
// required - confirm.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustEventKernelCount(ze_event_handle_t hEvent) {
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(hEvent, true, false);
|
||||
} else {
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
@@ -174,6 +194,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent) {
|
||||
if (hEvent) {
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
event->kernelCount = 1;
|
||||
}
|
||||
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
|
||||
}
|
||||
|
||||
// Multi-partition prologue hook. This definition has an empty body, so
// nothing is emitted regardless of partitionDataSize.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {}
|
||||
|
||||
|
||||
@@ -84,14 +84,20 @@ void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t num
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void programEventL3Flush(ze_event_handle_t hEvent,
|
||||
Device *device,
|
||||
uint32_t partitionCount,
|
||||
NEO::CommandContainer &commandContainer) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(ze_event_handle_t hEvent,
|
||||
Device *device,
|
||||
uint32_t partitionCount,
|
||||
NEO::CommandContainer &commandContainer) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
bool L3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
|
||||
if (!L3FlushEnable || isCopyOnly()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto eventPartitionOffset = (partitionCount > 1) ? (partitionCount * event->getSinglePacketSize())
|
||||
: event->getSinglePacketSize();
|
||||
uint64_t eventAddress = event->getPacketAddress(device) + eventPartitionOffset;
|
||||
@@ -121,6 +127,13 @@ void programEventL3Flush(ze_event_handle_t hEvent,
|
||||
args);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||
if (hEvent && isCopyOnly()) {
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
@@ -165,6 +178,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
commandContainer.addToResidencyContainer(eventAlloc);
|
||||
L3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
|
||||
isTimestampEvent = event->isUsingContextEndOffset();
|
||||
|
||||
eventAddress = event->getPacketAddress(this->device);
|
||||
}
|
||||
|
||||
@@ -238,9 +252,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
if (partitionCount > 1) {
|
||||
event->setPacketsInUse(partitionCount);
|
||||
}
|
||||
if (L3FlushEnable) {
|
||||
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);
|
||||
}
|
||||
programEventL3Flush(hEvent, this->device, this->partitionCount, commandContainer);
|
||||
}
|
||||
|
||||
if (neoDevice->getDebugger()) {
|
||||
@@ -292,6 +304,27 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent) {
|
||||
if (hEvent) {
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
event->kernelCount += 1;
|
||||
}
|
||||
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, false, false, false);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustEventKernelCount(ze_event_handle_t hEvent) {
|
||||
if (hEvent) {
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
if (!isCopyOnly()) {
|
||||
event->kernelCount = 0u;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
|
||||
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
|
||||
|
||||
@@ -80,8 +80,8 @@ ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uin
|
||||
eventSize = static_cast<uint32_t>(alignUp(EventPacketsCount::eventPackets * hwHelper.getSingleTimestampPacketSize(), eventAlignment));
|
||||
|
||||
size_t alignedSize = alignUp<size_t>(numEvents * eventSize, MemoryConstants::pageSize64k);
|
||||
NEO::AllocationType allocationType = isEventPoolTimestampFlagSet() ? NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER
|
||||
: NEO::AllocationType::BUFFER_HOST_MEMORY;
|
||||
NEO::AllocationType allocationType = NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
|
||||
|
||||
if (this->devices.size() > 1) {
|
||||
useDeviceAlloc = false;
|
||||
}
|
||||
|
||||
@@ -393,7 +393,8 @@ uint32_t EventImp<TagSizeT>::getPacketsUsedInLastKernel() {
|
||||
|
||||
template <typename TagSizeT>
|
||||
void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
|
||||
kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value);
|
||||
auto kernelIndex = getCurrKernelDataIndex();
|
||||
kernelEventCompletionData[kernelIndex].setPacketsUsed(value);
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
|
||||
@@ -855,6 +855,39 @@ HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWit
|
||||
|
||||
context->freeMem(alloc);
|
||||
}
|
||||
// Appends an indirect kernel launch that signals a host-visible event with
// host signal scope and expects the append call to succeed.
HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithEventThenSuccessIsReturned) {
    Mock<::L0::Kernel> kernel;
    kernel.groupSize[0] = 2;
    auto &dispatchTraits = kernel.descriptor.payloadMappings.dispatchTraits;
    dispatchTraits.numWorkGroups[0] = 2;
    dispatchTraits.globalWorkSize[0] = 2;
    dispatchTraits.workDim = 4;

    ze_result_t returnValue;
    auto commandList = std::unique_ptr<L0::CommandList>(
        L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;

    auto eventPool = std::unique_ptr<EventPool>(
        EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    auto event = std::unique_ptr<Event>(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ze_event_handle_t hEventHandle = event->toHandle();

    // Device buffer standing in for the indirect dispatch arguments.
    ze_device_mem_alloc_desc_t deviceDesc = {};
    void *indirectArgs = nullptr;
    auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &indirectArgs);
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);

    result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
                                                     static_cast<ze_group_count_t *>(indirectArgs),
                                                     hEventHandle, 0, nullptr);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    context->freeMem(indirectArgs);
}
|
||||
|
||||
HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWhenAppendingThenWorkGroupCountAndGlobalWorkSizeAndWorkDimIsSetInCrossThreadData) {
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
@@ -1202,6 +1235,37 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMult
|
||||
context->freeMem(reinterpret_cast<void *>(numLaunchArgs));
|
||||
}
|
||||
|
||||
// Appends three indirect kernel launches that share one host-visible,
// host-scope signal event and expects the append call to succeed.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMultipleKernelsWithEventThenSuccessIsReturned) {
    createKernel();

    ze_result_t returnValue;
    auto commandList = std::unique_ptr<L0::CommandList>(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

    const uint32_t numKernels = 3;
    const ze_kernel_handle_t launchFn[3] = {kernel->toHandle(), kernel->toHandle(), kernel->toHandle()};

    // Device buffer holding the launch-count argument. The allocation result
    // is checked here; before, it was overwritten by the append call without
    // ever being inspected.
    uint32_t *numLaunchArgs = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(
        device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {};
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.index = 0;

    std::unique_ptr<EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    std::unique_ptr<Event> event(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ze_event_handle_t hEventHandle = event->toHandle();

    result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchFn, numLaunchArgs, nullptr, hEventHandle, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    context->freeMem(reinterpret_cast<void *>(numLaunchArgs));
}
|
||||
|
||||
HWTEST_F(CommandListAppendLaunchKernel, givenInvalidEventListWhenAppendLaunchCooperativeKernelIsCalledThenErrorIsReturned) {
|
||||
createKernel();
|
||||
|
||||
@@ -1213,6 +1277,55 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidEventListWhenAppendLaunchCoo
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
|
||||
}
|
||||
|
||||
// Launches a cooperative kernel that signals a kernel-timestamp event with
// host signal scope and expects the append call to succeed.
HWTEST_F(CommandListAppendLaunchKernel, givenAppendLaunchCooperativeKernelIsCalledWithEventWithHostScopeThenSuccessIsReturned) {
    createKernel();

    ze_result_t returnValue;
    ze_group_count_t groupCount{1, 1, 1};
    auto commandList = std::unique_ptr<L0::CommandList>(
        CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {};
    eventDesc.wait = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.index = 0;

    std::unique_ptr<L0::EventPool> eventPool(
        L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    std::unique_ptr<L0::Event> event(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
}
|
||||
|
||||
// Launches a cooperative kernel with a kernel-timestamp event whose descriptor
// leaves the signal scope zero-initialized. Expects success and that the
// event's l3FlushWaApplied flag stays false.
HWTEST2_F(CommandListAppendLaunchKernel, givenAppendLaunchCooperativeKernelIsCalledWithNoEventScopeThenSuccessIsReturnedAndL3WaNotApplied, IsXeHpCore) {
    createKernel();

    ze_result_t returnValue;
    ze_group_count_t groupCount{1, 1, 1};
    auto commandList = std::unique_ptr<L0::CommandList>(
        CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    std::unique_ptr<L0::EventPool> eventPool(
        L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    std::unique_ptr<L0::Event> event(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr);

    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    EXPECT_FALSE(event->l3FlushWaApplied);
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLaunchCooperativeKernelIsCalledThenCorrectValueIsReturned, IsAtLeastSkl) {
|
||||
Mock<::L0::Kernel> kernel;
|
||||
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -320,6 +320,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenA
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
EXPECT_EQ(1u, event->getPacketsInUse());
|
||||
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
|
||||
EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalled, 0u);
|
||||
EXPECT_EQ(commandList.appendMemoryCopyBlitCalled, 0u);
|
||||
|
||||
@@ -404,6 +404,124 @@ HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventHostScopeWit
|
||||
EXPECT_EQ(data[0].globalEnd, tsResult.global.kernelEnd);
|
||||
}
|
||||
|
||||
// On a copy-engine command list, a memory copy that signals a host-scope
// timestamp event must succeed and leave the event's l3FlushWaApplied flag
// false.
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenCopyCommandListWithAppendMemoryCopyThenL3FlushWaNotApplied, IsXeHpCore) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    // Host destination buffer. The alignment is a power of two (4096), as the
    // Level Zero allocation API requires; the earlier value of 4090 was not.
    void *dstBuffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocHostMem(&hostDesc, 16384u, 4096u, &dstBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Device source buffer.
    void *srcBuffer = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &srcBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    result = commandList->appendMemoryCopy(dstBuffer, srcBuffer, 16384u, event->toHandle(), 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_FALSE(event->l3FlushWaApplied);

    context->freeMem(dstBuffer);
    context->freeMem(srcBuffer);
}
|
||||
|
||||
// On a copy-engine command list, a memory fill that signals a host-scope
// timestamp event must succeed and leave the event's l3FlushWaApplied flag
// false.
HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenCopyCommandListWithAppendMemoryFillThenL3FlushWaNotApplied, IsXeHpCore) {
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);

    // Host destination buffer. The alignment is a power of two (4096), as the
    // Level Zero allocation API requires; the earlier value of 4090 was not.
    void *dstBuffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocHostMem(&hostDesc, 16384u, 4096u, &dstBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    int one = 1;
    result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4096u,
                                           event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_FALSE(event->l3FlushWaApplied);

    context->freeMem(dstBuffer);
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
class MockCommandListKernelLaunchError : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
|
||||
public:
|
||||
MockCommandListKernelLaunchError() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}
|
||||
|
||||
ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent,
|
||||
bool isIndirect,
|
||||
bool isPredicate,
|
||||
bool isCooperative) override {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
};
|
||||
|
||||
// With appendLaunchKernelWithParams mocked to fail, appendLaunchKernelIndirect
// must hand the same error code back to its caller.
HWTEST2_F(CommandListCreate, whenReturningErrorFromLaunchKernelWithParamsThenLaunchIndirectIsUnsuccessful, IsXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

    Mock<::L0::Kernel> kernel;
    kernel.groupSize[0] = 2;
    auto &dispatchTraits = kernel.descriptor.payloadMappings.dispatchTraits;
    dispatchTraits.numWorkGroups[0] = 2;
    dispatchTraits.globalWorkSize[0] = 2;
    dispatchTraits.workDim = 4;

    auto commandList = std::make_unique<WhiteBox<MockCommandListKernelLaunchError<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    // Device buffer standing in for the indirect dispatch arguments.
    ze_device_mem_alloc_desc_t deviceDesc = {};
    void *indirectArgs = nullptr;
    auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &indirectArgs);
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);

    result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
                                                     static_cast<ze_group_count_t *>(indirectArgs),
                                                     nullptr, 0, nullptr);
    EXPECT_EQ(result, ZE_RESULT_ERROR_UNKNOWN);

    context->freeMem(indirectArgs);
}
|
||||
|
||||
// With appendLaunchKernelWithParams mocked to fail,
// appendLaunchCooperativeKernel must hand the same error code back to its
// caller.
HWTEST2_F(CommandListCreate, whenReturningErrorFromLaunchKernelWithParamsThenLaunchiCooperativeKernelIsUnsuccessful, IsXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

    Mock<::L0::Kernel> kernel;
    kernel.groupSize[0] = 2;
    auto &dispatchTraits = kernel.descriptor.payloadMappings.dispatchTraits;
    dispatchTraits.numWorkGroups[0] = 2;
    dispatchTraits.globalWorkSize[0] = 2;
    dispatchTraits.workDim = 4;

    ze_group_count_t groupCount{1, 1, 1};
    auto commandList = std::make_unique<WhiteBox<MockCommandListKernelLaunchError<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    const ze_result_t returnValue =
        commandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr);
    EXPECT_EQ(returnValue, ZE_RESULT_ERROR_UNKNOWN);
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, WhenCreatingCommandListThenBindingTablePoolAllocAddedToBatchBuffer, IsXeHpCore) {
|
||||
using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC;
|
||||
|
||||
@@ -520,6 +638,68 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
}
|
||||
|
||||
// On a compute command list, a memory fill that signals a host-scope timestamp
// event must succeed and set the event's l3FlushWaApplied flag.
HWTEST2_F(CommandListCreate, givenCommandListWhenAppendMemoryFillWithSignalEventThenL3FlushWaApplied, IsXeHpCore) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    // Host destination buffer. The alignment is a power of two (4096), as the
    // Level Zero allocation API requires; the earlier value of 4090 was not.
    void *dstBuffer = nullptr;
    ze_host_mem_alloc_desc_t hostDesc = {};
    result = context->allocHostMem(&hostDesc, 16384u, 4096u, &dstBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    int one = 1;
    result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4096u,
                                           event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_TRUE(event->l3FlushWaApplied);

    context->freeMem(dstBuffer);
}
|
||||
|
||||
// On a compute command list, a device-to-device memory copy that signals a
// host-scope timestamp event must succeed and set the event's
// l3FlushWaApplied flag.
HWTEST2_F(CommandListCreate, givenCommandListWhenAppendMemoryCopyWithSignalEventThenL3FlushWaApplied, IsXeHpCore) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {};
    eventDesc.wait = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::EventPool> eventPool(
        L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    std::unique_ptr<L0::Event> event(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    // Both ends of the copy live in device memory.
    ze_device_mem_alloc_desc_t deviceDesc = {};
    void *dstBuffer = nullptr;
    result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &dstBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    void *srcBuffer = nullptr;
    result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &srcBuffer);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    result = commandList->appendMemoryCopy(dstBuffer, srcBuffer, 16384u, event->toHandle(), 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_TRUE(event->l3FlushWaApplied);

    context->freeMem(dstBuffer);
    context->freeMem(srcBuffer);
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegCommandIsAdded, IsXeHpCore) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
|
||||
Reference in New Issue
Block a user