L0: Capability to set RelaxedOrdering mode per call

This is prework for future RelaxedOrdering mode optimizations

Related-To: NEO-7458

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-01-27 14:09:15 +00:00
committed by Compute-Runtime-Automation
parent 7f5fae4c2f
commit 807b7bec65
43 changed files with 697 additions and 597 deletions

View File

@@ -86,39 +86,39 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcptr,
const ze_image_region_t *pDstRegion,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage,
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) = 0;
const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *waitEventHandles) = 0;
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size,
ze_memory_advice_t advice) = 0;
virtual ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) = 0;
virtual ze_result_t appendMemoryCopyRegion(void *dstPtr,
const ze_copy_region_t *dstRegion,
@@ -130,10 +130,10 @@ struct CommandList : _ze_command_list_handle_t {
uint32_t srcSlicePitch,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendMemoryFill(void *ptr, const void *pattern,
size_t patternSize, size_t size, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed) = 0;
@@ -141,7 +141,7 @@ struct CommandList : _ze_command_list_handle_t {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc,
const void *srcptr, size_t size, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual void *asMutable() { return nullptr; };

View File

@@ -85,45 +85,45 @@ struct CommandListCoreFamily : CommandListImp {
const ze_image_region_t *pDstRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage,
const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) override;
const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) override;
ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *waitEventHandles) override;
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) override;
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels,
const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendMemAdvise(ze_device_handle_t hDevice,
const void *ptr, size_t size,
ze_memory_advice_t advice) override;
ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstAllocation,
NEO::GraphicsAllocation *srcAllocation,
size_t size,
@@ -138,13 +138,13 @@ struct CommandListCoreFamily : CommandListImp {
uint32_t srcSlicePitch,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) override;
ze_result_t appendMemoryFill(void *ptr, const void *pattern,
size_t patternSize, size_t size,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) override;
ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) override;
@@ -170,7 +170,7 @@ struct CommandListCoreFamily : CommandListImp {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr,
size_t size, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
void appendMultiPartitionEpilogue() override;
void appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent);
@@ -209,14 +209,14 @@ struct CommandListCoreFamily : CommandListImp {
size_t dstRowPitch, size_t dstSlicePitch,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
Builtin builtin, const ze_copy_region_t *dstRegion,
uint32_t dstPitch, size_t dstOffset,
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
size_t srcOffset, Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
Builtin builtin, const ze_copy_region_t *dstRegion,
@@ -224,13 +224,13 @@ struct CommandListCoreFamily : CommandListImp {
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
uint32_t srcSlicePitch, size_t srcOffset,
Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents);
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch);
MOCKABLE_VIRTUAL ze_result_t appendBlitFill(void *ptr, const void *pattern,
size_t patternSize, size_t size,
Event *signalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents);
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch);
MOCKABLE_VIRTUAL ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src,
NEO::GraphicsAllocation *dst,

View File

@@ -240,7 +240,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) {
const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) {
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
@@ -253,7 +253,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
}
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
if (ret) {
return ret;
}
@@ -285,9 +285,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *waitEventHandles) {
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, waitEventHandles, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, waitEventHandles, relaxedOrderingDispatch);
if (ret) {
return ret;
}
@@ -309,9 +309,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
if (ret) {
return ret;
}
@@ -339,9 +339,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
if (ret) {
return ret;
}
@@ -451,7 +451,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
const ze_image_region_t *pDstRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
auto image = Image::fromHandle(hDstImage);
auto bytesPerPixel = static_cast<uint32_t>(image->getImageInfo().surfaceFormat->ImageElementSizeInBytes);
@@ -574,7 +574,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
launchParams.isBuiltInKernel = true;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents,
launchParams);
launchParams, relaxedOrderingDispatch);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -583,7 +583,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
auto image = Image::fromHandle(hSrcImage);
auto bytesPerPixel = static_cast<uint32_t>(image->getImageInfo().surfaceFormat->ImageElementSizeInBytes);
@@ -711,7 +711,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents, launchParams);
event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch);
addFlushRequiredCommand(allocationStruct.needsFlush, event);
@@ -725,7 +725,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
auto dstImage = L0::Image::fromHandle(hDstImage);
auto srcImage = L0::Image::fromHandle(hSrcImage);
cl_int4 srcOffset, dstOffset;
@@ -831,7 +831,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
launchParams.isBuiltInKernel = true;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents,
launchParams);
launchParams, relaxedOrderingDispatch);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -839,10 +839,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopy(ze_image_handl
ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
return this->appendImageCopyRegion(hDstImage, hSrcImage, nullptr, nullptr, hEvent,
numWaitEvents, phWaitEvents);
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -996,7 +996,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
size_t dstRowPitch, size_t dstSlicePitch,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
dstRegion.originX += static_cast<uint32_t>(dstOffset);
srcRegion.originX += static_cast<uint32_t>(srcOffset);
uint32_t bytesPerPixel = NEO::BlitCommandsHelper<GfxFamily>::getAvailableBytesPerPixel(copySize.x, srcRegion.originX, dstRegion.originX, srcSize.x, dstSize.x);
@@ -1015,7 +1015,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
blitProperties.srcSize = srcSize;
blitProperties.dstSize = dstSize;
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
if (ret) {
return ret;
}
@@ -1122,7 +1122,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
size_t size,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
uintptr_t start = reinterpret_cast<uintptr_t>(dstptr);
bool isStateless = false;
@@ -1168,7 +1168,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
isStateless = true;
}
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
if (ret) {
return ret;
@@ -1287,7 +1287,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
uint32_t srcSlicePitch,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
@@ -1328,16 +1328,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
ze_result_t result = ZE_RESULT_SUCCESS;
if (srcRegion->depth > 1) {
result = isCopyOnly() ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, srcAllocationStruct.offset, dstAllocationStruct.offset, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3, signalEvent, numWaitEvents, phWaitEvents)
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch)
: this->appendMemoryCopyKernel3d(&dstAllocationStruct, &srcAllocationStruct,
Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstAllocationStruct.offset,
srcRegion, srcPitch, srcSlicePitch, srcAllocationStruct.offset, signalEvent, numWaitEvents, phWaitEvents);
srcRegion, srcPitch, srcSlicePitch, srcAllocationStruct.offset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
} else {
result = isCopyOnly() ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, srcAllocationStruct.offset, dstAllocationStruct.offset, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3, signalEvent, numWaitEvents, phWaitEvents)
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch)
: this->appendMemoryCopyKernel2d(&dstAllocationStruct, &srcAllocationStruct,
Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstAllocationStruct.offset,
srcRegion, srcPitch, srcAllocationStruct.offset, signalEvent, numWaitEvents, phWaitEvents);
srcRegion, srcPitch, srcAllocationStruct.offset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
if (result) {
@@ -1371,7 +1371,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
size_t srcOffset,
Event *signalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
@@ -1423,7 +1423,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &dispatchKernelArgs, signalEvent, numWaitEvents,
phWaitEvents, launchParams);
phWaitEvents, launchParams, relaxedOrderingDispatch);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -1438,7 +1438,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
size_t srcOffset,
Event *signalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
@@ -1490,7 +1490,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
&dispatchKernelArgs, signalEvent,
numWaitEvents,
phWaitEvents,
launchParams);
launchParams, relaxedOrderingDispatch);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -1535,7 +1535,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
size_t size,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
bool isStateless = false;
NEO::Device *neoDevice = device->getNEODevice();
@@ -1560,10 +1560,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
if (isCopyOnly()) {
return appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents);
return appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, true);
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
if (res) {
return res;
}
@@ -1745,13 +1745,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
size_t size,
Event *signalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
auto neoDevice = device->getNEODevice();
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
if (gfxCoreHelper.getMaxFillPaternSizeForCopyEngine() < patternSize) {
return ZE_RESULT_ERROR_INVALID_SIZE;
} else {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
if (ret) {
return ret;
}
@@ -1999,9 +1999,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
}
}
bool relaxedOrdering = ((this->cmdListType == TYPE_IMMEDIATE) && csr->directSubmissionRelaxedOrderingEnabled() && relaxedOrderingAllowed);
if (relaxedOrdering) {
if (relaxedOrderingAllowed) {
// Indirect BB_START operates only on GPR_0
NEO::EncodeSetMMIO<GfxFamily>::encodeREG(*commandContainer.getCommandStream(), CS_GPR_R0, CS_GPR_R4);
NEO::EncodeSetMMIO<GfxFamily>::encodeREG(*commandContainer.getCommandStream(), CS_GPR_R0 + 4, CS_GPR_R4 + 4);
@@ -2016,7 +2014,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
packetsToWait = event->getMaxPacketsCount();
}
for (uint32_t i = 0u; i < packetsToWait; i++) {
if (relaxedOrdering) {
if (relaxedOrderingAllowed) {
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddr, eventStateClear,
NEO::CompareOperation::Equal, true);
} else {
@@ -2177,9 +2175,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyFromContext(
void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr,
size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents);
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -2278,7 +2276,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
auto appendResult = appendLaunchKernel(builtinKernel->toHandle(), &dispatchKernelArgs, hSignalEvent, numWaitEvents,
phWaitEvents, launchParams);
phWaitEvents, launchParams, false);
if (appendResult != ZE_RESULT_SUCCESS) {
return appendResult;
}

View File

@@ -45,12 +45,12 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) override;
const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) override;
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendBarrier(ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
@@ -61,7 +61,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
size_t size,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendMemoryCopyRegion(void *dstPtr,
const ze_copy_region_t *dstRegion,
@@ -73,13 +73,13 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
uint32_t srcSlicePitch,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendMemoryFill(void *ptr, const void *pattern,
size_t patternSize, size_t size,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
@@ -96,27 +96,27 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr,
size_t size, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage,
const void *srcPtr,
const ze_image_region_t *pDstRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendImageCopyToMemory(void *dstPtr,
ze_image_handle_t hSrcImage,
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendImageCopy(
ze_image_handle_t dst, ze_image_handle_t src,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage,
ze_image_handle_t hSrcImage,
@@ -124,7 +124,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendMemoryRangesBarrier(uint32_t numRanges,
const size_t *pRangeSizes,
@@ -137,7 +137,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *waitEventHandles) override;
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) override;
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
ze_result_t executeCommandListImmediateWithFlushTaskImpl(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, CommandQueue *cmdQ);
@@ -165,6 +165,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
protected:
void printKernelsPrintfOutput(bool hangDetected);
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const;
std::atomic<bool> dependenciesPresent{false};
};

View File

@@ -284,11 +284,16 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::waitForEventsFromHost() {
return true;
}
// Tells whether an append call with `numWaitEvents` wait events may use relaxed-ordering dispatch.
// Returns true only when `csr` reports direct-submission relaxed ordering as enabled AND
// the call has at least one wait event; otherwise returns false.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const {
    // The CSR capability is queried first, exactly once per call.
    if (!this->csr->directSubmissionRelaxedOrderingEnabled()) {
        return false;
    }
    // With the capability enabled, the answer depends only on whether any wait events were passed.
    return numWaitEvents != 0u;
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) {
const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) {
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace();
@@ -301,24 +306,30 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
phWaitEvents = nullptr;
}
}
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernelHandle, threadGroupDimensions,
hSignalEvent, numWaitEvents, phWaitEvents,
launchParams);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
launchParams, relaxedOrderingDispatch);
return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelIndirect(
ze_kernel_handle_t kernelHandle, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace();
checkWaitEventsState(numWaitEvents, phWaitEvents);
}
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
hSignalEvent, numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -335,7 +346,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
this->dependenciesPresent = true;
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
return flushImmediate(ret, true, true, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -345,7 +356,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
size_t size,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace();
@@ -363,16 +374,19 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
}
}
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size);
if (isSplitNeeded) {
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, void *, const void *>(this, dstptr, srcptr, size, hSignalEvent, true, (numWaitEvents > 0), [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, numWaitEvents, phWaitEvents);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, void *, const void *>(this, dstptr, srcptr, size, hSignalEvent, true, relaxedOrderingDispatch, [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
});
} else {
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
numWaitEvents, phWaitEvents);
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
return flushImmediate(ret, true, false, (numWaitEvents > 0) || isSplitNeeded, hSignalEvent);
return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -387,7 +401,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
uint32_t srcSlicePitch,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace();
@@ -396,9 +410,12 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
ze_result_t ret;
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
auto isSplitNeeded = this->isAppendSplitNeeded(dstPtr, srcPtr, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch));
if (isSplitNeeded) {
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uint32_t, uint32_t>(this, dstRegion->originX, srcRegion->originX, dstRegion->width, hSignalEvent, true, (numWaitEvents > 0), [&](uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uint32_t, uint32_t>(this, dstRegion->originX, srcRegion->originX, dstRegion->width, hSignalEvent, true, relaxedOrderingDispatch, [&](uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
ze_copy_region_t dstRegionLocal = {};
ze_copy_region_t srcRegionLocal = {};
memcpy(&dstRegionLocal, dstRegion, sizeof(ze_copy_region_t));
@@ -409,15 +426,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
srcRegionLocal.width = static_cast<uint32_t>(sizeParam);
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(dstPtr, &dstRegionLocal, dstPitch, dstSlicePitch,
srcPtr, &srcRegionLocal, srcPitch, srcSlicePitch,
hSignalEventParam, numWaitEvents, phWaitEvents);
hSignalEventParam, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
});
} else {
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(dstPtr, dstRegion, dstPitch, dstSlicePitch,
srcPtr, srcRegion, srcPitch, srcSlicePitch,
hSignalEvent, numWaitEvents, phWaitEvents);
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
return flushImmediate(ret, true, false, (numWaitEvents > 0) || isSplitNeeded, hSignalEvent);
return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -425,15 +442,18 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
size_t patternSize, size_t size,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace();
checkWaitEventsState(numWaitEvents, phWaitEvents);
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -472,10 +492,14 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
ze_result_t ret;
auto isSplitNeeded = this->isAppendSplitNeeded(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool(), size);
bool relaxedOrdering = false;
if (isSplitNeeded) {
relaxedOrdering = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event
uintptr_t dstAddress = static_cast<uintptr_t>(dstAllocation->getGpuAddress());
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uintptr_t, uintptr_t>(this, dstAddress, srcAddress, size, nullptr, false, false, [&](uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uintptr_t, uintptr_t>(this, dstAddress, srcAddress, size, nullptr, false, relaxedOrdering, [&](uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
this->appendMemoryCopyBlit(dstAddressParam, dstAllocation, 0u,
srcAddressParam, srcAllocation, 0u,
sizeParam);
@@ -484,7 +508,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
} else {
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
}
return flushImmediate(ret, false, false, isSplitNeeded, nullptr);
return flushImmediate(ret, false, false, relaxedOrdering, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -502,7 +526,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed);
this->dependenciesPresent = true;
return flushImmediate(ret, true, true, (numEvents > 0), nullptr);
return flushImmediate(ret, true, true, false, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -516,15 +540,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteGlobalTime
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
return flushImmediate(ret, true, true, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyFromContext(
void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr,
size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
return CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents);
return CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -532,10 +556,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopy(
ze_image_handle_t dst, ze_image_handle_t src,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
return CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion(dst, src, nullptr, nullptr, hSignalEvent,
numWaitEvents, phWaitEvents);
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -545,15 +569,18 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace();
checkWaitEventsState(numWaitEvents, phWaitEvents);
}
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent,
numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -563,16 +590,19 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
const ze_image_region_t *pDstRegion,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace();
checkWaitEventsState(numWaitEvents, phWaitEvents);
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent,
numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent,
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -582,16 +612,19 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace();
checkWaitEventsState(numWaitEvents, phWaitEvents);
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -606,7 +639,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryRangesBar
checkWaitEventsState(numWaitEvents, phWaitEvents);
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
return flushImmediate(ret, true, true, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -614,13 +647,16 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *waitEventHandles) {
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) {
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace();
checkWaitEventsState(numWaitEvents, waitEventHandles);
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles, relaxedOrderingDispatch);
return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -121,7 +121,7 @@ ze_result_t DeviceImp::submitCopyForP2P(ze_device_handle_t hPeerDevice, ze_bool_
contextImp->allocDeviceMem(this->toHandle(), &deviceDesc, 8, 1, &memory);
contextImp->allocDeviceMem(hPeerDevice, &peerDeviceDesc, 8, 1, &peerMemory);
auto ret = L0::CommandList::fromHandle(commandList)->appendMemoryCopy(peerMemory, memory, 8, nullptr, 0, nullptr);
auto ret = L0::CommandList::fromHandle(commandList)->appendMemoryCopy(peerMemory, memory, 8, nullptr, 0, nullptr, false);
L0::CommandList::fromHandle(commandList)->close();
if (ret == ZE_RESULT_SUCCESS) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -31,7 +31,7 @@ TEST_F(AUBHelloWorldL0, whenAppendMemoryCopyIsCalledThenMemoryIsProperlyCopied)
auto dstMemory = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties);
memset(srcMemory, val, size);
commandList->appendMemoryCopy(dstMemory, srcMemory, size, 0, 0, nullptr);
commandList->appendMemoryCopy(dstMemory, srcMemory, size, nullptr, 0, nullptr, false);
commandList->close();
auto pHCmdList = std::make_unique<ze_command_list_handle_t>(commandList->toHandle());

View File

@@ -135,7 +135,7 @@ void CmdListPipelineSelectStateFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -178,7 +178,7 @@ void CmdListPipelineSelectStateFixture::testBody() {
{
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -198,7 +198,7 @@ void CmdListPipelineSelectStateFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -245,7 +245,7 @@ void CmdListPipelineSelectStateFixture::testBody() {
{
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -265,7 +265,7 @@ void CmdListPipelineSelectStateFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -288,7 +288,7 @@ void CmdListPipelineSelectStateFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -334,7 +334,7 @@ void CmdListPipelineSelectStateFixture::testBody() {
{
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -396,7 +396,7 @@ void CmdListPipelineSelectStateFixture::testBodyShareStateRegularImmediate() {
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
sizeBefore = regularCommandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = regularCommandListStream.getUsed();
@@ -442,7 +442,7 @@ void CmdListPipelineSelectStateFixture::testBodyShareStateRegularImmediate() {
size_t csrUsedBefore = csrStream.getUsed();
sizeBefore = immediateCmdListStream.getUsed();
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = immediateCmdListStream.getUsed();
size_t csrUsedAfter = csrStream.getUsed();
@@ -504,7 +504,7 @@ void CmdListPipelineSelectStateFixture::testBodyShareStateImmediateRegular() {
size_t csrUsedBefore = csrStream.getUsed();
sizeBefore = immediateCmdListStream.getUsed();
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = immediateCmdListStream.getUsed();
size_t csrUsedAfter = csrStream.getUsed();
@@ -546,7 +546,7 @@ void CmdListPipelineSelectStateFixture::testBodyShareStateImmediateRegular() {
auto &cmdQueueStream = commandQueue->commandStream;
sizeBefore = regularCommandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = regularCommandListStream.getUsed();
@@ -608,7 +608,7 @@ void CmdListThreadArbitrationFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::AgeBased;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -651,7 +651,7 @@ void CmdListThreadArbitrationFixture::testBody() {
{
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -671,7 +671,7 @@ void CmdListThreadArbitrationFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::AgeBased;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -718,7 +718,7 @@ void CmdListThreadArbitrationFixture::testBody() {
{
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -738,7 +738,7 @@ void CmdListThreadArbitrationFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::AgeBased;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -761,7 +761,7 @@ void CmdListThreadArbitrationFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -807,7 +807,7 @@ void CmdListThreadArbitrationFixture::testBody() {
{
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -870,7 +870,7 @@ void CmdListLargeGrfFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -913,7 +913,7 @@ void CmdListLargeGrfFixture::testBody() {
{
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -933,7 +933,7 @@ void CmdListLargeGrfFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -980,7 +980,7 @@ void CmdListLargeGrfFixture::testBody() {
{
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -1000,7 +1000,7 @@ void CmdListLargeGrfFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -1023,7 +1023,7 @@ void CmdListLargeGrfFixture::testBody() {
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
@@ -1069,7 +1069,7 @@ void CmdListLargeGrfFixture::testBody() {
{
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -28,7 +28,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSLMThenL3IsProgrammedWit
EXPECT_LE(0u, kernel->kernelImmData->getDescriptor().kernelAttributes.slmInlineSize);
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();

View File

@@ -118,7 +118,7 @@ GEN9TEST_F(CommandListCreateGen9, GivenDisabledMidThreadPreemptionWhenLaunchingK
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernelThreadGroup.toHandle(),
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
auto result = commandList->close();
@@ -148,7 +148,7 @@ GEN9TEST_F(CommandListCreateGen9, GivenUsesFencesForReadWriteImagesWhenLaunching
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernelMidBatch.toHandle(),
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(NEO::PreemptionMode::MidBatch, commandList->getCommandListPreemptionMode());
auto result = commandList->close();
@@ -183,11 +183,11 @@ GEN9TEST_F(CommandListCreateGen9, WhenCommandListHasLowerPreemptionLevelThenDoNo
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernelThreadGroup.toHandle(),
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
commandList->appendLaunchKernel(kernelMidThread.toHandle(),
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
auto result = commandList->close();

View File

@@ -96,19 +96,19 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
appendEventMultipleKernelIndirectEventHandleValue = hEvent;
return BaseClass::appendLaunchMultipleKernelsIndirect(numKernels, kernelHandles, pNumLaunchArguments, pLaunchArgumentsBuffer,
hEvent, numWaitEvents, phWaitEvents);
hEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
// Override that wraps BaseClass::appendLaunchKernelIndirect: it stores the
// caller's signal event handle in appendEventKernelIndirectEventHandleValue
// (presumably so a test can inspect it afterwards) and then forwards the
// arguments to the base implementation, returning its result.
// NOTE(review): this listing is a rendered commit diff. The two adjacent
// `phWaitEvents` parameter lines and the two adjacent forwarding-argument lines
// are the pre-change and post-change versions of the same source line; the
// post-change version adds the relaxedOrderingDispatch flag and passes it through.
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
// Record the signal event handle before delegating.
appendEventKernelIndirectEventHandleValue = hEvent;
return BaseClass::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
hEvent, numWaitEvents, phWaitEvents);
hEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
size_t getOwnedPrivateAllocationsSize() {
@@ -200,21 +200,21 @@ struct MockCommandList : public CommandList {
const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams));
const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch));
// Mock entry for appendLaunchCooperativeKernel, declared through
// ADDMETHOD_NOBASE with return type ze_result_t and ZE_RESULT_SUCCESS as the
// value argument (presumably the default result of the generated stub; confirm
// against the macro definition, which is not visible here).
// NOTE(review): the two adjacent `waitEventHandles` lines are the pre-change
// and post-change versions from the commit diff; the post-change version
// appends the relaxedOrderingDispatch parameter.
ADDMETHOD_NOBASE(appendLaunchCooperativeKernel, ze_result_t, ZE_RESULT_SUCCESS,
(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *waitEventHandles));
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch));
// Mock entry for appendLaunchKernelIndirect, declared through ADDMETHOD_NOBASE
// with return type ze_result_t and ZE_RESULT_SUCCESS as the value argument
// (presumably the stub's default result; confirm against the macro definition).
// NOTE(review): the two adjacent `phWaitEvents` lines are the pre-change and
// post-change versions from the commit diff; the post-change version appends
// the relaxedOrderingDispatch parameter.
ADDMETHOD_NOBASE(appendLaunchKernelIndirect, ze_result_t, ZE_RESULT_SUCCESS,
(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch));
ADDMETHOD_NOBASE(appendLaunchMultipleKernelsIndirect, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t numKernels,
@@ -223,7 +223,7 @@ struct MockCommandList : public CommandList {
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch));
// Mock entry for appendEventReset taking only the event handle. In this
// listing the entry has no relaxedOrderingDispatch parameter.
ADDMETHOD_NOBASE(appendEventReset, ze_result_t, ZE_RESULT_SUCCESS,
(ze_event_handle_t hEvent));
@@ -247,7 +247,7 @@ struct MockCommandList : public CommandList {
const ze_image_region_t *pDstRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch));
ADDMETHOD_NOBASE(appendImageCopyToMemory, ze_result_t, ZE_RESULT_SUCCESS,
(void *dstptr,
@@ -255,7 +255,7 @@ struct MockCommandList : public CommandList {
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch));
ADDMETHOD_NOBASE(appendImageCopyRegion, ze_result_t, ZE_RESULT_SUCCESS,
(ze_image_handle_t hDstImage,
@@ -264,14 +264,14 @@ struct MockCommandList : public CommandList {
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch));
ADDMETHOD_NOBASE(appendImageCopy, ze_result_t, ZE_RESULT_SUCCESS,
(ze_image_handle_t hDstImage,
ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch));
ADDMETHOD_NOBASE(appendMemAdvise, ze_result_t, ZE_RESULT_SUCCESS,
(ze_device_handle_t hDevice,
@@ -285,7 +285,7 @@ struct MockCommandList : public CommandList {
size_t size,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch));
ADDMETHOD_NOBASE(appendPageFaultCopy, ze_result_t, ZE_RESULT_SUCCESS,
(NEO::GraphicsAllocation * dstptr,
@@ -304,7 +304,7 @@ struct MockCommandList : public CommandList {
uint32_t srcSlicePitch,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch));
ADDMETHOD_NOBASE(appendMemoryPrefetch, ze_result_t, ZE_RESULT_SUCCESS,
(const void *ptr,
@@ -317,7 +317,7 @@ struct MockCommandList : public CommandList {
size_t size,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch));
// Mock entry for appendSignalEvent taking only the event handle. In this
// listing the entry has no relaxedOrderingDispatch parameter.
ADDMETHOD_NOBASE(appendSignalEvent, ze_result_t, ZE_RESULT_SUCCESS,
(ze_event_handle_t hEvent));
@@ -348,7 +348,7 @@ struct MockCommandList : public CommandList {
size_t size,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch));
ADDMETHOD_NOBASE(reserveSpace, ze_result_t, ZE_RESULT_SUCCESS,
(size_t size,
@@ -460,10 +460,10 @@ class MockAppendMemoryCopy : public CommandListCoreFamily<gfxCoreFamily> {
uint32_t dstPitch, size_t dstOffset,
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
size_t srcOffset, L0::Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents);
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
@@ -472,10 +472,10 @@ class MockAppendMemoryCopy : public CommandListCoreFamily<gfxCoreFamily> {
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
uint32_t srcSlicePitch, size_t srcOffset,
L0::Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents);
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation,
@@ -488,10 +488,10 @@ class MockAppendMemoryCopy : public CommandListCoreFamily<gfxCoreFamily> {
size_t dstRowPitch, size_t dstSlicePitch,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
L0::Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
srcBlitCopyRegionOffset = srcOffset;
dstBlitCopyRegionOffset = dstOffset;
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, signalEvent, numWaitEvents, phWaitEvents);
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
}
uintptr_t srcAlignedPtr;
uintptr_t dstAlignedPtr;
@@ -549,7 +549,7 @@ class MockCommandListForAppendLaunchKernel : public WhiteBox<::L0::CommandListCo
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) override {
const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) override {
const auto kernel = Kernel::fromHandle(kernelHandle);
cmdListHelper.isaAllocation = kernel->getIsaAllocation();

View File

@@ -62,7 +62,7 @@ HWTEST2_F(CommandQueueLinuxTests, givenExecBufferErrorOnXeHpcWhenExecutingComman
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams, false);
ze_command_list_handle_t cmdListHandles[1] = {commandList->toHandle()};

View File

@@ -13,6 +13,7 @@
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_cpu_page_fault_manager.h"
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/test_macros/hw_test.h"
@@ -1019,20 +1020,20 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
EXPECT_EQ(ultCsr->latestFlushedBatchBuffer.hasStallingCmds, bbFlag);
};
// non-pipelined state
verifyFlags(commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams), false, true);
verifyFlags(commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false), false, true);
// non-pipelined state already programmed
verifyFlags(commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams), false, false);
verifyFlags(commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false), false, false);
verifyFlags(commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, 0, nullptr), false, false);
verifyFlags(commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, false), false, false);
verifyFlags(commandList->appendBarrier(nullptr, 0, nullptr), true, true);
verifyFlags(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr), false, false);
verifyFlags(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false), false, false);
verifyFlags(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, 0, nullptr), false, false);
verifyFlags(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, 0, nullptr, false), false, false);
verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, 0, nullptr), false, false);
verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, 0, nullptr, false), false, false);
verifyFlags(commandList->appendEventReset(event), true, true);
@@ -1055,23 +1056,26 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
image->initialize(device, &zeDesc);
verifyFlags(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, nullptr, 0, nullptr), false, false);
verifyFlags(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, nullptr, 0, nullptr, false), false, false);
verifyFlags(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, nullptr, 0, nullptr), false, false);
verifyFlags(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, nullptr, 0, nullptr, false), false, false);
verifyFlags(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, nullptr, 0, nullptr), false, false);
verifyFlags(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, nullptr, 0, nullptr, false), false, false);
}
size_t rangeSizes = 1;
const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
verifyFlags(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, nullptr, 0, nullptr), true, true);
verifyFlags(commandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr), false, false);
verifyFlags(commandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, false), false, false);
driverHandle->releaseImportedPointer(dstPtr);
}
HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingThenPassRelaxedOrderingDependenciesInfo, IsAtLeastXeHpcCore) {
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
ze_result_t returnValue;
@@ -1106,6 +1110,9 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(commandList->csr);
ultCsr->recordFlusheBatchBuffer = true;
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr);
ultCsr->directSubmission.reset(directSubmission);
auto verifyFlags = [&ultCsr](ze_result_t result, bool dispatchFlag, bool bbFlag) {
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies, dispatchFlag);
@@ -1116,22 +1123,22 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
ze_event_handle_t *waitlist = hasEventDependencies ? &event : nullptr;
uint32_t numWaitlistEvents = hasEventDependencies ? 1 : 0;
verifyFlags(commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist, launchParams),
verifyFlags(commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist, launchParams, false),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist),
verifyFlags(commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist, false),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendBarrier(nullptr, numWaitlistEvents, waitlist),
false, false);
verifyFlags(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, numWaitlistEvents, waitlist, false),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, numWaitlistEvents, waitlist),
verifyFlags(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, numWaitlistEvents, waitlist, false),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, numWaitlistEvents, waitlist),
verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, numWaitlistEvents, waitlist, false),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendEventReset(event), false, false);
@@ -1141,10 +1148,10 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false),
false, false);
verifyFlags(commandList->appendWaitOnEvents(1, &event, false), true, true);
verifyFlags(commandList->appendWaitOnEvents(1, &event, false), false, false);
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
false, false);
if constexpr (FamilyType::supportsSampler) {
auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImageRegion);
@@ -1157,26 +1164,26 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
image->initialize(device, &zeDesc);
verifyFlags(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, nullptr, numWaitlistEvents, waitlist),
verifyFlags(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, nullptr, numWaitlistEvents, waitlist, false),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, nullptr, numWaitlistEvents, waitlist),
verifyFlags(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, nullptr, numWaitlistEvents, waitlist, false),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, nullptr, numWaitlistEvents, waitlist),
verifyFlags(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, nullptr, numWaitlistEvents, waitlist, false),
hasEventDependencies, hasEventDependencies);
}
size_t rangeSizes = 1;
const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
verifyFlags(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
false, false);
}
for (bool hasEventDependencies : {true, false}) {
ze_event_handle_t *waitlist = hasEventDependencies ? &event : nullptr;
uint32_t numWaitlistEvents = hasEventDependencies ? 1 : 0;
verifyFlags(commandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist),
verifyFlags(commandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist, false),
hasEventDependencies, hasEventDependencies);
}
@@ -1342,7 +1349,7 @@ TEST_F(CommandListCreate, givenImmediateCommandListWhenThereIsNoEnoughSpaceForIm
commandList->commandContainer.getCommandStream()->getSpace(useSize);
EXPECT_EQ(1U, commandList->commandContainer.getCmdBufferAllocations().size());
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1U, commandList->commandContainer.getCmdBufferAllocations().size());
}
@@ -1850,7 +1857,7 @@ TEST_F(CommandListCreate, whenInvokingAppendMemoryCopyFromContextForImmediateCom
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -1909,7 +1916,7 @@ TEST_F(CommandListCreate, whenInvokingAppendMemoryCopyFromContextForImmediateCom
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -1925,7 +1932,7 @@ TEST_F(CommandListCreate, whenInvokingAppendMemoryCopyFromContextForImmediateCom
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -2253,7 +2260,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenAppendingBarrierThenPipeControl
// Verifies that appendBarrier rejects a non-zero wait-event count (4) paired
// with a null wait-event list: the call is expected to return
// ZE_RESULT_ERROR_INVALID_ARGUMENT.
// NOTE(review): the two adjacent appendBarrier lines are the pre-change and
// post-change versions from the commit diff; they differ only in the literal
// spelling of the count (`4u` vs `4`).
HWTEST_F(CommandListCreate, givenCommandListWhenAppendingBarrierWithIncorrectWaitEventsThenInvalidArgumentIsReturned) {
ze_result_t returnValue;
// Create a RenderCompute command list; returnValue is reused below for the call under test.
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
returnValue = commandList->appendBarrier(nullptr, 4u, nullptr);
returnValue = commandList->appendBarrier(nullptr, 4, nullptr);
EXPECT_EQ(returnValue, ZE_RESULT_ERROR_INVALID_ARGUMENT);
}

View File

@@ -91,7 +91,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
size_t dstRowPitch, size_t dstSlicePitch,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
L0::Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
if (signalEvent) {
useEvents = true;
} else {
@@ -106,7 +106,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
uint32_t dstPitch, size_t dstOffset,
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
size_t srcOffset, L0::Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
appendMemoryCopyKernel2dCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -117,14 +117,14 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
uint32_t srcSlicePitch, size_t srcOffset,
L0::Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
appendMemoryCopyKernel3dCalledTimes++;
return ZE_RESULT_SUCCESS;
}
ze_result_t appendBlitFill(void *ptr, const void *pattern,
size_t patternSize, size_t size,
L0::Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
appendBlitFillCalledTimes++;
if (signalEvent) {
useEvents = true;
@@ -181,7 +181,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenMemoryCopyCalledWithNullDstPtrT
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = nullptr;
ze_result_t ret = cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr);
ze_result_t ret = cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u);
EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
@@ -191,7 +191,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenMemoryCopyCalledWithNullSrcPtrT
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
void *srcPtr = nullptr;
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_result_t ret = cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr);
ze_result_t ret = cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u);
EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
@@ -201,7 +201,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenMemoryCopyCalledWithNullSrcPtrA
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
void *srcPtr = nullptr;
void *dstPtr = nullptr;
ze_result_t ret = cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr);
ze_result_t ret = cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u);
EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
@@ -213,7 +213,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenMemoryCopyRegionCalledWithNullS
void *dstPtr = nullptr;
ze_copy_region_t dstRegion = {};
ze_copy_region_t srcRegion = {};
ze_result_t ret = cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
ze_result_t ret = cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u);
EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
@@ -225,7 +225,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenMemoryCopyRegionCalledWithNullS
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {};
ze_copy_region_t srcRegion = {};
ze_result_t ret = cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
ze_result_t ret = cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u);
EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
@@ -237,7 +237,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenMemoryCopyRegionCalledWithNullD
void *dstPtr = nullptr;
ze_copy_region_t dstRegion = {};
ze_copy_region_t srcRegion = {};
ze_result_t ret = cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
ze_result_t ret = cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u);
EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
@@ -250,7 +250,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenMemoryFillCalledWithNullDstPtrT
cmdList.failAlignedAlloc = true;
auto result = driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_result_t ret = cmdList.appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr);
ze_result_t ret = cmdList.appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u);
EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
result = driverHandle->releaseImportedPointer(dstPtr);
@@ -262,7 +262,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenQueryKernelTimestampsCalledWith
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
void *dstPtr = nullptr;
ze_event_handle_t eventHandle = {};
ze_result_t ret = cmdList.appendQueryKernelTimestamps(1u, &eventHandle, dstPtr, nullptr, nullptr, 1u, nullptr);
ze_result_t ret = cmdList.appendQueryKernelTimestamps(1u, &eventHandle, dstPtr, nullptr, nullptr, 1, nullptr);
EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u);
EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
@@ -272,7 +272,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenMemoryCopyCalledThenAppendMemor
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.appendMemoryCopyKernelWithGACalledTimes, 0u);
EXPECT_EQ(cmdList.appendMemoryCopyBlitCalledTimes, 0u);
}
@@ -282,7 +282,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhen4GByteMemoryCopyCalledThenAppen
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x100001234);
cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x100000000, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x100000000, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.appendMemoryCopyKernelWithGACalledTimes, 0u);
EXPECT_GT(cmdList.appendMemoryCopyKernelWithGAStatelessCalledTimes, 0u);
EXPECT_EQ(cmdList.appendMemoryCopyBlitCalledTimes, 0u);
@@ -293,7 +293,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenMemoryCopyCalledThenAppendMemor
cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalledTimes, 0u);
EXPECT_GT(cmdList.appendMemoryCopyBlitCalledTimes, 0u);
}
@@ -305,7 +305,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenMemoryCopyRegionCalledThenAppen
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {};
ze_copy_region_t srcRegion = {};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.appendMemoryCopyBlitRegionCalledTimes, 0u);
}
@@ -537,7 +537,7 @@ HWTEST2_F(CommandListAppend, givenCommandListAnd3DWhbufferenMemoryCopyRegionCall
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendMemoryCopyBlitRegionCalledTimes, 0u);
EXPECT_GT(cmdList.appendMemoryCopyKernel3dCalledTimes, 0u);
}
@@ -549,7 +549,7 @@ HWTEST2_F(CommandListAppend, givenCommandListAnd2DWhbufferenMemoryCopyRegionCall
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1};
ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendMemoryCopyBlitRegionCalledTimes, 0u);
EXPECT_GT(cmdList.appendMemoryCopyKernel2dCalledTimes, 0u);
}
@@ -565,7 +565,7 @@ HWTEST2_F(CommandListAppend, givenImmediateCommandListWithFlushTaskEnabledWhenAp
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
auto result = cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
auto result = cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -574,7 +574,7 @@ HWTEST2_F(CommandListAppend, givenCopyOnlyCommandListWhenAppendMemoryFillCalledT
cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
void *dstPtr = reinterpret_cast<void *>(0x1234);
int pattern = 1;
cmdList.appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr);
cmdList.appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.appendBlitFillCalledTimes, 0u);
}
@@ -583,7 +583,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenAppendMemoryFillCalledThenAppen
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
void *dstPtr = reinterpret_cast<void *>(0x1234);
int pattern = 1;
cmdList.appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr);
cmdList.appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendBlitFillCalledTimes, 0u);
}
@@ -613,9 +613,9 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenS
auto event1 = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
events.push_back(event1.get());
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 2u, events.data());
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 2, events.data(), false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 2u, events.data());
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 2, events.data(), false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList cmdList;
@@ -657,7 +657,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, event.get(), 0u, nullptr);
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, event.get(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList cmdList;
@@ -699,7 +699,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_SUBDEVICE;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, event.get(), 0u, nullptr);
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, event.get(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList cmdList;
@@ -732,7 +732,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenAppendImageCopyFromMemoryCalled
auto imageHW = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
imageHW->initialize(device, &zeDesc);
ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_result_t ret = cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, &dstRegion, nullptr, 0, nullptr);
ze_result_t ret = cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, &dstRegion, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u);
EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
@@ -746,7 +746,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenAppendImageCopyToMemoryCalledWi
auto imageHW = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
imageHW->initialize(device, &zeDesc);
ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2};
ze_result_t ret = cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), &srcRegion, nullptr, 0, nullptr);
ze_result_t ret = cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), &srcRegion, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u);
EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
@@ -762,7 +762,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListWhenCopyFromMemoryToImageThenBl
imageHW->initialize(device, &zeDesc);
ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2};
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, &dstRegion, nullptr, 0, nullptr);
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, &dstRegion, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.appendCopyImageBlitCalledTimes, 0u);
EXPECT_FALSE(cmdList.useEvents);
}
@@ -782,7 +782,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhenIma
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, event->toHandle(), 0, nullptr);
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionDstOrigin, expectedRegionOrigin);
EXPECT_TRUE(cmdList.useEvents);
@@ -803,7 +803,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhenIma
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr);
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionSrcOrigin, expectedRegionOrigin);
EXPECT_FALSE(cmdList.useEvents);
@@ -827,7 +827,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhen1DI
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr);
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionDstOrigin, expectedRegionOrigin);
}
@@ -850,7 +850,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhen1DI
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, event->toHandle(), 0, nullptr);
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionSrcOrigin, expectedRegionOrigin);
EXPECT_TRUE(cmdList.useEvents);
@@ -874,7 +874,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhen1DA
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr);
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionDstOrigin, expectedRegionOrigin);
}
@@ -897,7 +897,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhen1DA
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr);
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionSrcOrigin, expectedRegionOrigin);
}
@@ -919,7 +919,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhen2DI
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr);
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionSrcOrigin, expectedRegionOrigin);
}
@@ -940,7 +940,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhen2DI
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr);
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionDstOrigin, expectedRegionOrigin);
}
@@ -963,7 +963,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhen2DI
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr);
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionSrcOrigin, expectedRegionOrigin);
}
@@ -986,7 +986,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhen2DI
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr);
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionDstOrigin, expectedRegionOrigin);
}
@@ -1006,7 +1006,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhen3DI
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr);
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionSrcOrigin, expectedRegionOrigin);
}
@@ -1026,7 +1026,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListAndNullDestinationRegionWhen3DI
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr);
cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize);
EXPECT_EQ(cmdList.appendImageRegionDstOrigin, expectedRegionOrigin);
}
@@ -1042,7 +1042,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListWhenCopyFromImageToMemoryThenBl
imageHW->initialize(device, &zeDesc);
ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2};
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), &srcRegion, nullptr, 0, nullptr);
cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), &srcRegion, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.appendCopyImageBlitCalledTimes, 0u);
}
@@ -1058,7 +1058,7 @@ HWTEST2_F(CommandListAppend, givenCopyCommandListWhenCopyFromImageToImageThenBli
ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2};
ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2};
cmdList.appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr);
cmdList.appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.appendCopyImageBlitCalledTimes, 0u);
EXPECT_FALSE(cmdList.useEvents);
}
@@ -1078,7 +1078,7 @@ HWTEST2_F(CommandListAppend, givenComputeCommandListAndEventIsUsedWhenCopyFromIm
ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2};
ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2};
cmdList.appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, event->toHandle(), 0, nullptr);
cmdList.appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(cmdList.appendCopyImageBlitCalledTimes, 0u);
EXPECT_EQ(event.get(), cmdList.appendKernelEventValue);
}
@@ -1120,7 +1120,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyRegionWithinMaxBlitSize
size_t rowPitch = copySize.x;
size_t slicePitch = copySize.x * copySize.y;
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr);
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@@ -1167,7 +1167,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyRegionWithinMaxBlitSize
canonizedGpuAddress);
size_t rowPitch = copySize.x;
size_t slicePitch = copySize.x * copySize.y;
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr);
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr, false);
uint32_t bytesPerPixel = NEO::BlitCommandsHelper<FamilyType>::getAvailableBytesPerPixel(copySize.x, srcRegion.originX, dstRegion.originY, srcSize.x, dstSize.x);
GenCmdList cmdList;
@@ -1213,7 +1213,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyRegionGreaterThanMaxBli
canonizedGpuAddress);
size_t rowPitch = copySize.x;
size_t slicePitch = copySize.x * copySize.y;
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr);
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@@ -1242,7 +1242,7 @@ class MockCommandListForRegionSize : public WhiteBox<::L0::CommandListCoreFamily
size_t dstRowPitch, size_t dstSlicePitch,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
L0::Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
this->srcSize = srcSize;
this->dstSize = dstSize;
return ZE_RESULT_SUCCESS;
@@ -1268,7 +1268,7 @@ HWTEST2_F(CommandListCreate, givenZeroAsPitchAndSlicePitchWhenMemoryCopyRegionCa
ze_copy_region_t srcRegion = dstRegion;
uint32_t pitch = 0;
uint32_t slicePitch = 0;
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, pitch, slicePitch, srcPtr, &srcRegion, pitch, slicePitch, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, pitch, slicePitch, srcPtr, &srcRegion, pitch, slicePitch, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.dstSize.x, dstRegion.width + dstRegion.originX);
EXPECT_EQ(cmdList.dstSize.y, dstRegion.height + dstRegion.originY);
EXPECT_EQ(cmdList.dstSize.z, dstRegion.depth + dstRegion.originZ);
@@ -1287,7 +1287,7 @@ HWTEST2_F(CommandListCreate, givenPitchAndSlicePitchWhenMemoryCopyRegionCalledTh
ze_copy_region_t srcRegion = dstRegion;
uint32_t pitch = 0x1000;
uint32_t slicePitch = 0x100000;
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, pitch, slicePitch, srcPtr, &srcRegion, pitch, slicePitch, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, pitch, slicePitch, srcPtr, &srcRegion, pitch, slicePitch, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.dstSize.x, pitch);
EXPECT_EQ(cmdList.dstSize.y, slicePitch / pitch);
@@ -1347,7 +1347,7 @@ HWTEST2_F(CommandListAppendMemoryCopyBlit, whenAppendMemoryCopyBlitIsAppendedAnd
auto result = commandList->appendMemoryCopy(dstPtr,
srcPtr,
size,
nullptr, 0, nullptr);
nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto secondBatchBufferAllocation = commandList->commandContainer.getCommandStream()->getGraphicsAllocation();

View File

@@ -313,7 +313,7 @@ HWTEST2_F(CommandListCreate,
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U};
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, events[0], 1u, &events[1]);
srcBuffer, &sr, width, 0, events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -375,7 +375,7 @@ HWTEST2_F(CommandListCreate,
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U};
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, events[0], 1u, &events[1]);
srcBuffer, &sr, width, 0, events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -432,7 +432,7 @@ HWTEST2_F(CommandListCreate,
int one = 1;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4096u,
events[0], 1u, &events[1]);
events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -491,7 +491,7 @@ HWTEST2_F(CommandListCreate,
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4096u,
events[0], 1u, &events[1]);
events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -546,7 +546,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingHostMemoryWithS
int one = 1;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4090u,
events[0], 1u, &events[1]);
events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
GenCmdList cmdList;
@@ -602,7 +602,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4090u,
events[0], 1u, &events[1]);
events[0], 1, &events[1], false);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -660,7 +660,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic
int one = 1;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4090u,
events[0], 1u, &events[1]);
events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -712,7 +712,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionWithSignalAndWa
ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U};
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U};
result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, events[0], 1u, &events[1]);
srcBuffer, &sr, width, 0, events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -745,7 +745,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionWithSignalAndIn
ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U};
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U};
result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, events[0], 1u, nullptr);
srcBuffer, &sr, width, 0, events[0], 1, nullptr, false);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
@@ -777,7 +777,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHasEmptyRegionW
ze_copy_region_t sr = {0U, 0U, 0U, 0U, 0U, 0U};
ze_copy_region_t dr = {0U, 0U, 0U, 0U, 0U, 0U};
result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, 0, 0,
srcBuffer, &sr, 0, 0, events[0], 1u, &events[1]);
srcBuffer, &sr, 0, 0, events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -820,7 +820,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSi
ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U};
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U};
result = commandList0->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, events[0], 1u, &events[1]);
srcBuffer, &sr, width, 0, events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -865,7 +865,7 @@ TEST_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSigna
for (auto i = 0; i < 2000; i++) {
ret = commandList0->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, events[0], 1u, &events[1]);
srcBuffer, &sr, width, 0, events[0], 1, &events[1], false);
}
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
}
@@ -913,7 +913,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSi
ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U};
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U};
auto result = commandList0->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, events[0], 1u, &events[1]);
srcBuffer, &sr, width, 0, events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -965,7 +965,7 @@ HWTEST2_F(CommandListCreateWithBcs, givenImmediateCommandListWhenCopyRegionFromI
ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2};
ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2};
returnValue = commandList0->appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr);
returnValue = commandList0->appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
}
@@ -1011,7 +1011,7 @@ HWTEST2_F(CommandListCreateWithBcs, givenImmediateCommandListWhenCopyRegionFromI
ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2};
ze_image_region_t dstRegion = {2, 2, 2, 4, 4, 4};
returnValue = commandList0->appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr);
returnValue = commandList0->appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
}
@@ -1055,7 +1055,7 @@ HWTEST2_F(CommandListCreateWithBcs, givenImmediateCommandListWhenCopyFromImageTo
imageHWSrc->initialize(device, &desc);
imageHWDst->initialize(device, &desc);
returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr);
returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
}
@@ -1103,7 +1103,7 @@ HWTEST2_F(CommandListCreateWithBcs, givenImmediateCommandListWhenMemoryCopyRegio
ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U};
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U};
result = commandList0->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, events[0], 1u, nullptr);
srcBuffer, &sr, width, 0, events[0], 1, nullptr, false);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
@@ -1186,7 +1186,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendBarrierThe
EXPECT_EQ(eventObject->queryStatus(), ZE_RESULT_SUCCESS);
commandList->appendBarrier(nullptr, 0, nullptr);
commandList->appendBarrier(event, 0, nullptr);
}
TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventResetThenUpdateTaskCountNeededFlagIsDisabled) {

View File

@@ -166,11 +166,11 @@ HWTEST2_F(CommandListCreate, givenUseCsrImmediateSubmissionEnabledForCopyImmedia
imageHWSrc->initialize(device, &desc);
imageHWDst->initialize(device, &desc);
returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr);
returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList0->appendImageCopyFromMemory(imageHWDst->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr);
returnValue = commandList0->appendImageCopyFromMemory(imageHWDst->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList0->appendImageCopyToMemory(dstPtr, imageHWSrc->toHandle(), nullptr, nullptr, 0, nullptr);
returnValue = commandList0->appendImageCopyToMemory(dstPtr, imageHWSrc->toHandle(), nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
}
@@ -211,11 +211,11 @@ HWTEST2_F(CommandListCreate, givenUseCsrImmediateSubmissionDisabledForCopyImmedi
imageHWSrc->initialize(device, &desc);
imageHWDst->initialize(device, &desc);
returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr);
returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList0->appendImageCopyFromMemory(imageHWDst->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr);
returnValue = commandList0->appendImageCopyFromMemory(imageHWDst->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList0->appendImageCopyToMemory(dstPtr, imageHWSrc->toHandle(), nullptr, nullptr, 0, nullptr);
returnValue = commandList0->appendImageCopyToMemory(dstPtr, imageHWSrc->toHandle(), nullptr, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
}
@@ -234,7 +234,7 @@ HWTEST_F(CommandListCreate, givenUseCsrImmediateSubmissionEnabledForCopyImmediat
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue);
auto result = commandList->appendMemoryCopyRegion(dstPtr, &dr, 0, 0, srcPtr, &sr, 0, 0, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopyRegion(dstPtr, &dr, 0, 0, srcPtr, &sr, 0, 0, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
commandList->destroy();
@@ -314,7 +314,7 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue));
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -350,13 +350,13 @@ HWTEST2_F(CommandListAppendLaunchKernelResetKernelCount, givenIsKernelSplitOpera
event->increaseKernelCount();
launchParams.isKernelSplitOperation = true;
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(2u, event->getKernelCount());
}
{
launchParams.isKernelSplitOperation = false;
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, event->getKernelCount());
}
@@ -746,7 +746,7 @@ HWTEST_F(CommandListCreate, GivenCommandListWhenUnalignedPtrThenLeftMiddleAndRig
void *srcPtr = reinterpret_cast<void *>(0x4321);
void *dstPtr = reinterpret_cast<void *>(0x2345);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 2 * MemoryConstants::cacheLineSize, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 2 * MemoryConstants::cacheLineSize, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList cmdList;
@@ -786,7 +786,7 @@ HWTEST2_F(HostPointerManagerCommandListTest,
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
int pattern = 1;
ret = commandList->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&pattern), sizeof(pattern), 64u, nullptr, 0, nullptr);
ret = commandList->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&pattern), sizeof(pattern), 64u, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
ret = hostDriverHandle->releaseImportedPointer(heapPointer);
@@ -803,7 +803,7 @@ HWTEST2_F(HostPointerManagerCommandListTest,
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
int pattern = 1;
ret = commandList->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&pattern), sizeof(pattern), 64u, nullptr, 0, nullptr);
ret = commandList->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&pattern), sizeof(pattern), 64u, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
ret = hostDriverHandle->releaseImportedPointer(heapPointer);
@@ -906,7 +906,7 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithS
events.push_back(event1.get());
result = commandList->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&one), sizeof(one), size,
events[0], 1u, &events[1]);
events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ret = hostDriverHandle->releaseImportedPointer(heapPointer);
@@ -958,7 +958,7 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithS
auto offset = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&one), sizeof(one), size,
events[0], 1u, nullptr);
events[0], 1u, nullptr, false);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
ret = hostDriverHandle->releaseImportedPointer(heapPointer);
@@ -998,7 +998,7 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithS
events.push_back(event1.get());
result = commandList->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&one), sizeof(one), size,
events[0], 1u, &events[1]);
events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ret = hostDriverHandle->releaseImportedPointer(heapPointer);
@@ -1033,7 +1033,7 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithS
events.push_back(event1.get());
result = commandList->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&one), sizeof(one), size,
events[0], 1u, nullptr);
events[0], 1u, nullptr, false);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
ret = hostDriverHandle->releaseImportedPointer(heapPointer);
@@ -1084,7 +1084,7 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemory
events.push_back(event1.get());
ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&one), sizeof(one), size,
events[0], 1u, &events[1]);
events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
ret = hostDriverHandle->releaseImportedPointer(heapPointer);
@@ -1132,7 +1132,7 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemory
events.push_back(event1.get());
ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&one), sizeof(one), size,
events[0], 1u, &events[1]);
events[0], 1, &events[1], false);
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
ret = hostDriverHandle->releaseImportedPointer(heapPointer);
@@ -1182,7 +1182,7 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemory
events.push_back(event1.get());
ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast<void *>(&one), sizeof(one), size,
events[0], 1u, nullptr);
events[0], 1u, nullptr, false);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, ret);
ret = hostDriverHandle->releaseImportedPointer(heapPointer);

View File

@@ -369,7 +369,7 @@ HWTEST2_F(CommandListTest,
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1};
ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1};
commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
}
@@ -388,7 +388,7 @@ HWTEST2_F(CommandListTest,
void *srcPtr = reinterpret_cast<void *>(0x1234);
ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1};
ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1};
commandList->appendMemoryCopyRegion(dstBuffer, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
commandList->appendMemoryCopyRegion(dstBuffer, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -413,7 +413,7 @@ HWTEST2_F(CommandListTest,
void *srcPtr = reinterpret_cast<void *>(0x1234);
ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1};
ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1};
commandList->appendMemoryCopyRegion(dstBuffer, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
commandList->appendMemoryCopyRegion(dstBuffer, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -429,7 +429,7 @@ HWTEST2_F(CommandListTest,
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
}
@@ -448,7 +448,7 @@ HWTEST2_F(CommandListTest,
void *srcPtr = reinterpret_cast<void *>(0x1234);
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
commandList->appendMemoryCopyRegion(dstBuffer, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
commandList->appendMemoryCopyRegion(dstBuffer, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -473,7 +473,7 @@ HWTEST2_F(CommandListTest,
void *srcPtr = reinterpret_cast<void *>(0x1234);
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
commandList->appendMemoryCopyRegion(dstBuffer, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
commandList->appendMemoryCopyRegion(dstBuffer, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -498,7 +498,7 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenCopyFromImageToImageTheBui
ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2};
ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2};
commandList->appendImageCopyRegion(imageHwDst->toHandle(), imageHwSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr);
commandList->appendImageCopyRegion(imageHwDst->toHandle(), imageHwSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
}
@@ -518,7 +518,7 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenCopyFromImageToExternalHos
imageHw->initialize(device, &zeDesc);
ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2};
commandList->appendImageCopyToMemory(dstPtr, imageHw->toHandle(), &srcRegion, nullptr, 0, nullptr);
commandList->appendImageCopyToMemory(dstPtr, imageHw->toHandle(), &srcRegion, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
}
@@ -543,7 +543,7 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenCopyFromImageToUsmHostMemo
imageHw->initialize(device, &zeDesc);
ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2};
commandList->appendImageCopyToMemory(dstBuffer, imageHw->toHandle(), &srcRegion, nullptr, 0, nullptr);
commandList->appendImageCopyToMemory(dstBuffer, imageHw->toHandle(), &srcRegion, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -574,7 +574,7 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenCopyFromImageToUsmDeviceMe
imageHw->initialize(device, &zeDesc);
ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2};
commandList->appendImageCopyToMemory(dstBuffer, imageHw->toHandle(), &srcRegion, nullptr, 0, nullptr);
commandList->appendImageCopyToMemory(dstBuffer, imageHw->toHandle(), &srcRegion, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -601,7 +601,7 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenImageCopyFromMemoryThenBui
Vec3<size_t> expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth};
Vec3<size_t> expectedRegionOrigin = {0, 0, 0};
commandList->appendImageCopyFromMemory(imageHw->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr);
commandList->appendImageCopyFromMemory(imageHw->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
}
@@ -612,7 +612,7 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryCopyInExternalHostAl
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -630,7 +630,7 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryCopyInUsmHostAllocat
void *srcPtr = reinterpret_cast<void *>(0x1234);
commandList->appendMemoryCopy(dstBuffer, srcPtr, 8, nullptr, 0, nullptr);
commandList->appendMemoryCopy(dstBuffer, srcPtr, 8, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -654,7 +654,7 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryCopyInUsmDeviceAlloc
void *srcPtr = reinterpret_cast<void *>(0x1234);
commandList->appendMemoryCopy(dstBuffer, srcPtr, 8, nullptr, 0, nullptr);
commandList->appendMemoryCopy(dstBuffer, srcPtr, 8, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -675,12 +675,12 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillInUsmHostThenBui
auto result = context->allocHostMem(&hostDesc, allocSize, allocSize, &dstBuffer);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
commandList->appendMemoryFill(dstBuffer, pattern, patternSize, allocSize, nullptr, 0, nullptr);
commandList->appendMemoryFill(dstBuffer, pattern, patternSize, allocSize, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
commandList->appendMemoryFill(dstBuffer, pattern, 1, allocSize, nullptr, 0, nullptr);
commandList->appendMemoryFill(dstBuffer, pattern, 1, allocSize, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -705,12 +705,12 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillInUsmDeviceThenB
size, alignment, &dstBuffer);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
commandList->appendMemoryFill(dstBuffer, pattern, patternSize, size, nullptr, 0, nullptr);
commandList->appendMemoryFill(dstBuffer, pattern, patternSize, size, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
commandList->appendMemoryFill(dstBuffer, pattern, 1, size, nullptr, 0, nullptr);
commandList->appendMemoryFill(dstBuffer, pattern, 1, size, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -737,12 +737,12 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillRequiresMultiKer
constexpr size_t fillSize = size - 1;
commandList->appendMemoryFill(dstBuffer, pattern, patternSize, fillSize, nullptr, 0, nullptr);
commandList->appendMemoryFill(dstBuffer, pattern, patternSize, fillSize, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
commandList->appendMemoryFill(dstBuffer, pattern, 1, fillSize, nullptr, 0, nullptr);
commandList->appendMemoryFill(dstBuffer, pattern, 1, fillSize, nullptr, 0, nullptr, false);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation);
EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
@@ -858,7 +858,7 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH
size_t sshUsed = csrSshHeap->getUsed();
size_t csrUsedBefore = csrStream.getUsed();
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t csrUsedAfter = csrStream.getUsed();
@@ -923,7 +923,7 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH
sshUsed = csrSshHeap->getUsed();
csrUsedBefore = csrStream.getUsed();
result = commandListImmediateCoexisting->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandListImmediateCoexisting->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
csrUsedAfter = csrStream.getUsed();

View File

@@ -246,7 +246,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenSignalEventWhenAppendLaunchCoopera
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr);
returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_EQ(event.get(), commandList->appendKernelEventValue);
}
@@ -277,7 +277,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenSignalEventWhenAppendLaunchMultipl
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, numLaunchArgs, nullptr, event->toHandle(), 0, nullptr);
returnValue = commandList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, numLaunchArgs, nullptr, event->toHandle(), 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_EQ(event->toHandle(), commandList->appendEventMultipleKernelIndirectEventHandleValue);
@@ -312,7 +312,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenSignalEventWhenAppendLaunchIndirec
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), static_cast<ze_group_count_t *>(alloc), event->toHandle(), 0, nullptr);
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), static_cast<ze_group_count_t *>(alloc), event->toHandle(), 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(event->toHandle(), commandList->appendEventKernelIndirectEventHandleValue);
@@ -472,7 +472,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendingMemoryCopyThe
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -495,7 +495,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendingMemoryCopyWit
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, nullptr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, nullptr, false);
ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
@@ -508,7 +508,7 @@ HWTEST2_F(CommandListCreate, givenCommandListAndHostPointersWhenMemoryCopyCalled
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto &commandContainer = commandList0->commandContainer;
@@ -565,7 +565,7 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
ze_group_count_t groupCount = {3, 2, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize());
@@ -635,7 +635,7 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
ze_event_handle_t eventHandles[1] = {event->toHandle()};
EXPECT_EQ(MockEvent::STATE_CLEARED, static_cast<MockEvent *>(event.get())->isCompleted);
result = CommandList::fromHandle(cmdListHandle)->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 1, eventHandles, launchParams);
result = CommandList::fromHandle(cmdListHandle)->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 1, eventHandles, launchParams, false);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(MockEvent::STATE_SIGNALED, static_cast<MockEvent *>(event.get())->isCompleted);
@@ -753,7 +753,7 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
ze_group_count_t groupCount = {3, 2, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize());
@@ -780,12 +780,12 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyDis
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
size_t usedBefore = cmdStream.getUsed();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
size_t usedAfter = cmdStream.getUsed();
GenCmdList cmdList;
@@ -818,7 +818,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyDis
}
usedBefore = cmdStream.getUsed();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
usedAfter = cmdStream.getUsed();
cmdList.clear();
@@ -846,7 +846,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyDis
cmdStream.getSpace(cmdStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
auto oldCmdBuffer = cmdStream.getGraphicsAllocation();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
usedBefore = 0;
usedAfter = cmdStream.getUsed();
@@ -886,7 +886,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyDis
void *oldBase = cmdStream.getCpuBase();
oldCmdBuffer = cmdStream.getGraphicsAllocation();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
newCmdBuffer = cmdStream.getGraphicsAllocation();
ASSERT_NE(oldCmdBuffer, newCmdBuffer);
@@ -949,10 +949,10 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyCom
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
size_t usedBefore = cmdStream.getUsed();
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
size_t usedAfter = cmdStream.getUsed();
GenCmdList cmdList;
@@ -982,7 +982,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyCom
}
usedBefore = cmdStream.getUsed();
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
usedAfter = cmdStream.getUsed();
cmdList.clear();
@@ -1008,7 +1008,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyCom
void *oldBase = cmdStream.getCpuBase();
cmdStream.getSpace(cmdStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
usedBefore = cmdStream.getUsed();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
auto newCmdBuffer = cmdStream.getGraphicsAllocation();
ASSERT_NE(oldCmdBuffer, newCmdBuffer);
@@ -1054,7 +1054,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyCom
oldCmdBuffer = cmdStream.getGraphicsAllocation();
usedBefore = 0;
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
usedAfter = cmdStream.getUsed();
newCmdBuffer = cmdStream.getGraphicsAllocation();
@@ -1114,27 +1114,27 @@ HWTEST2_F(MultiReturnCommandListTest,
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 0;
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
if (fePropertiesSupport.disableEuFusion) {
@@ -1367,23 +1367,23 @@ HWTEST2_F(MultiReturnCommandListTest,
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
if (fePropertiesSupport.computeDispatchAllWalker) {
@@ -1613,7 +1613,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
size_t usedBefore = regularCmdListStream.getUsed();
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = regularCmdListStream.getUsed();
@@ -1677,7 +1677,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr
size_t csrUsedBefore = csrStream.getUsed();
usedBefore = immediateCmdListStream.getUsed();
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
usedAfter = immediateCmdListStream.getUsed();
size_t csrUsedAfter = csrStream.getUsed();
@@ -1748,7 +1748,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr
size_t csrUsedBefore = csrStream.getUsed();
size_t usedBefore = immediateCmdListStream.getUsed();
ze_result_t result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
ze_result_t result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = immediateCmdListStream.getUsed();
size_t csrUsedAfter = csrStream.getUsed();
@@ -1797,7 +1797,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr
auto &regularCmdListStream = *commandList->commandContainer.getCommandStream();
usedBefore = regularCmdListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
usedAfter = regularCmdListStream.getUsed();
@@ -2069,7 +2069,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrW
auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
EXPECT_EQ(nullptr, dstAlloc->getLockedPtr());
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr, false);
EXPECT_EQ(1u, reinterpret_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager())->lockResourceCalled);
EXPECT_NE(nullptr, dstAlloc->getLockedPtr());
}
@@ -2084,7 +2084,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrW
auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
EXPECT_EQ(nullptr, dstAlloc->getLockedPtr());
cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 1024, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 1024, nullptr, 0, nullptr, false);
EXPECT_EQ(1u, reinterpret_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager())->lockResourceCalled);
EXPECT_NE(nullptr, dstAlloc->getLockedPtr());
}
@@ -2110,7 +2110,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenForceModeWhenCopyIsCalledThenBothAllo
EXPECT_EQ(nullptr, dstAlloc->getLockedPtr());
EXPECT_EQ(nullptr, dstAlloc2->getLockedPtr());
cmdList.appendMemoryCopy(devicePtr2, devicePtr, 1024, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(devicePtr2, devicePtr, 1024, nullptr, 0, nullptr, false);
EXPECT_EQ(2u, reinterpret_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager())->lockResourceCalled);
EXPECT_NE(nullptr, dstAlloc->getLockedPtr());
EXPECT_NE(nullptr, dstAlloc2->getLockedPtr());
@@ -2138,7 +2138,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenForceModeWhenCopyIsCalledFromHostUsmT
EXPECT_EQ(nullptr, dstAlloc->getLockedPtr());
EXPECT_EQ(nullptr, hostAlloction->getLockedPtr());
cmdList.appendMemoryCopy(hostPtr, devicePtr, 1024, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(hostPtr, devicePtr, 1024, nullptr, 0, nullptr, false);
EXPECT_EQ(1u, reinterpret_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager())->lockResourceCalled);
EXPECT_NE(nullptr, dstAlloc->getLockedPtr());
EXPECT_EQ(nullptr, hostAlloction->getLockedPtr());
@@ -2157,7 +2157,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrW
device->getDriverHandle()->getMemoryManager()->lockResource(dstAlloc);
EXPECT_EQ(1u, reinterpret_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager())->lockResourceCalled);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr, false);
EXPECT_EQ(1u, reinterpret_cast<MockMemoryManager *>(device->getDriverHandle()->getMemoryManager())->lockResourceCalled);
}
@@ -2168,7 +2168,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrW
memset(nonUsmHostPtr, 1, 1024);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr, false);
EXPECT_EQ(res, ZE_RESULT_SUCCESS);
NEO::SvmAllocationData *allocData;
@@ -2196,7 +2196,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndSignalEventAnd
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, event->toHandle(), 0, nullptr);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(res, ZE_RESULT_SUCCESS);
EXPECT_EQ(event->queryStatus(), ZE_RESULT_SUCCESS);
@@ -2222,7 +2222,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndSignalEventAnd
EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY);
cmdList.appendBarrier(nullptr, 0, nullptr);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, event->toHandle(), 0, nullptr);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(res, ZE_RESULT_ERROR_DEVICE_LOST);
EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY);
@@ -2233,7 +2233,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenCpuMemcpyWith
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr, false);
EXPECT_EQ(res, ZE_RESULT_SUCCESS);
uint32_t waitForFlushTagUpdateCalled = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(cmdList.csr)->waitForCompletionWithTimeoutTaskCountCalled;
@@ -2246,7 +2246,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenCpuMemcpyWith
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
cmdList.appendBarrier(nullptr, 0, nullptr);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr, false);
EXPECT_EQ(res, ZE_RESULT_SUCCESS);
uint32_t waitForFlushTagUpdateCalled = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(cmdList.csr)->waitForCompletionWithTimeoutTaskCountCalled;
@@ -2264,7 +2264,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendBarrier
EXPECT_TRUE(cmdList.dependenciesPresent);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr, false);
EXPECT_EQ(res, ZE_RESULT_SUCCESS);
EXPECT_FALSE(cmdList.dependenciesPresent);
@@ -2290,7 +2290,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendWaitOnE
EXPECT_TRUE(cmdList.dependenciesPresent);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr);
auto res = cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr, false);
EXPECT_EQ(res, ZE_RESULT_SUCCESS);
EXPECT_FALSE(cmdList.dependenciesPresent);
@@ -2311,11 +2311,9 @@ class MockAppendMemoryLockedCopyTestImmediateCmdList : public MockCommandListImm
appendMemoryCopyKernelWithGACalled++;
return ZE_RESULT_SUCCESS;
}
// Test hook: counts every appendBarrier call in appendBarrierCalled, then forwards the same
// arguments to MockCommandListImmediateHw<gfxCoreFamily>::appendBarrier and returns its result.
// NOTE(review): this span comes from a rendered diff without +/- markers. The three-line
// signature using hSignalEvent and the first return look like the removed version; the
// single-line signature using hEvent and the second return look like the added version.
// Confirm against the real file before editing.
ze_result_t appendBarrier(ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override {
ze_result_t appendBarrier(ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
// Record the call so tests can assert on how many barriers were appended.
appendBarrierCalled++;
return MockCommandListImmediateHw<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
return MockCommandListImmediateHw<gfxCoreFamily>::appendBarrier(hEvent, numWaitEvents, phWaitEvents);
}
uint32_t appendBarrierCalled = 0;
@@ -2325,11 +2323,12 @@ class MockAppendMemoryLockedCopyTestImmediateCmdList : public MockCommandListImm
// Copies 1024 bytes from a USM host allocation into devicePtr on the mock immediate command
// list and expects the kernel-based copy path to be taken at least once
// (appendMemoryCopyKernelWithGACalled >= 1).
// NOTE(review): rendered diff without +/- markers. The two adjacent appendMemoryCopy calls
// differ only in the trailing `false` (relaxedOrderingDispatch) and appear to be the
// before/after pair of one statement, not two copies.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmSrcHostPtrWhenCopyH2DThenUseGpuMemcpy, IsAtLeastSkl) {
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
// Point the command list at the device's internal-engine command stream receiver.
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
void *usmSrcPtr;
ze_host_mem_alloc_desc_t hostDesc = {};
// Source buffer is a real USM host allocation (1024 bytes, alignment 1).
context->allocHostMem(&hostDesc, 1024, 1u, &usmSrcPtr);
cmdList.appendMemoryCopy(devicePtr, usmSrcPtr, 1024, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(devicePtr, usmSrcPtr, 1024, nullptr, 0, nullptr, false);
EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u);
// Release the USM allocation made above.
context->freeMem(usmSrcPtr);
}
@@ -2337,11 +2336,12 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmSrcHostPtrW
// Copies 1024 bytes from nonUsmHostPtr into a USM host allocation on the mock immediate
// command list and expects the kernel-based copy path to be taken at least once
// (appendMemoryCopyKernelWithGACalled >= 1).
// NOTE(review): rendered diff without +/- markers. The two adjacent appendMemoryCopy calls
// differ only in the trailing `false` (relaxedOrderingDispatch) and appear to be the
// before/after pair of one statement, not two copies.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmDstHostPtrWhenCopyThenUseGpuMemcpy, IsAtLeastSkl) {
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
// Point the command list at the device's internal-engine command stream receiver.
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
void *usmHostDstPtr;
ze_host_mem_alloc_desc_t hostDesc = {};
// Destination buffer is a real USM host allocation (1024 bytes, alignment 1).
context->allocHostMem(&hostDesc, 1024, 1u, &usmHostDstPtr);
cmdList.appendMemoryCopy(usmHostDstPtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(usmHostDstPtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr, false);
EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u);
// Release the USM allocation made above.
context->freeMem(usmHostDstPtr);
}
@@ -2349,11 +2349,12 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmDstHostPtrW
// Copies 1024 bytes from a USM host allocation into nonUsmHostPtr on the mock immediate
// command list and expects the kernel-based copy path to be taken at least once
// (appendMemoryCopyKernelWithGACalled >= 1).
// NOTE(review): rendered diff without +/- markers. The two adjacent appendMemoryCopy calls
// differ only in the trailing `false` (relaxedOrderingDispatch) and appear to be the
// before/after pair of one statement, not two copies.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmSrcHostPtrWhenCopyThenUseGpuMemcpy, IsAtLeastSkl) {
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
// Point the command list at the device's internal-engine command stream receiver.
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
void *usmHostSrcPtr;
ze_host_mem_alloc_desc_t hostDesc = {};
// Source buffer is a real USM host allocation (1024 bytes, alignment 1).
context->allocHostMem(&hostDesc, 1024, 1u, &usmHostSrcPtr);
cmdList.appendMemoryCopy(nonUsmHostPtr, usmHostSrcPtr, 1024, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(nonUsmHostPtr, usmHostSrcPtr, 1024, nullptr, 0, nullptr, false);
EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u);
// Release the USM allocation made above.
context->freeMem(usmHostSrcPtr);
}
@@ -2361,14 +2362,16 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmSrcHostPtrW
// Copies 3 * MemoryConstants::megaByte from nonUsmHostPtr into devicePtr on the mock immediate
// command list and expects the kernel-based copy path to be taken at least once
// (appendMemoryCopyKernelWithGACalled >= 1).
// NOTE(review): the size limit that makes this copy "too large" is not visible in this
// span; verify it against the command list implementation.
// NOTE(review): rendered diff without +/- markers. The 6-argument appendMemoryCopy call looks
// like the removed line; the csr assignment and the 7-argument call look like the added lines.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmSrcHostPtrWhenSizeTooLargeThenUseGpuMemcpy, IsAtLeastSkl) {
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 3 * MemoryConstants::megaByte, nullptr, 0, nullptr);
// Point the command list at the device's internal-engine command stream receiver.
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 3 * MemoryConstants::megaByte, nullptr, 0, nullptr, false);
EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u);
}
// Copies 2 * MemoryConstants::kiloByte from devicePtr into nonUsmHostPtr on the mock immediate
// command list and expects the kernel-based copy path to be taken at least once
// (appendMemoryCopyKernelWithGACalled >= 1).
// NOTE(review): the size limit that makes this copy "too large" is not visible in this
// span; verify it against the command list implementation.
// NOTE(review): rendered diff without +/- markers. The 6-argument appendMemoryCopy call looks
// like the removed line; the csr assignment and the 7-argument call look like the added lines.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmDstHostPtrWhenSizeTooLargeThenUseGpuMemcpy, IsAtLeastSkl) {
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr);
// Point the command list at the device's internal-engine command stream receiver.
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr, false);
EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u);
}
@@ -2377,7 +2380,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndFailedToLockPt
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1 * MemoryConstants::megaByte, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1 * MemoryConstants::megaByte, nullptr, 0, nullptr, false);
ASSERT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u);
NEO::SvmAllocationData *dstAllocData;
@@ -2389,7 +2392,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndFailedToLockPt
mockMemoryManager->failLockResource = true;
ASSERT_FALSE(graphicsAllocation->isLocked());
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1 * MemoryConstants::megaByte, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1 * MemoryConstants::megaByte, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 1u);
}
@@ -2399,7 +2402,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndD2HCopyWhenSiz
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u);
}
@@ -2408,7 +2411,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndH2DCopyWhenSiz
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 3 * MemoryConstants::megaByte, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 3 * MemoryConstants::megaByte, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u);
}
@@ -2427,7 +2430,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndCpuMemcpyWithD
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto phEvent = event->toHandle();
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, nullptr, 1, &phEvent);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, nullptr, 1, &phEvent, false);
EXPECT_EQ(cmdList.appendBarrierCalled, 1u);
}
@@ -2435,7 +2438,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndCpuMemcpyWitho
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.appendBarrierCalled, 0u);
}
@@ -2458,7 +2461,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagS
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto phEvent = event->toHandle();
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, phEvent, 0, nullptr);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, phEvent, 0, nullptr, false);
ze_kernel_timestamp_result_t resultTimestamp = {};
auto result = event->queryKernelTimestamp(&resultTimestamp);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
@@ -2490,7 +2493,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagN
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
auto phEvent = event->toHandle();
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, phEvent, 0, nullptr);
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 2 * MemoryConstants::kiloByte, phEvent, 0, nullptr, false);
ze_kernel_timestamp_result_t resultTimestamp = {};
auto result = event->queryKernelTimestamp(&resultTimestamp);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);

View File

@@ -41,7 +41,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsAllowe
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_TRUE(commandList->hasIndirectAllocationsAllowed());
@@ -57,7 +57,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAll
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_FALSE(commandList->hasIndirectAllocationsAllowed());
@@ -101,7 +101,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, commandList->getFinalStreamState().stateComputeMode.threadArbitrationPolicy.value);
@@ -123,7 +123,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, commandList->getFinalStreamState().stateComputeMode.threadArbitrationPolicy.value);
@@ -193,7 +193,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf);
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
@@ -208,13 +208,13 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf);
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
EXPECT_EQ(kernel.get(), commandList->getPrintfKernelContainer()[0]);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
}
@@ -231,13 +231,13 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendedToSynch
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, kernel.printPrintfOutputCalledTimes);
EXPECT_FALSE(kernel.hangDetectedPassedToPrintfOutput);
EXPECT_EQ(0u, commandList->getPrintfKernelContainer().size());
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(2u, kernel.printPrintfOutputCalledTimes);
EXPECT_FALSE(kernel.hangDetectedPassedToPrintfOutput);
@@ -256,13 +256,13 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendedToAsync
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, kernel.printPrintfOutputCalledTimes);
EXPECT_FALSE(kernel.hangDetectedPassedToPrintfOutput);
EXPECT_EQ(0u, commandList->getPrintfKernelContainer().size());
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(2u, kernel.printPrintfOutputCalledTimes);
EXPECT_FALSE(kernel.hangDetectedPassedToPrintfOutput);
@@ -286,13 +286,13 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendToSynchro
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
EXPECT_EQ(1u, kernel.printPrintfOutputCalledTimes);
EXPECT_TRUE(kernel.hangDetectedPassedToPrintfOutput);
EXPECT_EQ(0u, commandList->getPrintfKernelContainer().size());
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
EXPECT_EQ(2u, kernel.printPrintfOutputCalledTimes);
EXPECT_TRUE(kernel.hangDetectedPassedToPrintfOutput);
@@ -317,7 +317,7 @@ HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDe
const_cast<KernelDescriptor::AddressingMode &>(kernel->getKernelDescriptor().kernelAttributes.bufferAddressingMode) = KernelDescriptor::BindfulAndStateless;
CmdListKernelLaunchParams launchParams = {};
for (size_t i = 0; i < sshHeapSize / kernelSshSize + 1; i++) {
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -352,7 +352,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(
kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -447,7 +447,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelLaunchWithTSEventAndScopeFla
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(
kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -476,7 +476,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenForcePipeControlPriorToWalkerKeyTh
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandListBase->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandListBase->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandListBase->commandContainer.getCommandStream()->getUsed();
@@ -497,7 +497,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenForcePipeControlPriorToWalkerKeyTh
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
usedSpaceBefore = commandListWithDebugKey->commandContainer.getCommandStream()->getUsed();
result = commandListWithDebugKey->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandListWithDebugKey->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
usedSpaceAfter = commandListWithDebugKey->commandContainer.getCommandStream()->getUsed();
@@ -533,7 +533,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenForcePipeControlPriorToWalkerKeyAn
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto secondBatchBufferAllocation = commandList->commandContainer.getCommandStream()->getGraphicsAllocation();
@@ -564,7 +564,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCommandListWhenAppendLaunchKernelS
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
for (uint32_t i = 0; i < NEO::TimestampPacketSizeControl::preferredPacketCount + 4; i++) {
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
EXPECT_EQ(1u, event->getPacketsInUse());
@@ -590,13 +590,13 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWhenAppendingThenWo
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
static_cast<ze_group_count_t *>(alloc),
nullptr, 0, nullptr);
nullptr, 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
kernel.groupSize[2] = 2;
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
static_cast<ze_group_count_t *>(alloc),
nullptr, 0, nullptr);
nullptr, 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
GenCmdList cmdList;
@@ -684,7 +684,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenCommandListWhenResetCalledThenState
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(
kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->close();
@@ -742,7 +742,7 @@ HWTEST_F(CommandListAppendLaunchKernel, WhenAddingKernelsThenResidencyContainerD
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
for (int i = 0; i < 4; ++i) {
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -780,7 +780,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenSingleValidWaitEventsThenAddSemapho
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 1, &hEventHandle, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 1, &hEventHandle, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -838,7 +838,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenMultipleValidWaitEventsThenAddSemap
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 2, waitEvents, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 2, waitEvents, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -859,7 +859,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidEventListWhenAppendLaunchCoo
ze_group_count_t groupCount{1, 1, 1};
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 1, nullptr);
returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 1, nullptr, false);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
}
@@ -873,7 +873,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendLaun
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
ze_group_count_t groupCount{1, 1, 1};
ze_result_t returnValue;
returnValue = cmdList.appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
returnValue = cmdList.appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
EXPECT_EQ(0u, cmdList.executeCommandListImmediateCalledCount);
EXPECT_EQ(1u, cmdList.executeCommandListImmediateWithFlushTaskCalledCount);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
@@ -888,7 +888,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendLaun
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
ze_group_count_t groupCount{1, 1, 1};
ze_result_t returnValue;
returnValue = cmdList.appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
returnValue = cmdList.appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
EXPECT_EQ(1u, cmdList.executeCommandListImmediateCalledCount);
EXPECT_EQ(0u, cmdList.executeCommandListImmediateWithFlushTaskCalledCount);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

View File

@@ -85,7 +85,7 @@ HWTEST_F(CommandListDualStorage, givenIndirectDispatchWithSharedDualStorageMemor
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
pThreadGroupDimensions,
nullptr, 0, nullptr);
nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(pThreadGroupDimensions);
@@ -257,7 +257,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListDualStorage, givenIndirectDispatchWithSh
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
pThreadGroupDimensions,
nullptr, 0, nullptr);
nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(pThreadGroupDimensions);
@@ -401,7 +401,7 @@ HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendLaunchK
auto usedSpaceBefore = cmdStream->getUsed();
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = cmdStream->getUsed();
@@ -696,7 +696,7 @@ HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendMemoryC
void *srcBuffer = reinterpret_cast<void *>(0x0F000000);
void *dstBuffer = reinterpret_cast<void *>(0x0FF00000);
size_t size = 1024;
auto result = commandList->appendMemoryCopy(dstBuffer, srcBuffer, size, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dstBuffer, srcBuffer, size, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = cmdStream->getUsed();
@@ -757,7 +757,7 @@ HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendMemoryC
ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U};
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U};
ze_result_t result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0,
srcBuffer, &sr, width, 0, nullptr, 0, nullptr);
srcBuffer, &sr, width, 0, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = cmdStream->getUsed();
@@ -988,7 +988,7 @@ struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKerne
ze_group_count_t groupCount{expectedImplicitArgs.groupCountX, expectedImplicitArgs.groupCountY, expectedImplicitArgs.groupCountZ};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&expectedImplicitArgs, *kernelDescriptor, neoDevice->getHardwareInfo());
@@ -1133,7 +1133,7 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithoutImplicitArgsWhenAppen
ze_group_count_t groupCount = {3, 2, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto indirectHeap = commandList->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
@@ -1171,7 +1171,7 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests, givenKernelWitchScratchAndPrivateWhenA
ze_group_count_t groupCount = {3, 2, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(commandList->getCommandListPerThreadPrivateScratchSize(), static_cast<uint32_t>(0x100));
@@ -1413,7 +1413,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchKernelXeHpCoreTest,
ze_group_count_t groupCount{256, 1, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, hEventHandle, 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, hEventHandle, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(4u, event->getPacketsInUse());
@@ -1469,7 +1469,7 @@ HWTEST2_F(MultiTileCommandListAppendLaunchKernelXeHpCoreTest,
auto sizeBefore = cmdStream->getUsed();
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.get(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.get(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto sizeAfter = cmdStream->getUsed();

View File

@@ -47,7 +47,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBind
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
auto commandStream = commandList->commandContainer.getCommandStream();
@@ -95,7 +95,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenEventsWhenAppend
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(
kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -143,7 +143,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMult
ze_device_mem_alloc_desc_t deviceDesc = {};
auto result = context->allocDeviceMem(
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
result = commandList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, numLaunchArgs, nullptr, nullptr, 0, nullptr);
result = commandList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, numLaunchArgs, nullptr, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
*numLaunchArgs = 0;
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -172,7 +172,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMult
ze_device_mem_alloc_desc_t deviceDesc = {};
auto result = context->allocDeviceMem(
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchKernels, numLaunchArgs, nullptr, nullptr, 0, nullptr);
result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchKernels, numLaunchArgs, nullptr, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
*numLaunchArgs = 2;
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@@ -219,7 +219,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingL
CmdListKernelLaunchParams launchParams = {};
result = commandList0->appendLaunchKernel(
kernel->toHandle(),
&groupCount, nullptr, 0, nullptr, launchParams);
&groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -246,7 +246,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingL
CmdListKernelLaunchParams launchParams = {};
result = commandList0->appendLaunchKernel(
kernel->toHandle(),
&groupCount, nullptr, 1, nullptr, launchParams);
&groupCount, nullptr, 1, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
@@ -271,7 +271,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingL
result = commandList0->appendLaunchKernelIndirect(
kernel->toHandle(),
&groupCount, nullptr, 0, nullptr);
&groupCount, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -297,7 +297,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingL
result = commandList0->appendLaunchKernelIndirect(
kernel->toHandle(),
&groupCount, nullptr, 1, nullptr);
&groupCount, nullptr, 1, nullptr, false);
ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
@@ -321,7 +321,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
engineGroupType = gfxCoreHelper.getEngineGroupType(aub_stream::EngineType::ENGINE_CCS, EngineUsage::Cooperative, *defaultHwInfo);
}
pCommandList->initialize(device, engineGroupType, 0u);
auto result = pCommandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr);
auto result = pCommandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
@@ -527,7 +527,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenRetur
ze_group_count_t groupCount{8, 1, 1};
CmdListKernelLaunchParams launchParams = {};
returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0U, nullptr, launchParams);
returnValue = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
}
@@ -568,7 +568,7 @@ HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWit
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
static_cast<ze_group_count_t *>(alloc),
nullptr, 0, nullptr);
nullptr, 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
auto heap = commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT);
uint64_t pImplicitArgsGPUVA = heap->getGraphicsAllocation()->getGpuAddress() + getIndirectHeapOffsetForImplicitArgsBuffer<FamilyType>(kernel);

View File

@@ -27,7 +27,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionC
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.hostPtrMap.size(), 2u);
}
@@ -38,7 +38,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCo
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 0};
ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 0};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
auto sshAlignmentMask = NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignmentMask();
EXPECT_TRUE(cmdList.srcAlignedPtr == (cmdList.srcAlignedPtr & sshAlignmentMask));
EXPECT_TRUE(cmdList.dstAlignedPtr == (cmdList.dstAlignedPtr & sshAlignmentMask));
@@ -51,7 +51,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCo
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
auto sshAlignmentMask = NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignmentMask();
EXPECT_TRUE(cmdList.srcAlignedPtr == (cmdList.srcAlignedPtr & sshAlignmentMask));
EXPECT_TRUE(cmdList.dstAlignedPtr == (cmdList.dstAlignedPtr & sshAlignmentMask));
@@ -64,7 +64,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemo
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 0};
ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 0};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.srcBlitCopyRegionOffset, 0u);
EXPECT_GT(cmdList.dstBlitCopyRegionOffset, 0u);
}
@@ -76,7 +76,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemo
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_GT(cmdList.srcBlitCopyRegionOffset, 0u);
EXPECT_GT(cmdList.dstBlitCopyRegionOffset, 0u);
}
@@ -88,7 +88,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndAlignedHostPointersWhenBlitMemory
void *dstPtr = alignDown(reinterpret_cast<void *>(0x2345), NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignment());
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
EXPECT_EQ(cmdList.srcBlitCopyRegionOffset, 0u);
EXPECT_EQ(cmdList.dstBlitCopyRegionOffset, 0u);
}
@@ -102,7 +102,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionC
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
auto &commandContainer = cmdList.commandContainer;
GenCmdList genCmdList;
@@ -133,7 +133,7 @@ HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyThen
commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
commandList->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, cmdQueue.executeCommandListsCalled);
@@ -154,8 +154,9 @@ HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyWith
commandList->device = device;
commandList->cmdQImmediate = &cmdQueue;
commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
commandList->csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, nullptr);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, nullptr, false);
ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
commandList->cmdQImmediate = nullptr;
@@ -175,7 +176,7 @@ HWTEST2_F(AppendMemoryCopy, givenAsyncImmediateCommandListWhenAppendingMemoryCop
commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
commandList->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, cmdQueue.executeCommandListsCalled);
@@ -220,7 +221,7 @@ HWTEST2_F(AppendMemoryCopy, givenAsyncImmediateCommandListWhenAppendingMemoryCop
expectedSize = alignUp(ultCsr->getCmdsSizeForHardwareContext() + sizeof(typename FamilyType::MI_BATCH_BUFFER_START), MemoryConstants::cacheLineSize);
}
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr));
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false));
EXPECT_EQ(expectedSize, ultCsr->getCS(0).getUsed());
EXPECT_TRUE(ultCsr->isMadeResident(commandList->commandContainer.getCommandStream()->getGraphicsAllocation()));
@@ -270,7 +271,7 @@ HWTEST2_F(AppendMemoryCopy, givenAsyncImmediateCommandListWhenAppendingMemoryCop
size_t csrOfffset = ultCsr->getCS(0).getUsed();
size_t cmdListOffset = commandList->commandContainer.getCommandStream()->getUsed();
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr));
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false));
EXPECT_EQ(csrOfffset, ultCsr->getCS(0).getUsed());
@@ -315,7 +316,7 @@ HWTEST2_F(AppendMemoryCopy, givenSyncImmediateCommandListWhenAppendingMemoryCopy
expectedSize = alignUp(ultCsr->getCmdsSizeForHardwareContext() + sizeof(typename FamilyType::MI_BATCH_BUFFER_START), MemoryConstants::cacheLineSize);
}
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr));
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false));
EXPECT_EQ(expectedSize, ultCsr->getCS(0).getUsed());
EXPECT_TRUE(ultCsr->isMadeResident(commandList->commandContainer.getCommandStream()->getGraphicsAllocation()));
@@ -364,7 +365,7 @@ HWTEST2_F(AppendMemoryCopy, givenSyncImmediateCommandListWhenAppendingMemoryCopy
size_t csrOfffset = ultCsr->getCS(0).getUsed();
size_t cmdListOffset = commandList->commandContainer.getCommandStream()->getUsed();
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr));
ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false));
EXPECT_EQ(csrOfffset, ultCsr->getCS(0).getUsed());
@@ -388,7 +389,7 @@ HWTEST2_F(AppendMemoryCopy, givenSyncModeImmediateCommandListWhenAppendingMemory
commandList->isSyncModeQueue = true;
commandList->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, cmdQueue.executeCommandListsCalled);
@@ -408,7 +409,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledT
auto &commandContainer = cmdList.commandContainer;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
GenCmdList genCmdList;
@@ -454,7 +455,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyT
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr, false);
EXPECT_GT(commandList.appendMemoryCopyBlitCalled, 1u);
EXPECT_EQ(1u, event->getPacketsInUse());
@@ -515,7 +516,7 @@ HWTEST2_F(AppendMemoryCopy,
uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(2u, commandList.appendMemoryCopyKernelWithGACalled);
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
EXPECT_EQ(1u, event->getPacketsInUse());
@@ -573,7 +574,7 @@ HWTEST2_F(AppendMemoryCopy,
uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
EXPECT_EQ(1u, event->getPacketsInUse());

View File

@@ -85,7 +85,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListWithDirectSubmissio
ultCsr->directSubmission.reset(directSubmission);
ze_event_handle_t hEventHandle = event->toHandle();
auto result = immCommandList->appendWaitOnEvents(1, &hEventHandle, true);
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = immCommandList->commandContainer.getCommandStream()->getUsed();
@@ -669,7 +669,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
auto eventHandle = event->toHandle();
ze_group_count_t group = {1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandListImmediate->appendLaunchKernel(kernel->toHandle(), &group, nullptr, 1, &eventHandle, launchParams);
commandListImmediate->appendLaunchKernel(kernel->toHandle(), &group, nullptr, 1, &eventHandle, launchParams, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}
@@ -679,7 +679,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
auto eventHandle = event->toHandle();
ze_group_count_t group = {1, 1, 1};
commandListImmediate->appendLaunchKernelIndirect(kernel->toHandle(), &group, nullptr, 1, &eventHandle);
commandListImmediate->appendLaunchKernelIndirect(kernel->toHandle(), &group, nullptr, 1, &eventHandle, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}
@@ -699,7 +699,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
auto eventHandle = event->toHandle();
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
commandListImmediate->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, &eventHandle);
commandListImmediate->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, &eventHandle, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}
@@ -712,7 +712,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {};
ze_copy_region_t srcRegion = {};
commandListImmediate->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 1, &eventHandle);
commandListImmediate->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 1, &eventHandle, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}
@@ -727,7 +727,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
int one = 1;
commandListImmediate->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4096,
nullptr, 1u, &eventHandle);
nullptr, 1, &eventHandle, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
@@ -738,7 +738,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
auto eventHandle = event->toHandle();
commandListImmediate->appendWaitOnEvents(1u, &eventHandle, false);
commandListImmediate->appendWaitOnEvents(1, &eventHandle, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}
@@ -778,7 +778,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2};
auto eventHandle = event->toHandle();
commandListImmediate->appendImageCopyRegion(imageDst->toHandle(), imageSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 1, &eventHandle);
commandListImmediate->appendImageCopyRegion(imageDst->toHandle(), imageSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 1, &eventHandle, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}
@@ -803,7 +803,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
std::unique_ptr<L0::Image> image(imagePtr);
auto eventHandle = event->toHandle();
commandListImmediate->appendImageCopyFromMemory(imagePtr->toHandle(), ptr, nullptr, nullptr, 1, &eventHandle);
commandListImmediate->appendImageCopyFromMemory(imagePtr->toHandle(), ptr, nullptr, nullptr, 1, &eventHandle, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}
@@ -828,7 +828,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
std::unique_ptr<L0::Image> image(imagePtr);
auto eventHandle = event->toHandle();
commandListImmediate->appendImageCopyToMemory(ptr, imagePtr->toHandle(), nullptr, nullptr, 1, &eventHandle);
commandListImmediate->appendImageCopyToMemory(ptr, imagePtr->toHandle(), nullptr, nullptr, 1, &eventHandle, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}
@@ -852,7 +852,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate
ze_group_count_t groupCount{1, 1, 1};
auto eventHandle = event->toHandle();
commandListImmediate->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 1, &eventHandle);
commandListImmediate->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 1, &eventHandle, false);
EXPECT_TRUE(ultCsr.downloadAllocationsCalled);
}

View File

@@ -69,7 +69,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListWhenAppenBlitFillCalledWithL
cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
uint64_t pattern[4] = {1, 2, 3, 4};
void *ptr = reinterpret_cast<void *>(0x1234);
auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr);
auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_SIZE, ret);
}
@@ -78,7 +78,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListWhenAppenBlitFillToNotDevice
cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u);
uint8_t pattern = 1;
void *ptr = reinterpret_cast<void *>(0x1234);
auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr);
auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr, false);
EXPECT_EQ(ret, ZE_RESULT_ERROR_INVALID_ARGUMENT);
}
@@ -96,7 +96,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListWhenAppenBlitFillThenCopyBlt
commandList.initialize(device, NEO::EngineGroupType::Copy, 0u);
uint16_t pattern = 1;
void *ptr = reinterpret_cast<void *>(0x1234);
commandList.appendMemoryFill(ptr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr);
commandList.appendMemoryFill(ptr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), commandList.commandContainer.getCommandStream()->getUsed()));
@@ -126,7 +126,7 @@ HWTEST2_F(AppendMemoryCopy,
commandList.initialize(device, NEO::EngineGroupType::Copy, 0u);
uint32_t pattern = 1;
ze_result_t result = commandList.appendMemoryFill(hostPointer.get(), reinterpret_cast<void *>(&pattern), sizeof(pattern), size, nullptr, 0, nullptr);
ze_result_t result = commandList.appendMemoryFill(hostPointer.get(), reinterpret_cast<void *>(&pattern), sizeof(pattern), size, nullptr, 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
GenCmdList cmdList;
@@ -152,7 +152,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCop
commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, false);
auto &commandContainer = commandList->commandContainer;
GenCmdList genCmdList;
@@ -177,7 +177,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCop
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1};
ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1};
commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
auto &commandContainer = commandList->commandContainer;
GenCmdList genCmdList;
@@ -247,7 +247,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyR
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
MemoryPool::System4KBPages, MemoryManager::maxOsContextCount);
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, {0, 0, 0}, 0, 0, 0, 0, 0, 0, event.get(), 0, nullptr);
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, {0, 0, 0}, 0, 0, 0, 0, 0, 0, event.get(), 0, nullptr, false);
GenCmdList cmdList;
auto baseAddr = event->getGpuAddress(device);
@@ -329,7 +329,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenCopyFromImagBlitThenCommandA
imageHWSrc->initialize(device, &zeDesc);
imageHWDst->initialize(device, &zeDesc);
commandList->appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, nullptr, nullptr, 0, nullptr);
commandList->appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, nullptr, nullptr, 0, nullptr, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
@@ -346,7 +346,7 @@ HWTEST2_F(AppendMemoryCopyFromContext, givenCommandListThenUpOnPerformingAppendM
commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}

View File

@@ -113,7 +113,7 @@ void testSingleTileAppendMemoryCopyThreeKernels(CopyTestInput &input, TestExpect
uint64_t secondKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress + event->getSinglePacketSize();
uint64_t thirdKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress + 2 * event->getSinglePacketSize();
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
@@ -199,7 +199,7 @@ void testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush(CopyTestInput &input,
uint64_t secondKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress + event->getSinglePacketSize();
uint64_t thirdKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress + 2 * event->getSinglePacketSize();
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
@@ -303,7 +303,7 @@ void testSingleTileAppendMemoryCopySingleKernel(CopyTestInput &input, TestExpect
uint64_t gpuBaseAddress = event->getGpuAddress(input.device);
uint64_t firstKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress;
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(1u, commandList.appendMemoryCopyKernelWithGACalled);
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
@@ -377,7 +377,7 @@ void testSingleTileAppendMemoryCopySingleKernelAndL3Flush(CopyTestInput &input,
uint64_t firstKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress;
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(1u, commandList.appendMemoryCopyKernelWithGACalled);
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
@@ -467,7 +467,7 @@ void testSingleTileAppendMemoryCopySignalScopeEventToSubDevice(CopyTestInput &in
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, input.device));
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event.get(), 0u, nullptr);
result = commandList->appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event.get(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -533,7 +533,7 @@ void testMultiTileAppendMemoryCopyThreeKernels(CopyTestInput &input, TestExpecte
uint64_t secondKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress + 2 * event->getSinglePacketSize();
uint64_t thirdKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress + 4 * event->getSinglePacketSize();
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
@@ -627,7 +627,7 @@ void testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush(CopyTestInput &input, T
uint64_t thirdKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress + 4 * event->getSinglePacketSize();
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr, false);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
@@ -747,7 +747,7 @@ void testMultiTileAppendMemoryCopySingleKernel(CopyTestInput &input, TestExpecte
uint64_t gpuBaseAddress = event->getGpuAddress(input.device);
uint64_t firstKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress;
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(1u, commandList.appendMemoryCopyKernelWithGACalled);
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
@@ -826,7 +826,7 @@ void testMultiTileAppendMemoryCopySingleKernelAndL3Flush(CopyTestInput &input, T
uint64_t firstKernelEventAddress = arg.postSyncAddressZero ? 0 : gpuBaseAddress;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr);
commandList.appendMemoryCopy(input.dstPtr, input.srcPtr, input.size, event->toHandle(), 0, nullptr, false);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
EXPECT_EQ(1u, commandList.appendMemoryCopyKernelWithGACalled);

View File

@@ -34,7 +34,7 @@ HWTEST2_F(AppendFillTest,
auto result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern,
sizeof(immediatePattern),
immediateAllocSize, nullptr, 0, nullptr);
immediateAllocSize, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -45,7 +45,7 @@ HWTEST2_F(AppendFillTest,
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr);
auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -57,7 +57,7 @@ HWTEST2_F(AppendFillTest,
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
commandList->thresholdOfCallsToAppendLaunchKernelWithParamsToFail = 0;
auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr);
auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, false);
EXPECT_NE(ZE_RESULT_SUCCESS, result);
}
@@ -68,13 +68,13 @@ HWTEST2_F(AppendFillTest,
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr);
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t patternAllocationsVectorSize = commandList->patternAllocations.size();
EXPECT_EQ(patternAllocationsVectorSize, 1u);
uint8_t *newDstPtr = new uint8_t[allocSize];
result = commandList->appendMemoryFill(newDstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr);
result = commandList->appendMemoryFill(newDstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t newPatternAllocationsVectorSize = commandList->patternAllocations.size();
@@ -90,13 +90,13 @@ HWTEST2_F(AppendFillTest,
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr);
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t patternAllocationsVectorSize = commandList->patternAllocations.size();
EXPECT_EQ(patternAllocationsVectorSize, 1u);
uint8_t newPattern[patternSize] = {1, 2, 3, 4};
result = commandList->appendMemoryFill(dstPtr, newPattern, patternSize, allocSize, nullptr, 0, nullptr);
result = commandList->appendMemoryFill(dstPtr, newPattern, patternSize, allocSize, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t newPatternAllocationsVectorSize = commandList->patternAllocations.size();
@@ -112,7 +112,7 @@ HWTEST2_F(AppendFillTest,
int pattern = 0;
const size_t size = 1024 * 1024;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
EXPECT_EQ(size, commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16);
@@ -128,7 +128,7 @@ HWTEST2_F(AppendFillTest,
int pattern = 0;
const size_t size = 1025;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, false);
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX;
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -146,7 +146,7 @@ HWTEST2_F(AppendFillTest,
int pattern = 0;
const size_t size = 4;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, false);
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX;
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -164,7 +164,7 @@ HWTEST2_F(AppendFillTest,
int pattern = 0;
const size_t size = neoDevice->getDeviceInfo().maxWorkGroupSize / 2;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, false);
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
@@ -181,7 +181,7 @@ HWTEST2_F(AppendFillTest,
int pattern = 0;
const size_t size = 1024 * 1024;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto groupSize = device->getDeviceInfo().maxWorkGroupSize;
auto dataTypeSize = sizeof(uint32_t) * 4;
@@ -200,7 +200,7 @@ HWTEST2_F(AppendFillTest,
uint32_t offset = 1;
const size_t size = 1024;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr);
ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr, false);
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX;
filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX * 16;
filledSize += commandList->xGroupSizes[2] * commandList->threadGroupDimensions[2].groupCountX;
@@ -221,7 +221,7 @@ HWTEST2_F(AppendFillTest,
uint32_t offset = 1;
const size_t size = 2;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr);
ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr, false);
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX;
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -241,7 +241,7 @@ HWTEST2_F(AppendFillTest,
uint32_t offset = 1;
const size_t size = 1024;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr);
ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr, false);
EXPECT_NE(ZE_RESULT_SUCCESS, result);
delete[] ptr;
}
@@ -255,7 +255,7 @@ HWTEST2_F(AppendFillTest,
size_t nonMultipleSize = allocSize + 1;
uint8_t *nonMultipleDstPtr = new uint8_t[nonMultipleSize];
auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr);
auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
delete[] nonMultipleDstPtr;
@@ -271,7 +271,7 @@ HWTEST2_F(AppendFillTest,
size_t nonMultipleSize = allocSize + 1;
uint8_t *nonMultipleDstPtr = new uint8_t[nonMultipleSize];
auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr);
auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr, false);
EXPECT_NE(ZE_RESULT_SUCCESS, result);
delete[] nonMultipleDstPtr;
@@ -306,7 +306,7 @@ HWTEST2_F(AppendFillTest,
result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern,
sizeof(immediatePattern),
immediateAllocSize, event->toHandle(), 0, nullptr);
immediateAllocSize, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, event->getPacketsInUse());
@@ -360,7 +360,7 @@ HWTEST2_F(AppendFillTest,
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr);
result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, event->getPacketsInUse());

View File

@@ -109,7 +109,7 @@ void testSingleTileAppendMemoryFillManyImmediateKernels(FillTestInput &input, Te
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(input.dstPtr, input.patternPtr,
input.patternSize, input.allocSize, event->toHandle(), 0, nullptr);
input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -189,7 +189,7 @@ void testSingleTileAppendMemoryFillManyKernels(FillTestInput &input, TestExpecte
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(input.dstPtr, input.patternPtr,
input.patternSize, input.allocSize, event->toHandle(), 0, nullptr);
input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -270,7 +270,7 @@ void testSingleTileAppendMemoryFillManyKernelsAndL3Flush(FillTestInput &input, T
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(input.dstPtr, input.patternPtr,
input.patternSize, input.allocSize, event->toHandle(), 0, nullptr);
input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -348,7 +348,7 @@ void testSingleTileAppendMemoryFillSingleKernel(FillTestInput &input, TestExpect
auto &commandContainer = commandList->commandContainer;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr);
result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -427,7 +427,7 @@ void testSingleTileAppendMemoryFillSingleKernelAndL3Flush(FillTestInput &input,
auto &commandContainer = commandList->commandContainer;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr);
result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -535,7 +535,7 @@ void testMultiTileAppendMemoryFillManyKernels(FillTestInput &input, TestExpected
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(input.dstPtr, input.patternPtr,
input.patternSize, input.allocSize, event->toHandle(), 0, nullptr);
input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();
@@ -646,7 +646,7 @@ void testMultiTileAppendMemoryFillSingleKernelAndL3Flush(FillTestInput &input, T
auto &commandContainer = commandList->commandContainer;
size_t usedBefore = commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr);
result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = commandContainer.getCommandStream()->getUsed();

View File

@@ -135,7 +135,7 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
size_t dstRowPitch, size_t dstSlicePitch,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
if (signalEvent) {
useEvents = true;
} else {
@@ -150,7 +150,7 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
uint32_t dstPitch, size_t dstOffset,
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
size_t srcOffset, Event *signalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
appendMemoryCopyKernel2dCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -161,14 +161,14 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
uint32_t srcSlicePitch, size_t srcOffset,
Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
appendMemoryCopyKernel3dCalledTimes++;
return ZE_RESULT_SUCCESS;
}
ze_result_t appendBlitFill(void *ptr, const void *pattern,
size_t patternSize, size_t size,
Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override {
appendBlitFillCalledTimes++;
if (signalEvent) {
useEvents = true;

View File

@@ -366,7 +366,7 @@ struct CommandListAppendLaunchKernelCompactL3FlushEventFixture : public ModuleFi
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
EXPECT_EQ(arg.expectedKernelCount, event->getKernelCount());
@@ -609,7 +609,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
size_t sizeBefore = cmdStream->getUsed();
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
size_t sizeAfter = cmdStream->getUsed();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

View File

@@ -321,7 +321,7 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams, false);
const size_t numHandles = 100;
ze_command_list_handle_t cmdListHandles[numHandles];
@@ -371,7 +371,7 @@ HWTEST2_F(CommandQueueCreate, givenLogicalStateHelperWhenExecutingThenMergeState
ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams, false);
ze_command_list_handle_t cmdListHandles = commandList->toHandle();
@@ -407,7 +407,7 @@ HWTEST2_F(CommandQueueCreate, givenLogicalStateHelperAndImmediateCmdListWhenExec
ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams, false);
ze_command_list_handle_t cmdListHandles = commandList->toHandle();
@@ -434,7 +434,7 @@ HWTEST2_F(CommandQueueCreate, givenOutOfHostMemoryErrorFromSubmitBatchBufferWhen
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams, false);
ze_command_list_handle_t cmdListHandles[1] = {commandList->toHandle()};
@@ -457,7 +457,7 @@ HWTEST2_F(CommandQueueCreate, givenGpuHangInReservingLinearStreamWhenExecutingCo
ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams, false);
ze_command_list_handle_t cmdListHandles[1] = {commandList->toHandle()};

View File

@@ -746,7 +746,7 @@ HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAnd
ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams, false);
auto deviceImp = static_cast<DeviceImp *>(device);
auto pageFaultCmdQueue = whiteboxCast(deviceImp->pageFaultCommandList->cmdQImmediate);

View File

@@ -429,10 +429,8 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(),
&groupCount,
nullptr,
0,
nullptr,
launchParams);
nullptr, 0, nullptr,
launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()),
@@ -493,10 +491,8 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(),
&groupCount,
nullptr,
0,
nullptr,
launchParams);
nullptr, 0, nullptr,
launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()),
@@ -559,10 +555,8 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(),
&groupCount,
nullptr,
0,
nullptr,
launchParams);
nullptr, 0, nullptr,
launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_TRUE(gpuAlloc->isResident(csr.getOsContext().getContextId()));
@@ -611,10 +605,8 @@ HWTEST_F(CommandQueueIndirectAllocations, givenImmediateCommandListAndFlushTaskW
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(),
&groupCount,
nullptr,
0,
nullptr,
launchParams);
nullptr, 0, nullptr,
launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc);

View File

@@ -373,7 +373,7 @@ struct PauseOnGpuFixture : public Test<ModuleFixture> {
}
void enqueueKernel() {
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->close();
@@ -415,7 +415,7 @@ struct PauseOnGpuTests : public PauseOnGpuFixture {
}
void enqueueKernel() {
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->close();
@@ -580,7 +580,7 @@ struct PauseOnGpuWithImmediateCommandListTests : public PauseOnGpuFixture {
}
void enqueueKernel() {
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
};

View File

@@ -810,7 +810,7 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
int one = 1;
result = commandList0->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4090u,
nullptr, 0, nullptr);
nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 1u);
@@ -879,7 +879,7 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
int one = 1;
result = commandList0->appendMemoryFill(dstBuffer, reinterpret_cast<void *>(&one), sizeof(one), 4090u,
nullptr, 0, nullptr);
nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 3u);

View File

@@ -160,7 +160,7 @@ HWTEST2_P(L0DebuggerParameterizedTests, givenDebuggerWhenAppendingKernelToComman
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
commandList->close();
@@ -190,7 +190,7 @@ HWTEST2_P(L0DebuggerParameterizedTests, givenDebuggerWhenAppendingKernelToComman
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
commandList->close();
@@ -370,7 +370,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledForImmediate
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue);
result = commandList->appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr);
result = commandList->appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
context->freeMem(dstPtr);
@@ -393,7 +393,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledForImmediate
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue);
result = commandList->appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr);
result = commandList->appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
context->freeMem(dstPtr);
@@ -416,7 +416,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledForImmediat
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue);
result = commandList->appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr);
result = commandList->appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
context->freeMem(dstPtr);
@@ -443,7 +443,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledForRegularCo
int pattern = 1;
auto commandList = CommandList::fromHandle(commandLists[0]);
result = commandList->appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr);
result = commandList->appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
@@ -478,7 +478,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledForRegularC
int pattern = 1;
auto commandList = CommandList::fromHandle(commandLists[0]);
result = commandList->appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr);
result = commandList->appendMemoryFill(dstPtr, reinterpret_cast<void *>(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
@@ -552,7 +552,7 @@ HWTEST2_F(L0DebuggerTest, givenDebuggerEnabledAndL1CachePolicyWBWhenAppendingThe
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
returnValue = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
returnValue = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
commandList->close();
@@ -612,7 +612,7 @@ HWTEST2_F(L0DebuggerTest, givenFlushTaskSubmissionAndSharedHeapsEnabledWhenAppen
CmdListKernelLaunchParams launchParams = {};
ze_group_count_t groupCount{1, 1, 1};
returnValue = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
returnValue = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto csrHeap = &commandList->csr->getIndirectHeap(NEO::HeapType::SURFACE_STATE, 0);
@@ -626,7 +626,7 @@ HWTEST2_F(L0DebuggerTest, givenFlushTaskSubmissionAndSharedHeapsEnabledWhenAppen
memset(debugSurfaceState, 0, sizeof(*debugSurfaceState));
returnValue = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
returnValue = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
ASSERT_EQ(0u, debugSurfaceState->getSurfaceBaseAddress());

View File

@@ -130,7 +130,7 @@ HWTEST2_F(singleAddressSpaceModeTest, givenImmediateCommandListWhenExecutingWith
csr.lastFlushedCommandStream = nullptr;
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, csr.lastFlushedCommandStream);
@@ -175,7 +175,7 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenImmediateCommandListWhenExecutingWithF
csr.lastFlushedCommandStream = nullptr;
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, csr.lastFlushedCommandStream);
@@ -234,7 +234,7 @@ HWTEST_P(L0DebuggerWithBlitterTest, givenInternalUsageImmediateCommandListWhenEx
csr.storeMakeResidentAllocations = true;
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto sbaBuffer = device->getL0Debugger()->getSbaTrackingBuffer(commandList->csr->getOsContext().getContextId());
@@ -266,7 +266,7 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledForImm
ze_group_count_t groupCount{1, 1, 1};
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue);
auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, 0, nullptr);
auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
commandList->destroy();
@@ -283,7 +283,7 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionDisabledForIm
ze_group_count_t groupCount{1, 1, 1};
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue);
auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, 0, nullptr);
auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
commandList->destroy();
@@ -299,7 +299,7 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledForImm
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
commandList->destroy();
@@ -315,7 +315,7 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionDisabledForIm
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
commandList->destroy();
@@ -336,7 +336,7 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledForImm
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue);
auto result = commandList->appendMemoryCopyRegion(dstPtr, &dr, 0, 0, srcPtr, &sr, 0, 0, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopyRegion(dstPtr, &dr, 0, 0, srcPtr, &sr, 0, 0, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
commandList->destroy();
@@ -363,7 +363,7 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledForReg
const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);
auto commandList = CommandList::fromHandle(commandLists[0]);
auto result = commandList->appendMemoryCopyRegion(dstPtr, &dr, 0, 0, srcPtr, &sr, 0, 0, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopyRegion(dstPtr, &dr, 0, 0, srcPtr, &sr, 0, 0, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
@@ -388,7 +388,7 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionDisabledForIm
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue);
auto result = commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
commandList->destroy();
@@ -406,7 +406,7 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledComman
ASSERT_NE(nullptr, commandList);
for (uint32_t count = 0; count < 2048; count++) {
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
commandList->destroy();
@@ -424,7 +424,7 @@ HWTEST2_P(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionDisabledComma
ASSERT_NE(nullptr, commandList);
for (uint32_t count = 0; count < 2048; count++) {
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
commandList->destroy();
@@ -500,7 +500,7 @@ HWTEST_P(L0DebuggerWithBlitterTest, givenDebuggingEnabledWhenCommandListIsExecut
char src[8];
char dest[8];
auto result = commandList->appendMemoryCopy(dest, src, 8, nullptr, 0, nullptr);
auto result = commandList->appendMemoryCopy(dest, src, 8, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);

View File

@@ -3163,7 +3163,7 @@ HWTEST2_F(EventTimestampTest, givenAppendMemoryCopyIsCalledWhenCpuCopyIsUsedAndC
ze_device_mem_alloc_desc_t deviceDesc = {};
void *devicePtr;
context->allocDeviceMem(device->toHandle(), &deviceDesc, copySize, 1u, &devicePtr);
cmdList.appendMemoryCopy(devicePtr, hostPtr, copySize, event->toHandle(), 0, nullptr);
cmdList.appendMemoryCopy(devicePtr, hostPtr, copySize, event->toHandle(), 0, nullptr, false);
ze_kernel_timestamp_result_t result = {};
event->queryKernelTimestamp(&result);

View File

@@ -2931,7 +2931,7 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
commandList->initialize(device1, NEO::EngineGroupType::RenderCompute, 0u);
uint32_t pattern = 1;
result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr);
result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_ERROR_INVALID_ARGUMENT);
result = context->freeMem(ptr);
@@ -2957,7 +2957,7 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
commandList->initialize(device0, NEO::EngineGroupType::RenderCompute, 0u);
uint32_t pattern = 1;
result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr);
result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
result = context->freeMem(ptr);
@@ -2984,7 +2984,7 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
commandList->initialize(device1, NEO::EngineGroupType::RenderCompute, 0u);
uint32_t pattern = 1;
result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr);
result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
result = context->freeMem(ptr);
@@ -3015,7 +3015,7 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
commandList->initialize(device1, NEO::EngineGroupType::RenderCompute, 0u);
uint32_t pattern = 1;
result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr);
result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
result = context->freeMem(ptr);
@@ -3042,7 +3042,7 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
commandList->initialize(device0, NEO::EngineGroupType::RenderCompute, 0u);
uint32_t pattern = 1;
result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr);
result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
result = context->freeMem(ptr);
@@ -3061,7 +3061,7 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
commandList->initialize(device0, NEO::EngineGroupType::RenderCompute, 0u);
uint32_t pattern = 1;
ze_result_t result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr);
ze_result_t result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false);
EXPECT_EQ(result, ZE_RESULT_ERROR_INVALID_ARGUMENT);
delete[] ptr;

View File

@@ -253,7 +253,7 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_FALSE(memoryManager->setMemPrefetchCalled);
@@ -306,7 +306,7 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_TRUE(memoryManager->setMemPrefetchCalled);
@@ -367,7 +367,7 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, true);
@@ -446,7 +446,7 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr
auto prefetchManager = static_cast<MockPrefetchManager *>(memoryManager->prefetchManager.get());
EXPECT_EQ(2u, commandList->getPrefetchContext().allocations.size());
result = commandList->appendMemoryCopy(dstPtr, srcPtr, size, event->toHandle(), 0, nullptr);
result = commandList->appendMemoryCopy(dstPtr, srcPtr, size, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, true);
@@ -1078,7 +1078,7 @@ struct CommandListAppendLaunchMultiKernelEventFixture : public LocalMemoryModule
constexpr size_t offset = 32;
void *copyPtr = reinterpret_cast<uint8_t *>(ptr) + offset;
result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr);
result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList commands;
@@ -1138,7 +1138,7 @@ struct CommandListAppendLaunchMultiKernelEventFixture : public LocalMemoryModule
constexpr size_t offset = 32;
void *copyPtr = reinterpret_cast<uint8_t *>(ptr) + offset;
result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr);
result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList commands;

View File

@@ -10,6 +10,7 @@
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
@@ -320,7 +321,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
ze_host_mem_alloc_desc_t hostDesc = {};
context->allocHostMem(&hostDesc, size, alignment, &dstPtr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u);
@@ -370,7 +371,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
size, alignment, &srcPtr);
context->allocHostMem(&hostDesc, size, alignment, &dstPtr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 1u);
@@ -423,7 +424,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
&deviceDesc,
size, alignment, &dstPtr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u);
@@ -476,7 +477,68 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(commandList0->csr);
ultCsr->recordFlusheBatchBuffer = true;
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 0u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[0])->getCsr()->peekTaskCount(), 1u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getCsr()->peekTaskCount(), 1u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[2])->getCsr()->peekTaskCount(), 1u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[3])->getCsr()->peekTaskCount(), 1u);
EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies);
context->freeMem(srcPtr);
context->freeMem(dstPtr);
}
HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsCopyAndImmediateCommandListWithRelaxedOrderingWhenAppendingMemoryCopyThenSuccessIsReturned, IsXeHpcCore) {
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
DebugManagerStateRestore restorer;
DebugManager.flags.SplitBcsCopy.set(1);
ze_result_t returnValue;
auto hwInfo = *NEO::defaultHwInfo;
hwInfo.featureTable.ftrBcsInfo = 0b111111111;
hwInfo.capabilityTable.blitterOperationsSupported = true;
auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo);
auto testL0Device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));
ze_command_queue_desc_t desc = {};
desc.ordinal = static_cast<uint32_t>(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::Copy));
std::unique_ptr<L0::CommandList> commandList0(CommandList::createImmediate(productFamily,
testL0Device.get(),
&desc,
false,
NEO::EngineGroupType::Copy,
returnValue));
ASSERT_NE(nullptr, commandList0);
EXPECT_EQ(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs.size(), 4u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 0u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u);
constexpr size_t alignment = 4096u;
constexpr size_t size = 8 * MemoryConstants::megaByte;
void *srcPtr;
void *dstPtr;
ze_device_mem_alloc_desc_t deviceDesc = {};
context->allocDeviceMem(device->toHandle(),
&deviceDesc,
size, alignment, &srcPtr);
ze_host_mem_alloc_desc_t hostDesc = {};
context->allocHostMem(&hostDesc, size, alignment, &dstPtr);
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(commandList0->csr);
ultCsr->recordFlusheBatchBuffer = true;
auto directSubmission = new MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>(*ultCsr);
ultCsr->directSubmission.reset(directSubmission);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u);
@@ -533,7 +595,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
ze_host_mem_alloc_desc_t hostDesc = {};
context->allocHostMem(&hostDesc, size, alignment, &dstPtr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 1u);
@@ -584,7 +646,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
context->allocHostMem(&hostDesc, size, alignment, &dstPtr);
ze_copy_region_t region = {2, 1, 1, 4 * MemoryConstants::megaByte, 1, 1};
auto result = commandList0->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, 0, nullptr);
auto result = commandList0->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 1u);
@@ -648,7 +710,7 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
ze_host_mem_alloc_desc_t hostDesc = {};
context->allocHostMem(&hostDesc, size, alignment, &dstPtr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, event->toHandle(), 0, nullptr);
auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, event->toHandle(), 0, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u);
EXPECT_EQ(static_cast<CommandQueueImp *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 1u);

View File

@@ -436,7 +436,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpgCore, givenEventWhenAppendKernelIsCa
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams);
result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();