mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Add event profiling for copy commandLists
Change-Id: I9f13e48b4139b3ce3c802c2d38b0ce054e64562c Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
cb24b95833
commit
008af5b6e4
@@ -133,7 +133,8 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
uint64_t dstOffset,
|
||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||
uint64_t srcOffset, uint32_t size);
|
||||
uint64_t srcOffset, uint32_t size,
|
||||
ze_event_handle_t hSignalEvent);
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAlloc,
|
||||
NEO::GraphicsAllocation *dstAlloc,
|
||||
@@ -141,7 +142,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
size_t srcSize, size_t dstSize);
|
||||
size_t srcSize, size_t dstSize, ze_event_handle_t hSignalEvent);
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(NEO::GraphicsAllocation *dstAlloc, NEO::GraphicsAllocation *srcAlloc,
|
||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||
@@ -168,7 +169,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
size_t bytesPerPixel, Vec3<size_t> copySize,
|
||||
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize);
|
||||
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize, ze_event_handle_t hSignalEvent);
|
||||
|
||||
ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
@@ -183,6 +184,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
|
||||
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, uint32_t lws[3]);
|
||||
void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker);
|
||||
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);
|
||||
void appendSignalEventPostWalker(ze_event_handle_t hEvent);
|
||||
bool useMemCopyToBlitFill(size_t patternSize);
|
||||
|
||||
|
||||
@@ -125,9 +125,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||
ze_result_t ret = appendLaunchKernelWithParams(hKernel, pDispatchArgumentsBuffer,
|
||||
nullptr, true, false);
|
||||
|
||||
if (hEvent) {
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
}
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -160,9 +158,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
}
|
||||
}
|
||||
|
||||
if (hEvent) {
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
}
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -204,9 +200,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
|
||||
}
|
||||
|
||||
if (hSignalEvent) {
|
||||
this->appendSignalEventPostWalker(hSignalEvent);
|
||||
}
|
||||
appendSignalEventPostWalker(hSignalEvent);
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -225,9 +219,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||
|
||||
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
||||
|
||||
if (hSignalEvent) {
|
||||
this->appendSignalEventPostWalker(hSignalEvent);
|
||||
}
|
||||
this->appendSignalEventPostWalker(hSignalEvent);
|
||||
|
||||
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
|
||||
executeCommandListImmediate(true);
|
||||
@@ -273,7 +265,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
|
||||
if (isCopyOnlyCmdList) {
|
||||
return appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(),
|
||||
{0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch,
|
||||
rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize);
|
||||
rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, hEvent);
|
||||
}
|
||||
|
||||
Kernel *builtinKernel = nullptr;
|
||||
@@ -379,7 +371,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
|
||||
if (isCopyOnlyCmdList) {
|
||||
return appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc,
|
||||
{pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch,
|
||||
rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth});
|
||||
rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, hEvent);
|
||||
}
|
||||
|
||||
Kernel *builtinKernel = nullptr;
|
||||
@@ -525,7 +517,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
|
||||
|
||||
return appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(),
|
||||
{srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch,
|
||||
dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize);
|
||||
dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, hEvent);
|
||||
}
|
||||
|
||||
auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImageRegion);
|
||||
@@ -627,12 +619,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(NEO::Grap
|
||||
uint64_t dstOffset,
|
||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||
uint64_t srcOffset,
|
||||
uint32_t size) {
|
||||
uint32_t size,
|
||||
ze_event_handle_t hSignalEvent) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0);
|
||||
commandContainer.addToResidencyContainer(dstPtrAlloc);
|
||||
commandContainer.addToResidencyContainer(srcPtrAlloc);
|
||||
appendEventForProfiling(hSignalEvent, true);
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
this->appendSignalEventPostWalker(hSignalEvent);
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -643,7 +639,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
|
||||
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
size_t srcSize, size_t dstSize) {
|
||||
size_t srcSize, size_t dstSize, ze_event_handle_t hSignalEvent) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
Vec3<size_t> srcPtrOffset = {srcRegion.originX, srcRegion.originY, srcRegion.originZ};
|
||||
@@ -654,7 +650,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
|
||||
dstRowPitch, dstSlicePitch);
|
||||
commandContainer.addToResidencyContainer(dstAlloc);
|
||||
commandContainer.addToResidencyContainer(srcAlloc);
|
||||
appendEventForProfiling(hSignalEvent, true);
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
this->appendSignalEventPostWalker(hSignalEvent);
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -665,7 +664,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
size_t bytesPerPixel, Vec3<size_t> copySize,
|
||||
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize) {
|
||||
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize, ze_event_handle_t hSignalEvent) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dst, src,
|
||||
@@ -676,7 +675,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
|
||||
blitProperties.dstSize = dstSize;
|
||||
commandContainer.addToResidencyContainer(dst);
|
||||
commandContainer.addToResidencyContainer(src);
|
||||
appendEventForProfiling(hSignalEvent, true);
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImages(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
this->appendSignalEventPostWalker(hSignalEvent);
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -761,7 +763,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
|
||||
if (ret == ZE_RESULT_SUCCESS && leftSize) {
|
||||
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
||||
srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize))
|
||||
srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize), hSignalEvent)
|
||||
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||
dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||
@@ -772,7 +774,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
|
||||
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
|
||||
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
||||
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes))
|
||||
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes), hSignalEvent)
|
||||
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||
@@ -784,7 +786,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
|
||||
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
||||
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
||||
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize))
|
||||
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize), hSignalEvent)
|
||||
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||
@@ -793,9 +795,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
Builtin::CopyBufferToBufferSide);
|
||||
}
|
||||
|
||||
if (hSignalEvent) {
|
||||
this->appendSignalEventPostWalker(hSignalEvent);
|
||||
}
|
||||
this->appendSignalEventPostWalker(hSignalEvent);
|
||||
|
||||
if (dstAllocationStruct.needsFlush && !isCopyOnlyCmdList) {
|
||||
NEO::PipeControlArgs args(true);
|
||||
@@ -840,12 +840,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
if (srcRegion->depth > 1) {
|
||||
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize)
|
||||
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
|
||||
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent)
|
||||
: this->appendMemoryCopyKernel3d(dstAllocationStruct.alloc, srcAllocationStruct.alloc,
|
||||
Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstAllocationStruct.offset,
|
||||
srcRegion, srcPitch, srcSlicePitch, srcAllocationStruct.offset, hSignalEvent, 0, nullptr);
|
||||
} else {
|
||||
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize)
|
||||
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
|
||||
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent)
|
||||
: this->appendMemoryCopyKernel2d(dstAllocationStruct.alloc, srcAllocationStruct.alloc,
|
||||
Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstAllocationStruct.offset,
|
||||
srcRegion, srcPitch, srcAllocationStruct.offset, hSignalEvent, 0, nullptr);
|
||||
@@ -1095,9 +1097,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
0, nullptr);
|
||||
}
|
||||
|
||||
if (hEvent) {
|
||||
this->appendSignalEventPostWalker(hEvent);
|
||||
}
|
||||
this->appendSignalEventPostWalker(hEvent);
|
||||
|
||||
if (hostPointerNeedsFlush) {
|
||||
NEO::PipeControlArgs args(true);
|
||||
@@ -1141,15 +1141,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||
uint32_t patternToCommand[4] = {};
|
||||
memcpy_s(&patternToCommand, sizeof(patternToCommand), pattern, patternSize);
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitMemoryColorFill(allocData->gpuAllocation, patternToCommand, patternSize, *commandContainer.getCommandStream(), size, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
if (hEvent) {
|
||||
this->appendSignalEventPostWalker(hEvent);
|
||||
}
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_handle_t hEvent) {
|
||||
if (hEvent == nullptr) {
|
||||
return;
|
||||
}
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
if (event->isTimestampEvent) {
|
||||
appendEventForProfiling(hEvent, false);
|
||||
@@ -1157,6 +1158,22 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hEvent);
|
||||
}
|
||||
}
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
|
||||
if (!event->isTimestampEvent) {
|
||||
return;
|
||||
}
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation());
|
||||
auto baseAddr = event->getGpuAddress();
|
||||
auto contextOffset = beforeWalker ? offsetof(KernelTimestampEvent, contextStart) : offsetof(KernelTimestampEvent, contextEnd);
|
||||
auto globalOffset = beforeWalker ? offsetof(KernelTimestampEvent, globalStart) : offsetof(KernelTimestampEvent, globalEnd);
|
||||
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, ptrOffset(baseAddr, globalOffset));
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, ptrOffset(baseAddr, contextOffset));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline uint64_t CommandListCoreFamily<gfxCoreFamily>::getInputBufferSize(NEO::ImageType imageType,
|
||||
|
||||
@@ -60,9 +60,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
reinterpret_cast<const void *>(pThreadGroupDimensions), isIndirect, isPredicate, kernel,
|
||||
0, device->getNEODevice(), commandListPreemptionMode);
|
||||
|
||||
if (hEvent) {
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
}
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
|
||||
commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation());
|
||||
auto &residencyContainer = kernel->getResidencyContainer();
|
||||
@@ -82,32 +80,28 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
|
||||
if (!hEvent) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
|
||||
if (!event->isTimestampEvent) {
|
||||
return;
|
||||
}
|
||||
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation());
|
||||
auto baseAddr = event->getGpuAddress();
|
||||
|
||||
if (beforeWalker) {
|
||||
auto contextStartAddr = baseAddr;
|
||||
auto globalStartAddr = baseAddr + offsetof(KernelTimestampEvent, globalStart);
|
||||
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalStartAddr);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddr);
|
||||
if (isCopyOnly()) {
|
||||
appendEventForProfilingCopyCommand(hEvent, beforeWalker);
|
||||
} else {
|
||||
auto contextEndAddr = baseAddr + offsetof(KernelTimestampEvent, contextEnd);
|
||||
auto globalEndAddr = baseAddr + offsetof(KernelTimestampEvent, globalEnd);
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
|
||||
if (!event->isTimestampEvent) {
|
||||
return;
|
||||
}
|
||||
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation());
|
||||
auto baseAddr = event->getGpuAddress();
|
||||
|
||||
if (beforeWalker) {
|
||||
auto contextStartAddr = baseAddr;
|
||||
auto globalStartAddr = baseAddr + offsetof(KernelTimestampEvent, globalStart);
|
||||
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalStartAddr);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddr);
|
||||
|
||||
if (isCopyOnlyCmdList) {
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), globalEndAddr, 0llu, true, true);
|
||||
} else {
|
||||
auto contextEndAddr = baseAddr + offsetof(KernelTimestampEvent, contextEnd);
|
||||
auto globalEndAddr = baseAddr + offsetof(KernelTimestampEvent, globalEnd);
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = false;
|
||||
|
||||
|
||||
@@ -31,9 +31,17 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
: public ::L0::CommandListCoreFamily<gfxCoreFamily> {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using BaseClass = ::L0::CommandListCoreFamily<gfxCoreFamily>;
|
||||
using BaseClass::appendBlitFill;
|
||||
using BaseClass::appendCopyImageBlit;
|
||||
using BaseClass::appendEventForProfiling;
|
||||
using BaseClass::appendEventForProfilingCopyCommand;
|
||||
using BaseClass::appendLaunchKernelWithParams;
|
||||
using BaseClass::appendMemoryCopyBlit;
|
||||
using BaseClass::appendMemoryCopyBlitRegion;
|
||||
using BaseClass::appendSignalEventPostWalker;
|
||||
using BaseClass::commandListPreemptionMode;
|
||||
using BaseClass::getAlignedAllocation;
|
||||
using BaseClass::hostPtrMap;
|
||||
|
||||
WhiteBox() : ::L0::CommandListCoreFamily<gfxCoreFamily>(BaseClass::defaultNumIddsPerBlock) {}
|
||||
};
|
||||
|
||||
@@ -7,15 +7,16 @@
|
||||
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
|
||||
|
||||
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
|
||||
#include "test.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
|
||||
#include "level_zero/core/source/driver/driver_handle_imp.h"
|
||||
#include "level_zero/core/source/image/image_hw.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
|
||||
@@ -247,7 +248,7 @@ HWTEST_F(CommandListCreate, givenCommandListWhenSetBarrierThenPipeControlIsProgr
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
|
||||
public:
|
||||
MockCommandList() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>(1) {}
|
||||
MockCommandList() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}
|
||||
|
||||
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
|
||||
return {0, 0, nullptr, true};
|
||||
@@ -267,7 +268,7 @@ class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamil
|
||||
ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
uint64_t dstOffset,
|
||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||
uint64_t srcOffset, uint32_t size) override {
|
||||
uint64_t srcOffset, uint32_t size, ze_event_handle_t hSignalEvent) override {
|
||||
appendMemoryCopyBlitCalledTimes++;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -278,7 +279,7 @@ class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamil
|
||||
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
size_t srcSize, size_t dstSize) override {
|
||||
size_t srcSize, size_t dstSize, ze_event_handle_t hSignalEvent) override {
|
||||
appendMemoryCopyBlitRegionCalledTimes++;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -315,7 +316,7 @@ class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamil
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
size_t bytesPerPixel, Vec3<size_t> copySize,
|
||||
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize) override {
|
||||
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize, ze_event_handle_t hSignalEvent) override {
|
||||
appendCopyImageBlitCalledTimes++;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -400,7 +401,6 @@ using AppendMemoryCopy = CommandListCreate;
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
class MockAppendMemoryCopy : public MockCommandList<gfxCoreFamily> {
|
||||
public:
|
||||
using CommandList::hostPtrMap;
|
||||
// Forwards to the real CommandListCoreFamily::getAlignedAllocation, bypassing the
// stubbed override inherited from MockCommandList.
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize);
|
||||
}
|
||||
@@ -484,13 +484,13 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCop
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
|
||||
|
||||
WhiteBox<CommandListCoreFamily<gfxCoreFamily>> cmdList(1);
|
||||
cmdList.initialize(device, true);
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, true);
|
||||
void *srcPtr = reinterpret_cast<void *>(0x1234);
|
||||
void *dstPtr = reinterpret_cast<void *>(0x2345);
|
||||
cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
|
||||
commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
|
||||
|
||||
auto &commandContainer = cmdList.commandContainer;
|
||||
auto &commandContainer = commandList->commandContainer;
|
||||
GenCmdList genCmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -507,15 +507,15 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCop
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
|
||||
|
||||
WhiteBox<CommandListCoreFamily<gfxCoreFamily>> cmdList(1);
|
||||
cmdList.initialize(device, true);
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, true);
|
||||
void *srcPtr = reinterpret_cast<void *>(0x1234);
|
||||
void *dstPtr = reinterpret_cast<void *>(0x2345);
|
||||
ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1};
|
||||
ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1};
|
||||
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr);
|
||||
commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr);
|
||||
|
||||
auto &commandContainer = cmdList.commandContainer;
|
||||
auto &commandContainer = commandList->commandContainer;
|
||||
GenCmdList genCmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
@@ -627,7 +627,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushTheP
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
|
||||
public:
|
||||
MockCommandListForMemFill() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>(1) {}
|
||||
MockCommandListForMemFill() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}
|
||||
|
||||
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
|
||||
return {0, 0, nullptr, true};
|
||||
@@ -635,7 +635,7 @@ class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily<gf
|
||||
ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
uint64_t dstOffset,
|
||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||
uint64_t srcOffset, uint32_t size) override {
|
||||
uint64_t srcOffset, uint32_t size, ze_event_handle_t hSignalEvent) override {
|
||||
appendMemoryCopyBlitCalledTimes++;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -749,5 +749,195 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyFromImagBlitThenCommand
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
}
|
||||
|
||||
// Checks that appendMemoryCopyBlit, given a timestamp event, places an
// MI_STORE_REGISTER_MEM reading REG_GLOBAL_TIMESTAMP_LDW into the command stream.
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopyBlitThenTimeStampRegistersAreAdded, Platforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    // Second argument is true; presumably selects a copy-only list, per the test name.
    commandList->initialize(device, true);

    ze_event_pool_desc_t eventPoolDesc = {
        ZE_EVENT_POOL_DESC_VERSION_CURRENT,
        ZE_EVENT_POOL_FLAG_TIMESTAMP,
        1};
    ze_event_desc_t eventDesc = {
        ZE_EVENT_DESC_VERSION_CURRENT,
        0,
        ZE_EVENT_SCOPE_FLAG_NONE,
        ZE_EVENT_SCOPE_FLAG_NONE};
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
    auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));

    NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                  reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
                                                  MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                  reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
                                                  MemoryPool::System4KBPages);
    uint32_t size = 0x1000;

    commandList->appendMemoryCopyBlit(&mockAllocationDst, 0, &mockAllocationSrc, 0, size, event->toHandle());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    // Must be fatal: the iterator is dereferenced right below, and dereferencing
    // cmdList.end() after a non-fatal EXPECT failure would be undefined behaviour.
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
}
|
||||
|
||||
// Checks that appendMemoryCopyBlitRegion, given a timestamp event, places an
// MI_STORE_REGISTER_MEM reading REG_GLOBAL_TIMESTAMP_LDW into the command stream.
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopyRegionBlitThenTimeStampRegistersAreAdded, Platforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    // Second argument is true; presumably selects a copy-only list, per the test name.
    commandList->initialize(device, true);

    ze_event_pool_desc_t eventPoolDesc = {
        ZE_EVENT_POOL_DESC_VERSION_CURRENT,
        ZE_EVENT_POOL_FLAG_TIMESTAMP,
        1};
    ze_event_desc_t eventDesc = {
        ZE_EVENT_DESC_VERSION_CURRENT,
        0,
        ZE_EVENT_SCOPE_FLAG_NONE,
        ZE_EVENT_SCOPE_FLAG_NONE};
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
    auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));

    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
    NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                  reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
                                                  MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                  reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
                                                  MemoryPool::System4KBPages);

    // appendMemoryCopyBlitRegion is declared as (srcAlloc, dstAlloc, ...); pass the
    // allocations in that order so the call matches the parameter names.
    commandList->appendMemoryCopyBlitRegion(&mockAllocationSrc, &mockAllocationDst, srcRegion, dstRegion, {0, 0, 0}, 0, 0, 0, 0, 0, 0, event->toHandle());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    // Must be fatal: the iterator is dereferenced right below, and dereferencing
    // cmdList.end() after a non-fatal EXPECT failure would be undefined behaviour.
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
}
|
||||
|
||||
// Checks that appendCopyImageBlit, when handed an event created from a
// timestamp-flagged pool, programs an MI_STORE_REGISTER_MEM whose source
// register is REG_GLOBAL_TIMESTAMP_LDW.
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToImageCopyBlitThenTimeStampRegistersAreAdded, Platforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    // NOTE(review): the `true` argument presumably selects copy-only mode (per the test name) - confirm against initialize().
    commandList->initialize(device, true);

    ze_event_pool_desc_t eventPoolDesc = {
        ZE_EVENT_POOL_DESC_VERSION_CURRENT,
        ZE_EVENT_POOL_FLAG_TIMESTAMP,
        1};
    ze_event_desc_t eventDesc = {
        ZE_EVENT_DESC_VERSION_CURRENT,
        0,
        ZE_EVENT_SCOPE_FLAG_NONE,
        ZE_EVENT_SCOPE_FLAG_NONE};
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
    auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));

    NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                  reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
                                                  MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                  reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
                                                  MemoryPool::System4KBPages);

    commandList->appendCopyImageBlit(&mockAllocationDst, &mockAllocationSrc, {0, 0, 0}, {0, 0, 0}, 0, 0, 0, 0, 1, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, event->toHandle());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    // Fatal assertion: the iterator is dereferenced right below, so the test
    // must stop here instead of reading through cmdList.end().
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
}
|
||||
|
||||
// Checks that appendEventForProfilingCopyCommand(event, true) emits two
// consecutive MI_STORE_REGISTER_MEM commands: the global timestamp register
// stored at the event's globalStart offset, followed by the thread-time
// register stored at the event's contextStart offset.
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, Platforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    // NOTE(review): the `true` argument presumably selects copy-only mode (per the test name) - confirm against initialize().
    commandList->initialize(device, true);

    ze_event_pool_desc_t eventPoolDesc = {
        ZE_EVENT_POOL_DESC_VERSION_CURRENT,
        ZE_EVENT_POOL_FLAG_TIMESTAMP,
        1};
    ze_event_desc_t eventDesc = {
        ZE_EVENT_DESC_VERSION_CURRENT,
        0,
        ZE_EVENT_SCOPE_FLAG_NONE,
        ZE_EVENT_SCOPE_FLAG_NONE};
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
    auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));

    commandList->appendEventForProfilingCopyCommand(event->toHandle(), true);

    auto contextOffset = offsetof(KernelTimestampEvent, contextStart);
    auto globalOffset = offsetof(KernelTimestampEvent, globalStart);
    auto baseAddr = event->getGpuAddress();

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    // Fatal assertion: the iterator is dereferenced right below.
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset));

    // The second store is expected to follow immediately; stop the test if
    // the list ends here or the next command is of a different type.
    ASSERT_NE(cmdList.end(), ++itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
}
|
||||
|
||||
// Checks that appendEventForProfilingCopyCommand(event, false) emits two
// consecutive MI_STORE_REGISTER_MEM commands: the global timestamp register
// stored at the event's globalEnd offset, followed by the thread-time
// register stored at the event's contextEnd offset.
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, Platforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    // NOTE(review): the `true` argument presumably selects copy-only mode (per the test name) - confirm against initialize().
    commandList->initialize(device, true);

    ze_event_pool_desc_t eventPoolDesc = {
        ZE_EVENT_POOL_DESC_VERSION_CURRENT,
        ZE_EVENT_POOL_FLAG_TIMESTAMP,
        1};
    ze_event_desc_t eventDesc = {
        ZE_EVENT_DESC_VERSION_CURRENT,
        0,
        ZE_EVENT_SCOPE_FLAG_NONE,
        ZE_EVENT_SCOPE_FLAG_NONE};
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
    auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));

    commandList->appendEventForProfilingCopyCommand(event->toHandle(), false);

    auto contextOffset = offsetof(KernelTimestampEvent, contextEnd);
    auto globalOffset = offsetof(KernelTimestampEvent, globalEnd);
    auto baseAddr = event->getGpuAddress();

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    // Fatal assertion: the iterator is dereferenced right below.
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset));

    // The second store is expected to follow immediately; stop the test if
    // the list ends here or the next command is of a different type.
    ASSERT_NE(cmdList.end(), ++itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
}
|
||||
|
||||
// Checks that appendSignalEventPostWalker(nullptr) leaves the command stream
// untouched: the stream's used size is the same before and after the call.
HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothingAddedToStream, Platforms) {
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, true);

    // Snapshot taken after initialize(), so anything initialize() itself
    // writes is excluded from the comparison.
    auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed();

    commandList->appendSignalEventPostWalker(nullptr);

    EXPECT_EQ(commandList->commandContainer.getCommandStream()->getUsed(), usedBefore);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -150,6 +150,11 @@ struct EncodeStoreMMIO {
|
||||
static void encode(LinearStream &csr, uint32_t offset, uint64_t address);
|
||||
static void remapOffset(MI_STORE_REGISTER_MEM *pStoreRegMem);
|
||||
};
|
||||
// Per-family helper around MI_STORE_REGISTER_MEM commands.
// Only the declaration of appendRemap is visible here; its definition lives
// elsewhere.
// NOTE(review): by its name, appendRemap presumably applies a register-offset
// remap to the given command - confirm against the definition.
template <typename GfxFamily>
struct AppendStoreMMIO {
    // Command type taken from the hardware family.
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;

    static void appendRemap(MI_STORE_REGISTER_MEM *cmd);
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct EncodeSurfaceState {
|
||||
|
||||
Reference in New Issue
Block a user