Add event profiling for copy commandLists

Change-Id: I9f13e48b4139b3ce3c802c2d38b0ce054e64562c
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2020-06-01 15:34:10 +02:00
committed by sys_ocldev
parent cb24b95833
commit 008af5b6e4
6 changed files with 293 additions and 77 deletions

View File

@@ -133,7 +133,8 @@ struct CommandListCoreFamily : CommandListImp {
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset, uint32_t size);
uint64_t srcOffset, uint32_t size,
ze_event_handle_t hSignalEvent);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAlloc,
NEO::GraphicsAllocation *dstAlloc,
@@ -141,7 +142,7 @@ struct CommandListCoreFamily : CommandListImp {
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
size_t srcSize, size_t dstSize);
size_t srcSize, size_t dstSize, ze_event_handle_t hSignalEvent);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(NEO::GraphicsAllocation *dstAlloc, NEO::GraphicsAllocation *srcAlloc,
Builtin builtin, const ze_copy_region_t *dstRegion,
@@ -168,7 +169,7 @@ struct CommandListCoreFamily : CommandListImp {
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
size_t bytesPerPixel, Vec3<size_t> copySize,
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize);
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize, ze_event_handle_t hSignalEvent);
ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
const ze_group_count_t *pThreadGroupDimensions,
@@ -183,6 +184,7 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, uint32_t lws[3]);
void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker);
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);
void appendSignalEventPostWalker(ze_event_handle_t hEvent);
bool useMemCopyToBlitFill(size_t patternSize);

View File

@@ -125,9 +125,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
ze_result_t ret = appendLaunchKernelWithParams(hKernel, pDispatchArgumentsBuffer,
nullptr, true, false);
if (hEvent) {
appendSignalEventPostWalker(hEvent);
}
appendSignalEventPostWalker(hEvent);
return ret;
}
@@ -160,9 +158,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
}
}
if (hEvent) {
appendSignalEventPostWalker(hEvent);
}
appendSignalEventPostWalker(hEvent);
return ZE_RESULT_SUCCESS;
}
@@ -204,9 +200,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
if (hSignalEvent) {
this->appendSignalEventPostWalker(hSignalEvent);
}
appendSignalEventPostWalker(hSignalEvent);
return ZE_RESULT_SUCCESS;
}
@@ -225,9 +219,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
if (hSignalEvent) {
this->appendSignalEventPostWalker(hSignalEvent);
}
this->appendSignalEventPostWalker(hSignalEvent);
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
executeCommandListImmediate(true);
@@ -273,7 +265,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
if (isCopyOnlyCmdList) {
return appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(),
{0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch,
rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize);
rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, hEvent);
}
Kernel *builtinKernel = nullptr;
@@ -379,7 +371,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
if (isCopyOnlyCmdList) {
return appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc,
{pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch,
rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth});
rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, hEvent);
}
Kernel *builtinKernel = nullptr;
@@ -525,7 +517,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
return appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(),
{srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize);
dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, hEvent);
}
auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImageRegion);
@@ -627,12 +619,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(NEO::Grap
uint64_t dstOffset,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint32_t size) {
uint32_t size,
ze_event_handle_t hSignalEvent) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0);
commandContainer.addToResidencyContainer(dstPtrAlloc);
commandContainer.addToResidencyContainer(srcPtrAlloc);
appendEventForProfiling(hSignalEvent, true);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
this->appendSignalEventPostWalker(hSignalEvent);
return ZE_RESULT_SUCCESS;
}
@@ -643,7 +639,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
size_t srcSize, size_t dstSize) {
size_t srcSize, size_t dstSize, ze_event_handle_t hSignalEvent) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
Vec3<size_t> srcPtrOffset = {srcRegion.originX, srcRegion.originY, srcRegion.originZ};
@@ -654,7 +650,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
dstRowPitch, dstSlicePitch);
commandContainer.addToResidencyContainer(dstAlloc);
commandContainer.addToResidencyContainer(srcAlloc);
appendEventForProfiling(hSignalEvent, true);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
this->appendSignalEventPostWalker(hSignalEvent);
return ZE_RESULT_SUCCESS;
}
@@ -665,7 +664,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
size_t bytesPerPixel, Vec3<size_t> copySize,
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize) {
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize, ze_event_handle_t hSignalEvent) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dst, src,
@@ -676,7 +675,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
blitProperties.dstSize = dstSize;
commandContainer.addToResidencyContainer(dst);
commandContainer.addToResidencyContainer(src);
appendEventForProfiling(hSignalEvent, true);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImages(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
this->appendSignalEventPostWalker(hSignalEvent);
return ZE_RESULT_SUCCESS;
}
@@ -761,7 +763,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
if (ret == ZE_RESULT_SUCCESS && leftSize) {
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, dstAllocationStruct.offset,
srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize))
srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
@@ -772,7 +774,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes))
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
@@ -784,7 +786,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
if (ret == ZE_RESULT_SUCCESS && rightSize) {
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize))
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
@@ -793,9 +795,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
Builtin::CopyBufferToBufferSide);
}
if (hSignalEvent) {
this->appendSignalEventPostWalker(hSignalEvent);
}
this->appendSignalEventPostWalker(hSignalEvent);
if (dstAllocationStruct.needsFlush && !isCopyOnlyCmdList) {
NEO::PipeControlArgs args(true);
@@ -840,12 +840,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
ze_result_t result = ZE_RESULT_SUCCESS;
if (srcRegion->depth > 1) {
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize)
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent)
: this->appendMemoryCopyKernel3d(dstAllocationStruct.alloc, srcAllocationStruct.alloc,
Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstAllocationStruct.offset,
srcRegion, srcPitch, srcSlicePitch, srcAllocationStruct.offset, hSignalEvent, 0, nullptr);
} else {
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize)
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent)
: this->appendMemoryCopyKernel2d(dstAllocationStruct.alloc, srcAllocationStruct.alloc,
Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstAllocationStruct.offset,
srcRegion, srcPitch, srcAllocationStruct.offset, hSignalEvent, 0, nullptr);
@@ -1095,9 +1097,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
0, nullptr);
}
if (hEvent) {
this->appendSignalEventPostWalker(hEvent);
}
this->appendSignalEventPostWalker(hEvent);
if (hostPointerNeedsFlush) {
NEO::PipeControlArgs args(true);
@@ -1141,15 +1141,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
uint32_t patternToCommand[4] = {};
memcpy_s(&patternToCommand, sizeof(patternToCommand), pattern, patternSize);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitMemoryColorFill(allocData->gpuAllocation, patternToCommand, patternSize, *commandContainer.getCommandStream(), size, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
if (hEvent) {
this->appendSignalEventPostWalker(hEvent);
}
appendSignalEventPostWalker(hEvent);
}
return ZE_RESULT_SUCCESS;
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_handle_t hEvent) {
if (hEvent == nullptr) {
return;
}
auto event = Event::fromHandle(hEvent);
if (event->isTimestampEvent) {
appendEventForProfiling(hEvent, false);
@@ -1157,6 +1158,22 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_
CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hEvent);
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto event = Event::fromHandle(hEvent);
if (!event->isTimestampEvent) {
return;
}
commandContainer.addToResidencyContainer(&event->getAllocation());
auto baseAddr = event->getGpuAddress();
auto contextOffset = beforeWalker ? offsetof(KernelTimestampEvent, contextStart) : offsetof(KernelTimestampEvent, contextEnd);
auto globalOffset = beforeWalker ? offsetof(KernelTimestampEvent, globalStart) : offsetof(KernelTimestampEvent, globalEnd);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, ptrOffset(baseAddr, globalOffset));
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, ptrOffset(baseAddr, contextOffset));
}
template <GFXCORE_FAMILY gfxCoreFamily>
inline uint64_t CommandListCoreFamily<gfxCoreFamily>::getInputBufferSize(NEO::ImageType imageType,

View File

@@ -60,9 +60,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
reinterpret_cast<const void *>(pThreadGroupDimensions), isIndirect, isPredicate, kernel,
0, device->getNEODevice(), commandListPreemptionMode);
if (hEvent) {
appendSignalEventPostWalker(hEvent);
}
appendSignalEventPostWalker(hEvent);
commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation());
auto &residencyContainer = kernel->getResidencyContainer();
@@ -82,32 +80,28 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
if (!hEvent) {
return;
}
auto event = Event::fromHandle(hEvent);
if (!event->isTimestampEvent) {
return;
}
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
commandContainer.addToResidencyContainer(&event->getAllocation());
auto baseAddr = event->getGpuAddress();
if (beforeWalker) {
auto contextStartAddr = baseAddr;
auto globalStartAddr = baseAddr + offsetof(KernelTimestampEvent, globalStart);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalStartAddr);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddr);
if (isCopyOnly()) {
appendEventForProfilingCopyCommand(hEvent, beforeWalker);
} else {
auto contextEndAddr = baseAddr + offsetof(KernelTimestampEvent, contextEnd);
auto globalEndAddr = baseAddr + offsetof(KernelTimestampEvent, globalEnd);
auto event = Event::fromHandle(hEvent);
if (!event->isTimestampEvent) {
return;
}
commandContainer.addToResidencyContainer(&event->getAllocation());
auto baseAddr = event->getGpuAddress();
if (beforeWalker) {
auto contextStartAddr = baseAddr;
auto globalStartAddr = baseAddr + offsetof(KernelTimestampEvent, globalStart);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalStartAddr);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddr);
if (isCopyOnlyCmdList) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), globalEndAddr, 0llu, true, true);
} else {
auto contextEndAddr = baseAddr + offsetof(KernelTimestampEvent, contextEnd);
auto globalEndAddr = baseAddr + offsetof(KernelTimestampEvent, globalEnd);
NEO::PipeControlArgs args;
args.dcFlushEnable = false;

View File

@@ -31,9 +31,17 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
: public ::L0::CommandListCoreFamily<gfxCoreFamily> {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using BaseClass = ::L0::CommandListCoreFamily<gfxCoreFamily>;
using BaseClass::appendBlitFill;
using BaseClass::appendCopyImageBlit;
using BaseClass::appendEventForProfiling;
using BaseClass::appendEventForProfilingCopyCommand;
using BaseClass::appendLaunchKernelWithParams;
using BaseClass::appendMemoryCopyBlit;
using BaseClass::appendMemoryCopyBlitRegion;
using BaseClass::appendSignalEventPostWalker;
using BaseClass::commandListPreemptionMode;
using BaseClass::getAlignedAllocation;
using BaseClass::hostPtrMap;
WhiteBox() : ::L0::CommandListCoreFamily<gfxCoreFamily>(BaseClass::defaultNumIddsPerBlock) {}
};

View File

@@ -7,15 +7,16 @@
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
#include "test.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/image/image_hw.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
@@ -247,7 +248,7 @@ HWTEST_F(CommandListCreate, givenCommandListWhenSetBarrierThenPipeControlIsProgr
template <GFXCORE_FAMILY gfxCoreFamily>
class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
public:
MockCommandList() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>(1) {}
MockCommandList() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
return {0, 0, nullptr, true};
@@ -267,7 +268,7 @@ class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamil
ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset, uint32_t size) override {
uint64_t srcOffset, uint32_t size, ze_event_handle_t hSignalEvent) override {
appendMemoryCopyBlitCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -278,7 +279,7 @@ class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamil
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
size_t srcSize, size_t dstSize) override {
size_t srcSize, size_t dstSize, ze_event_handle_t hSignalEvent) override {
appendMemoryCopyBlitRegionCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -315,7 +316,7 @@ class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamil
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch,
size_t bytesPerPixel, Vec3<size_t> copySize,
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize) override {
Vec3<uint32_t> srcSize, Vec3<uint32_t> dstSize, ze_event_handle_t hSignalEvent) override {
appendCopyImageBlitCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -400,7 +401,6 @@ using AppendMemoryCopy = CommandListCreate;
template <GFXCORE_FAMILY gfxCoreFamily>
class MockAppendMemoryCopy : public MockCommandList<gfxCoreFamily> {
public:
using CommandList::hostPtrMap;
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize);
}
@@ -484,13 +484,13 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCop
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
WhiteBox<CommandListCoreFamily<gfxCoreFamily>> cmdList(1);
cmdList.initialize(device, true);
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, true);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
auto &commandContainer = cmdList.commandContainer;
auto &commandContainer = commandList->commandContainer;
GenCmdList genCmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -507,15 +507,15 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCop
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT;
WhiteBox<CommandListCoreFamily<gfxCoreFamily>> cmdList(1);
cmdList.initialize(device, true);
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, true);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1};
ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1};
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr);
commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr);
auto &commandContainer = cmdList.commandContainer;
auto &commandContainer = commandList->commandContainer;
GenCmdList genCmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
@@ -627,7 +627,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushTheP
template <GFXCORE_FAMILY gfxCoreFamily>
class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
public:
MockCommandListForMemFill() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>(1) {}
MockCommandListForMemFill() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
return {0, 0, nullptr, true};
@@ -635,7 +635,7 @@ class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily<gf
ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset, uint32_t size) override {
uint64_t srcOffset, uint32_t size, ze_event_handle_t hSignalEvent) override {
appendMemoryCopyBlitCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -749,5 +749,195 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyFromImagBlitThenCommand
EXPECT_NE(cmdList.end(), itor);
}
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopyBlitThenTimeStampRegistersAreAdded, Platforms) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, true);
ze_event_pool_desc_t eventPoolDesc = {
ZE_EVENT_POOL_DESC_VERSION_CURRENT,
ZE_EVENT_POOL_FLAG_TIMESTAMP,
1};
ze_event_desc_t eventDesc = {
ZE_EVENT_DESC_VERSION_CURRENT,
0,
ZE_EVENT_SCOPE_FLAG_NONE,
ZE_EVENT_SCOPE_FLAG_NONE};
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
MemoryPool::System4KBPages);
NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
MemoryPool::System4KBPages);
uint32_t size = 0x1000;
commandList->appendMemoryCopyBlit(&mockAllocationDst, 0, &mockAllocationSrc, 0, size, event->toHandle());
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
}
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopyRegionBlitThenTimeStampRegistersAreAdded, Platforms) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, true);
ze_event_pool_desc_t eventPoolDesc = {
ZE_EVENT_POOL_DESC_VERSION_CURRENT,
ZE_EVENT_POOL_FLAG_TIMESTAMP,
1};
ze_event_desc_t eventDesc = {
ZE_EVENT_DESC_VERSION_CURRENT,
0,
ZE_EVENT_SCOPE_FLAG_NONE,
ZE_EVENT_SCOPE_FLAG_NONE};
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
MemoryPool::System4KBPages);
NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
MemoryPool::System4KBPages);
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, srcRegion, dstRegion, {0, 0, 0}, 0, 0, 0, 0, 0, 0, event->toHandle());
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
}
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToImageCopyBlitThenTimeStampRegistersAreAdded, Platforms) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, true);
ze_event_pool_desc_t eventPoolDesc = {
ZE_EVENT_POOL_DESC_VERSION_CURRENT,
ZE_EVENT_POOL_FLAG_TIMESTAMP,
1};
ze_event_desc_t eventDesc = {
ZE_EVENT_DESC_VERSION_CURRENT,
0,
ZE_EVENT_SCOPE_FLAG_NONE,
ZE_EVENT_SCOPE_FLAG_NONE};
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
MemoryPool::System4KBPages);
NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
MemoryPool::System4KBPages);
commandList->appendCopyImageBlit(&mockAllocationDst, &mockAllocationSrc, {0, 0, 0}, {0, 0, 0}, 0, 0, 0, 0, 1, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, event->toHandle());
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
}
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, Platforms) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, true);
ze_event_pool_desc_t eventPoolDesc = {
ZE_EVENT_POOL_DESC_VERSION_CURRENT,
ZE_EVENT_POOL_FLAG_TIMESTAMP,
1};
ze_event_desc_t eventDesc = {
ZE_EVENT_DESC_VERSION_CURRENT,
0,
ZE_EVENT_SCOPE_FLAG_NONE,
ZE_EVENT_SCOPE_FLAG_NONE};
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfilingCopyCommand(event->toHandle(), true);
auto contextOffset = offsetof(KernelTimestampEvent, contextStart);
auto globalOffset = offsetof(KernelTimestampEvent, globalStart);
auto baseAddr = event->getGpuAddress();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset));
EXPECT_NE(cmdList.end(), ++itor);
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
}
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, Platforms) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, true);
ze_event_pool_desc_t eventPoolDesc = {
ZE_EVENT_POOL_DESC_VERSION_CURRENT,
ZE_EVENT_POOL_FLAG_TIMESTAMP,
1};
ze_event_desc_t eventDesc = {
ZE_EVENT_DESC_VERSION_CURRENT,
0,
ZE_EVENT_SCOPE_FLAG_NONE,
ZE_EVENT_SCOPE_FLAG_NONE};
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfilingCopyCommand(event->toHandle(), false);
auto contextOffset = offsetof(KernelTimestampEvent, contextEnd);
auto globalOffset = offsetof(KernelTimestampEvent, globalEnd);
auto baseAddr = event->getGpuAddress();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset));
EXPECT_NE(cmdList.end(), ++itor);
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
}
HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothingAddedToStream, Platforms) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, true);
auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed();
commandList->appendSignalEventPostWalker(nullptr);
EXPECT_EQ(commandList->commandContainer.getCommandStream()->getUsed(), usedBefore);
}
} // namespace ult
} // namespace L0

View File

@@ -150,6 +150,11 @@ struct EncodeStoreMMIO {
static void encode(LinearStream &csr, uint32_t offset, uint64_t address);
static void remapOffset(MI_STORE_REGISTER_MEM *pStoreRegMem);
};
template <typename GfxFamily>
struct AppendStoreMMIO {
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
static void appendRemap(MI_STORE_REGISTER_MEM *cmd);
};
template <typename GfxFamily>
struct EncodeSurfaceState {