Use single event for multiple kernels

Related-To: NEO-6871

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-04-08 18:48:45 +00:00
committed by Compute-Runtime-Automation
parent a3745c28a3
commit 819d648997
25 changed files with 1074 additions and 338 deletions

View File

@@ -181,4 +181,16 @@ void CommandList::handleIndirectAllocationResidency() {
}
}
// Prepares a timestamp signal event for a multi-partition command list.
// When partitionCount is greater than one, signalEvent is non-null and the event
// reports its timestamp flag, the event's packets-in-use is set to partitionCount
// and true is returned. In every other case the event is left untouched and the
// result is false.
bool CommandList::setupTimestampEventForMultiTile(ze_event_handle_t signalEvent) {
    const bool multiPartition = this->partitionCount > 1;
    if (!multiPartition || !signalEvent) {
        return false;
    }

    auto timestampCandidate = Event::fromHandle(signalEvent);
    if (!timestampCandidate->isEventTimestampFlagSet()) {
        return false;
    }

    timestampCandidate->setPacketsInUse(this->partitionCount);
    return true;
}
} // namespace L0

View File

@@ -264,6 +264,7 @@ struct CommandList : _ze_command_list_handle_t {
protected:
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
bool setupTimestampEventForMultiTile(ze_event_handle_t signalEvent);
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations;

View File

@@ -230,7 +230,7 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]);
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions);
void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb, bool workloadPartition);
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask);
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition);
void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker, bool workloadPartition);
void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker);
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);

View File

@@ -364,9 +364,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
return ret;
}
appendEventForProfiling(hSignalEvent, true, false);
bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent);
appendEventForProfiling(hSignalEvent, true, workloadPartition);
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
appendSignalEventPostWalker(hSignalEvent, false);
appendSignalEventPostWalker(hSignalEvent, workloadPartition);
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
executeCommandListImmediate(true);
@@ -800,22 +802,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemAdvise(ze_device_hand
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
// Launches one kernel of a split operation.
// This variant does not forward hEvent: a null event handle is passed to
// appendLaunchKernelWithParams, together with three false flags.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
                                                                          const ze_group_count_t *pThreadGroupDimensions,
                                                                          ze_event_handle_t hEvent) {
    const ze_result_t launchResult = appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
    return launchResult;
}
// Brackets a multi-walker operation with a single event.
// beforeWalker == true  -> append the profiling entry for hEvent (no workload partition).
// beforeWalker == false -> append the post-walker signal for hEvent (no workload partition).
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
    if (!beforeWalker) {
        appendSignalEventPostWalker(hEvent, false);
        return;
    }
    appendEventForProfiling(hEvent, true, false);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(void *dstPtr,
NEO::GraphicsAllocation *dstPtrAlloc,
@@ -1075,11 +1061,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
if (isStateless) {
func = Builtin::CopyBufferToBufferSideStateless;
}
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
if (isCopyOnly()) {
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, srcAllocationStruct.offset, leftSize)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, srcAllocationStruct.offset, leftSize);
} else {
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, srcAllocationStruct.offset,
@@ -1088,17 +1076,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
hSignalEvent,
isStateless);
}
}
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
Builtin func = Builtin::CopyBufferToBufferMiddle;
if (isStateless) {
func = Builtin::CopyBufferToBufferMiddleStateless;
}
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
if (isCopyOnly()) {
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, middleSizeBytes)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, middleSizeBytes);
} else {
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
@@ -1108,17 +1099,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
hSignalEvent,
isStateless);
}
}
if (ret == ZE_RESULT_SUCCESS && rightSize) {
Builtin func = Builtin::CopyBufferToBufferSide;
if (isStateless) {
func = Builtin::CopyBufferToBufferSideStateless;
}
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
if (isCopyOnly()) {
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, rightSize)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, rightSize);
} else {
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
@@ -1127,6 +1121,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
hSignalEvent,
isStateless);
}
}
appendEventForProfilingAllWalkers(hSignalEvent, false);
@@ -1557,6 +1552,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
patternGfxAlloc);
builtinFunctionRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
res = appendLaunchKernelSplit(builtinFunctionRemainder->toHandle(), &dispatchFuncArgs, hSignalEvent);
if (res) {
return res;
@@ -1951,7 +1947,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_h
constexpr uint32_t mask = 0xfffffffe;
auto event = Event::fromHandle(hEvent);
auto baseAddr = event->getGpuAddress(this->device);
auto baseAddr = event->getPacketAddress(this->device);
auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset();
auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset();
@@ -1966,7 +1962,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_h
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress, workloadPartition);
}
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask);
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -2018,6 +2014,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
}
}
bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent);
appendEventForProfiling(hSignalEvent, true, workloadPartition);
const auto &hwInfo = this->device->getHwInfo();
if (isCopyOnly()) {
NEO::MiFlushArgs args;
@@ -2031,17 +2030,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
} else {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlWithPostSync(
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(),
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
reinterpret_cast<uint64_t>(dstptr),
0,
hwInfo,
args);
}
if (hSignalEvent) {
CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(hSignalEvent, false);
}
appendSignalEventPostWalker(hSignalEvent, workloadPartition);
auto allocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(uint64_t), false);
commandContainer.addToResidencyContainer(allocationStruct.alloc);
@@ -2263,7 +2261,7 @@ void CommandListCoreFamily<gfxCoreFamily>::programStateBaseAddress(NEO::CommandC
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask) {}
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition) {}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_t hSignalEvent,
@@ -2274,15 +2272,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
if (ret) {
return ret;
}
bool workloadPartition = false;
if (this->partitionCount > 1 &&
hSignalEvent) {
auto event = Event::fromHandle(hSignalEvent);
if (event->isEventTimestampFlagSet()) {
event->setPacketsInUse(this->partitionCount);
workloadPartition = true;
}
}
bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent);
appendEventForProfiling(hSignalEvent, true, workloadPartition);
if (isCopyOnly()) {

View File

@@ -201,4 +201,20 @@ inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileB
return 0;
}
// Launches one kernel of a split operation.
// This variant does not forward hEvent: a null event handle is passed to
// appendLaunchKernelWithParams, together with three false flags.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
                                                                          const ze_group_count_t *pThreadGroupDimensions,
                                                                          ze_event_handle_t hEvent) {
    const ze_result_t launchResult = appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
    return launchResult;
}
// Brackets a multi-walker operation with a single event.
// beforeWalker == true  -> append the profiling entry for hEvent (no workload partition).
// beforeWalker == false -> append the post-walker signal for hEvent (no workload partition).
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
    if (!beforeWalker) {
        appendSignalEventPostWalker(hEvent, false);
        return;
    }
    appendEventForProfiling(hEvent, true, false);
}
} // namespace L0

View File

@@ -344,4 +344,30 @@ inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileB
false);
}
// Launches one kernel of a split operation and attaches it to the shared event.
// When an event handle is supplied, its kernel count is incremented before the
// launch is appended; the handle is then forwarded to appendLaunchKernelWithParams.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
                                                                          const ze_group_count_t *pThreadGroupDimensions,
                                                                          ze_event_handle_t hEvent) {
    if (hEvent != nullptr) {
        auto sharedEvent = Event::fromHandle(hEvent);
        sharedEvent->increaseKernelCount();
    }
    const ze_result_t launchResult = appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, false, false, false);
    return launchResult;
}
// Brackets a multi-walker operation with a single event.
// Non-copy-only lists: the only action is to zero the event's kernel count when
//   called before the walkers with a non-null event; nothing is appended here.
// Copy-only lists: the profiling entry (beforeWalker) or the post-walker signal
//   (!beforeWalker) is appended, both without workload partitioning.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
    if (!isCopyOnly()) {
        if (beforeWalker && hEvent) {
            Event::fromHandle(hEvent)->zeroKernelCount();
        }
        return;
    }

    if (beforeWalker) {
        appendEventForProfiling(hEvent, true, false);
        return;
    }
    appendSignalEventPostWalker(hEvent, false);
}
} // namespace L0

View File

@@ -96,6 +96,17 @@ struct Event : _ze_event_handle_t {
return isTimestampEvent || usingContextEndOffset;
}
// Registers one more kernel on this event. The counter is bumped first and then
// validated: going past EventPacketsCount::maxKernelSplit is unrecoverable.
void increaseKernelCount() {
    ++kernelCount;
    UNRECOVERABLE_IF(EventPacketsCount::maxKernelSplit < kernelCount);
}
// Returns the current value of kernelCount.
uint32_t getKernelCount() const {
return kernelCount;
}
// Resets kernelCount to zero; increaseKernelCount() raises it again by one per call.
void zeroKernelCount() {
kernelCount = 0;
}
uint64_t globalStartTS;
uint64_t globalEndTS;
uint64_t contextStartTS;
@@ -110,8 +121,6 @@ struct Event : _ze_event_handle_t {
ze_event_scope_flags_t signalScope = 0u;
ze_event_scope_flags_t waitScope = 0u;
uint32_t kernelCount = 1u;
bool l3FlushWaApplied = false;
protected:
@@ -122,6 +131,9 @@ struct Event : _ze_event_handle_t {
size_t timestampSizeInDw = 0u;
size_t singlePacketSize = 0u;
size_t eventPoolOffset = 0u;
uint32_t kernelCount = 1u;
bool isTimestampEvent = false;
bool usingContextEndOffset = false;
};
@@ -180,8 +192,7 @@ struct EventImp : public Event {
protected:
ze_result_t calculateProfilingData();
ze_result_t queryStatusKernelTimestamp();
ze_result_t queryStatusNonTimestamp();
ze_result_t queryStatusEventPackets();
ze_result_t hostEventSetValue(TagSizeT eventValue);
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
void assignKernelEventCompletionData(void *address);

View File

@@ -104,33 +104,13 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
}
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
ze_result_t EventImp<TagSizeT>::queryStatusEventPackets() {
assignKernelEventCompletionData(hostAddress);
uint32_t queryVal = Event::STATE_CLEARED;
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
bool ready = NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(
static_cast<TagSizeT const *>(kernelEventCompletionData[i].getContextEndAddress(packetId)),
queryVal,
std::not_equal_to<TagSizeT>());
if (!ready) {
return ZE_RESULT_NOT_READY;
}
}
}
this->csr->getInternalAllocationStorage()->cleanAllocationList(this->csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION);
return ZE_RESULT_SUCCESS;
}
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
assignKernelEventCompletionData(hostAddress);
uint32_t queryVal = Event::STATE_CLEARED;
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
void const *queryAddress = usingContextEndOffset
void const *queryAddress = isUsingContextEndOffset()
? kernelEventCompletionData[i].getContextEndAddress(packetId)
: kernelEventCompletionData[i].getContextStartAddress(packetId);
bool ready = NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(
@@ -156,11 +136,7 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
*hostAddr = metricStreamer->getNotificationState();
}
this->csr->downloadAllocations();
if (isEventTimestampFlagSet()) {
return queryStatusKernelTimestamp();
} else {
return queryStatusNonTimestamp();
}
return queryStatusEventPackets();
}
template <typename TagSizeT>
@@ -274,12 +250,10 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::reset() {
if (isEventTimestampFlagSet()) {
kernelCount = EventPacketsCount::maxKernelSplit;
for (uint32_t i = 0; i < kernelCount; i++) {
kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
}
}
hostEventSetValue(Event::STATE_INITIAL);
resetPackets();
return ZE_RESULT_SUCCESS;

View File

@@ -8,6 +8,8 @@
#pragma once
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/test_macros/test.h"
@@ -90,5 +92,96 @@ struct MultiTileCommandListFixture : public SingleRootMultiSubDeviceFixture {
std::unique_ptr<VariableBackup<bool>> osLocalMemoryBackup;
};
template <typename FamilyType>
void validateTimestampRegisters(GenCmdList &cmdList,
GenCmdList::iterator &startIt,
uint32_t firstLoadRegisterRegSrcAddress,
uint64_t firstStoreRegMemAddress,
uint32_t secondLoadRegisterRegSrcAddress,
uint64_t secondStoreRegMemAddress,
bool workloadPartition) {
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
using MI_MATH = typename FamilyType::MI_MATH;
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
constexpr uint32_t mask = 0xfffffffe;
auto itor = find<MI_LOAD_REGISTER_REG *>(startIt, cmdList.end());
{
ASSERT_NE(cmdList.end(), itor);
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
if (workloadPartition) {
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
} else {
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
}
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
if (workloadPartition) {
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
} else {
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
}
}
itor++;
startIt = itor;
}
} // namespace ult
} // namespace L0

View File

@@ -347,5 +347,76 @@ struct MockCommandList : public CommandList {
uint8_t *batchBuffer = nullptr;
NEO::GraphicsAllocation *mockAllocation = nullptr;
};
// Test double for CommandListCoreFamily used by the memory-copy tests.
// - appendMemoryCopyKernelWithGA is mocked through ADDMETHOD (callBase defaults to false,
//   default result ZE_RESULT_SUCCESS); appendMemoryCopyBlit is mocked without a base call.
// - The 2d/3d kernel copy overrides record the aligned source/destination pointers and
//   then delegate to the real implementation.
// - The blit region override records the source/destination offsets and then delegates.
template <GFXCORE_FAMILY gfxCoreFamily>
class MockAppendMemoryCopy : public CommandListCoreFamily<gfxCoreFamily> {
  public:
    using BaseClass = CommandListCoreFamily<gfxCoreFamily>;
    ADDMETHOD(appendMemoryCopyKernelWithGA, ze_result_t, false, ZE_RESULT_SUCCESS,
              (void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
               uint64_t dstOffset, void *srcPtr,
               NEO::GraphicsAllocation *srcPtrAlloc,
               uint64_t srcOffset, uint64_t size,
               uint64_t elementSize, Builtin builtin,
               ze_event_handle_t hSignalEvent,
               bool isStateless),
              (dstPtr, dstPtrAlloc, dstOffset, srcPtr, srcPtrAlloc, srcOffset, size, elementSize, builtin, hSignalEvent, isStateless));
    ADDMETHOD_NOBASE(appendMemoryCopyBlit, ze_result_t, ZE_RESULT_SUCCESS,
                     (uintptr_t dstPtr,
                      NEO::GraphicsAllocation *dstPtrAlloc,
                      uint64_t dstOffset, uintptr_t srcPtr,
                      NEO::GraphicsAllocation *srcPtrAlloc,
                      uint64_t srcOffset,
                      uint64_t size));

    // Pass-through to the base implementation.
    AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override {
        return BaseClass::getAlignedAllocation(device, buffer, bufferSize, allowHostCopy);
    }

    // Captures the aligned pointers of both allocations, then runs the real 2d copy.
    ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
                                         Builtin builtin, const ze_copy_region_t *dstRegion,
                                         uint32_t dstPitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch,
                                         size_t srcOffset, ze_event_handle_t hSignalEvent,
                                         uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
        dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
        return BaseClass::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
    }

    // Captures the aligned pointers of both allocations, then runs the real 3d copy.
    ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
                                         Builtin builtin, const ze_copy_region_t *dstRegion,
                                         uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch,
                                         uint32_t srcSlicePitch, size_t srcOffset,
                                         ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
                                         ze_event_handle_t *phWaitEvents) override {
        srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
        dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
        return BaseClass::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
    }

    // Captures the source/destination offsets, then runs the real blit region copy.
    ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation,
                                           NEO::GraphicsAllocation *dstAllocation,
                                           size_t srcOffset,
                                           size_t dstOffset,
                                           ze_copy_region_t srcRegion,
                                           ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
                                           size_t srcRowPitch, size_t srcSlicePitch,
                                           size_t dstRowPitch, size_t dstSlicePitch,
                                           const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
                                           uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        srcBlitCopyRegionOffset = srcOffset;
        dstBlitCopyRegionOffset = dstOffset;
        return BaseClass::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, phWaitEvents);
    }

    // Values captured by the overrides above. All are zero-initialized so a test that
    // reads them before the corresponding call sees a defined value.
    uintptr_t srcAlignedPtr = 0;
    uintptr_t dstAlignedPtr = 0;
    size_t srcBlitCopyRegionOffset = 0;
    size_t dstBlitCopyRegionOffset = 0;
};
} // namespace ult
} // namespace L0

View File

@@ -501,13 +501,6 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenS
itor++;
itor = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
itor++;
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
if (MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo)) {
EXPECT_NE(cmdList.end(), itor);
} else {
EXPECT_EQ(cmdList.end(), itor);
}
}
using platformSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
@@ -540,22 +533,18 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto iterator = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
bool postSyncFound = false;
uint32_t postSyncFound = 0;
ASSERT_NE(0u, iterator.size());
uint32_t numPCs = 0;
for (auto it : iterator) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
numPCs++;
if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) &&
(cmd->getImmediateData() == Event::STATE_SIGNALED) &&
(cmd->getDcFlushEnable())) {
postSyncFound = true;
break;
postSyncFound++;
}
}
ASSERT_TRUE(postSyncFound);
EXPECT_EQ(numPCs, iterator.size());
EXPECT_EQ(1u, postSyncFound);
}
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeSetToSubDeviceThenB2BPipeControlIsAddedWithDcFlushForLastPC, platformSupport) {
@@ -585,22 +574,18 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto iterator = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
bool postSyncFound = false;
uint32_t postSyncFound = 0;
ASSERT_NE(0u, iterator.size());
uint32_t numPCs = 0;
for (auto it : iterator) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
numPCs++;
if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) &&
(cmd->getImmediateData() == Event::STATE_SIGNALED) &&
(!cmd->getDcFlushEnable())) {
postSyncFound = true;
break;
postSyncFound++;
}
}
ASSERT_TRUE(postSyncFound);
EXPECT_EQ(numPCs, iterator.size() - 1);
EXPECT_EQ(1u, postSyncFound);
auto it = *(iterator.end() - 1);
auto cmd1 = genCmdCast<PIPE_CONTROL *>(*it);

View File

@@ -77,12 +77,19 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalle
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset),
commandContainer.getCommandStream()->getUsed() - commandStreamOffset));
auto iterator = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
auto cmd = genCmdCast<PIPE_CONTROL *>(*iterator);
auto pcList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(0u, pcList.size());
bool foundTimestampPipeControl = false;
for (auto it : pcList) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
EXPECT_FALSE(cmd->getDcFlushEnable());
EXPECT_EQ(timestampAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
EXPECT_EQ(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, cmd->getPostSyncOperation());
foundTimestampPipeControl = true;
}
}
EXPECT_TRUE(foundTimestampPipeControl);
}
HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalledThenTimestampAllocationIsInsideResidencyContainer, IsAtLeastSkl) {

View File

@@ -7,7 +7,6 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/test_macros/test.h"
@@ -382,82 +381,6 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
EXPECT_EQ(1u, postSyncFound);
}
// Validates the global + context timestamp store sequences of a multi-tile barrier.
// Starting from the first MI_LOAD_REGISTER_REG in cmdList, two back-to-back sequences
// are expected, each consisting of MI_LOAD_REGISTER_REG, MI_LOAD_REGISTER_IMM (mask),
// MI_MATH and MI_STORE_REGISTER_MEM:
//   1) REG_GLOBAL_TIMESTAMP_LDW stored to firstRegisterAddress
//   2) GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW stored to secondRegisterAddress
// Both stores must have the workload partition id offset enabled.
// Every genCmdCast result is checked against nullptr before use, so an unexpected
// command type produces a test failure rather than a null dereference.
template <typename FamilyType>
void validateTimestampRegisters(GenCmdList &cmdList,
                                uint64_t firstRegisterAddress, uint64_t secondRegisterAddress) {
    using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;

    constexpr uint32_t mask = 0xfffffffe;

    auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());

    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        ASSERT_NE(nullptr, cmdLoadReg);
        EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmdLoadReg->getSourceRegisterAddress());
        EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
    }

    itor++;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
        ASSERT_NE(nullptr, cmdLoadImm);
        EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
        EXPECT_EQ(mask, cmdLoadImm->getDataDword());
    }

    itor++;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdMath = genCmdCast<MI_MATH *>(*itor);
        ASSERT_NE(nullptr, cmdMath);
        EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
    }

    itor++;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
        ASSERT_NE(nullptr, cmdMem);
        EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
        EXPECT_EQ(firstRegisterAddress, cmdMem->getMemoryAddress());
        EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());
    }

    itor++;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        ASSERT_NE(nullptr, cmdLoadReg);
        EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmdLoadReg->getSourceRegisterAddress());
        EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
    }

    itor++;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
        ASSERT_NE(nullptr, cmdLoadImm);
        EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
        EXPECT_EQ(mask, cmdLoadImm->getDataDword());
    }

    itor++;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdMath = genCmdCast<MI_MATH *>(*itor);
        ASSERT_NE(nullptr, cmdMath);
        EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
    }

    itor++;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
        ASSERT_NE(nullptr, cmdMem);
        EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
        EXPECT_EQ(secondRegisterAddress, cmdMem->getMemoryAddress());
        EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());
    }
}
HWTEST2_F(MultiTileCommandListAppendBarrier,
GivenTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndTimestampOperations, IsWithinXeGfxFamily) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
@@ -533,7 +456,12 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
cmdBuffer,
timestampRegisters));
validateTimestampRegisters<FamilyType>(cmdList, globalStartAddress, contextStartAddress);
auto begin = cmdList.begin();
validateTimestampRegisters<FamilyType>(cmdList,
begin,
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
true);
auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + timestampRegisters;
@@ -557,7 +485,12 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
cmdBuffer,
timestampRegisters));
validateTimestampRegisters<FamilyType>(cmdList, globalEndAddress, contextEndAddress);
begin = cmdList.begin();
validateTimestampRegisters<FamilyType>(cmdList,
begin,
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
true);
}
} // namespace ult

View File

@@ -8,6 +8,7 @@
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
@@ -17,70 +18,6 @@ namespace ult {
using AppendMemoryCopy = Test<DeviceFixture>;
// Test double layered on the white-boxed CommandListCoreFamily.
// The 2D/3D kernel-copy and blit-region overrides record selected arguments in public
// members and then forward to the production implementation, so tests can inspect what
// the copy paths received.
template <GFXCORE_FAMILY gfxCoreFamily>
class MockAppendMemoryCopy : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
  public:
    // NOTE(review): ADDMETHOD_NOBASE is defined outside this view; presumably it generates a
    // stub returning the given default plus a "<name>Called" counter - confirm in the mock helpers.
    ADDMETHOD_NOBASE(appendMemoryCopyKernelWithGA, ze_result_t, ZE_RESULT_SUCCESS,
                     (void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
                      uint64_t dstOffset, void *srcPtr,
                      NEO::GraphicsAllocation *srcPtrAlloc,
                      uint64_t srcOffset, uint64_t size,
                      uint64_t elementSize, Builtin builtin,
                      ze_event_handle_t hSignalEvent,
                      bool isStateless));

    ADDMETHOD_NOBASE(appendMemoryCopyBlit, ze_result_t, ZE_RESULT_SUCCESS,
                     (uintptr_t dstPtr,
                      NEO::GraphicsAllocation *dstPtrAlloc,
                      uint64_t dstOffset, uintptr_t srcPtr,
                      NEO::GraphicsAllocation *srcPtrAlloc,
                      uint64_t srcOffset,
                      uint64_t size));

    // Pure pass-through to the production implementation.
    AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override {
        return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize, allowHostCopy);
    }

    // Captures the aligned source/destination pointers, then runs the real 2D kernel copy.
    ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
                                         Builtin builtin, const ze_copy_region_t *dstRegion,
                                         uint32_t dstPitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch,
                                         size_t srcOffset, ze_event_handle_t hSignalEvent,
                                         uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
        dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
        return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
    }

    // Captures the aligned source/destination pointers, then runs the real 3D kernel copy.
    ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
                                         Builtin builtin, const ze_copy_region_t *dstRegion,
                                         uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch,
                                         uint32_t srcSlicePitch, size_t srcOffset,
                                         ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
                                         ze_event_handle_t *phWaitEvents) override {
        srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
        dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
        return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
    }

    // Captures the source/destination offsets, then runs the real blit region copy.
    ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation,
                                           NEO::GraphicsAllocation *dstAllocation,
                                           size_t srcOffset,
                                           size_t dstOffset,
                                           ze_copy_region_t srcRegion,
                                           ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
                                           size_t srcRowPitch, size_t srcSlicePitch,
                                           size_t dstRowPitch, size_t dstSlicePitch,
                                           const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
                                           uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        srcBlitCopyRegionOffset = srcOffset;
        dstBlitCopyRegionOffset = dstOffset;
        return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, phWaitEvents);
    }

    // Zero-initialized so a test that inspects these before any 2D/3D copy ran reads a
    // defined value, matching the blit offset members below.
    uintptr_t srcAlignedPtr = 0;
    uintptr_t dstAlignedPtr = 0;
    size_t srcBlitCopyRegionOffset = 0;
    size_t dstBlitCopyRegionOffset = 0;
};
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, IsAtLeastSkl) {
MockAppendMemoryCopy<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
@@ -343,12 +280,14 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyT
}
// Product matcher for the tests below: IsWithinProducts over IGFX_SKYLAKE .. IGFX_DG1.
using SupportedPlatforms = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;
HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, SupportedPlatforms) {
HWTEST2_F(AppendMemoryCopy,
givenCommandListUsesTimestampPassedToMemoryCopyWhenTwoKernelsAreUsedThenAppendProfilingCalledForSinglePacket, SupportedPlatforms) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
MockAppendMemoryCopy<gfxCoreFamily> commandList;
commandList.appendMemoryCopyKernelWithGACallBase = true;
commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
void *srcPtr = reinterpret_cast<void *>(0x1234);
void *dstPtr = reinterpret_cast<void *>(0x2345);
@@ -365,65 +304,97 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenA
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset();
uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset();
uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalled, 0u);
EXPECT_EQ(commandList.appendMemoryCopyBlitCalled, 0u);
EXPECT_EQ(2u, commandList.appendMemoryCopyKernelWithGACalled);
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
EXPECT_EQ(1u, event->getPacketsInUse());
EXPECT_EQ(1u, event->getKernelCount());
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
commandList.commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
{
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
}
itor++;
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
{
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
}
auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
auto begin = cmdList.begin();
ASSERT_EQ(2u, itorWalkers.size());
auto secondWalker = itorWalkers[1];
itor++;
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
{
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
EXPECT_FALSE(cmd->getDcFlushEnable());
}
validateTimestampRegisters<FamilyType>(cmdList,
begin,
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
false);
itor++;
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
{
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
}
itor++;
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
{
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
}
auto temp = itor;
auto numPCs = findAll<PIPE_CONTROL *>(temp, cmdList.end());
//we should have only one PC with dcFlush added
ASSERT_EQ(1u, numPCs.size());
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
EXPECT_NE(cmdList.end(), itor);
{
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable());
}
validateTimestampRegisters<FamilyType>(cmdList,
secondWalker,
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
false);
}
// A memory copy that is dispatched as three kernels must still report a single event packet and a
// single kernel count, with the start timestamp registers stored at the event's start offsets
// (searched from the beginning of the stream) and the end timestamp registers stored at the
// event's end offsets (searched from the third walker onwards).
HWTEST2_F(AppendMemoryCopy,
          givenCommandListUsesTimestampPassedToMemoryCopyWhenThreeKernelsAreUsedThenAppendProfilingCalledForSinglePacket, SupportedPlatforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    // NOTE(review): presumably the unaligned pointers and size are what split the copy into
    // three kernels; the kernel count itself is asserted below.
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset();
    uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset();
    uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
    uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(1u, event->getPacketsInUse());
    EXPECT_EQ(1u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
    auto begin = cmdList.begin();
    ASSERT_EQ(3u, itorWalkers.size());
    auto thirdWalker = itorWalkers[2];

    // Start timestamps: looked up from the very beginning of the parsed stream.
    validateTimestampRegisters<FamilyType>(cmdList,
                                           begin,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
                                           false);

    // End timestamps: looked up starting at the last walker.
    validateTimestampRegisters<FamilyType>(cmdList,
                                           thirdWalker,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
                                           false);
}
} // namespace ult
} // namespace L0

View File

@@ -355,5 +355,79 @@ HWTEST2_F(CommandListAppendSignalEvent,
EXPECT_EQ(1u, postSyncFound);
}
// With a partition count of two and a timestamp event, appendWriteGlobalTimestamp must mark two
// packets in use, emit a timestamp-writing PIPE_CONTROL targeting the user address, and store the
// start/end timestamp registers at the event's offsets with the last validation argument set to
// true (the multi-tile case this test covers).
HWTEST2_F(CommandListAppendSignalEvent,
          givenMultiTileCommandListWhenAppendWriteGlobalTimestampCalledWithSignalEventThenWorkPartitionedRegistersAreUsed, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    auto &commandContainer = commandList->commandContainer;

    uint64_t timestampAddress = 0x12345678555500;
    uint64_t *dstptr = reinterpret_cast<uint64_t *>(timestampAddress);
    constexpr uint32_t packets = 2u;

    event->setEventTimestampFlag(true);
    commandList->partitionCount = packets;

    commandList->appendWriteGlobalTimestamp(dstptr, event->toHandle(), 0, nullptr);
    EXPECT_EQ(packets, event->getPacketsInUse());

    auto eventGpuAddress = event->getGpuAddress(device);
    uint64_t contextStartAddress = eventGpuAddress + event->getContextStartOffset();
    uint64_t globalStartAddress = eventGpuAddress + event->getGlobalStartOffset();
    uint64_t contextEndAddress = eventGpuAddress + event->getContextEndOffset();
    uint64_t globalEndAddress = eventGpuAddress + event->getGlobalEndOffset();

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    // Walk the PIPE_CONTROLs until the one that writes a timestamp is found.
    // The checks are fatal: the iterator is dereferenced right after each lookup, and a
    // non-fatal check would dereference end() and could keep the loop running forever.
    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorPC);
    auto cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
    while (cmd->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
        ++itorPC;
        itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
        ASSERT_NE(cmdList.end(), itorPC);
        cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
    }
    EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
    EXPECT_FALSE(cmd->getDcFlushEnable());
    EXPECT_EQ(timestampAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));

    // NOTE(review): the same iterator object is handed to every validation call below, so the
    // helper presumably advances it past what it matched - confirm against validateTimestampRegisters.
    auto startCmdList = cmdList.begin();
    validateTimestampRegisters<FamilyType>(cmdList,
                                           startCmdList,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
                                           true);

    if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
        // Upper dwords are stored right after the lower ones.
        // NOTE(review): 0x23AC is presumably the upper half of the context timestamp register - confirm.
        uint64_t globalStartAddressHigh = globalStartAddress + sizeof(uint32_t);
        uint64_t contextStartAddressHigh = contextStartAddress + sizeof(uint32_t);
        validateTimestampRegisters<FamilyType>(cmdList,
                                               startCmdList,
                                               REG_GLOBAL_TIMESTAMP_UN, globalStartAddressHigh,
                                               0x23AC, contextStartAddressHigh,
                                               true);
    }

    validateTimestampRegisters<FamilyType>(cmdList,
                                           startCmdList,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
                                           true);

    if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
        uint64_t globalEndAddressHigh = globalEndAddress + sizeof(uint32_t);
        uint64_t contextEndAddressHigh = contextEndAddress + sizeof(uint32_t);
        validateTimestampRegisters<FamilyType>(cmdList,
                                               startCmdList,
                                               REG_GLOBAL_TIMESTAMP_UN, globalEndAddressHigh,
                                               0x23AC, contextEndAddressHigh,
                                               true);
    }
}
} // namespace ult
} // namespace L0

View File

@@ -210,9 +210,9 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnTimestampEventWithThre
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
event->setPacketsInUse(3u);
event->kernelCount = 2;
event->increaseKernelCount();
event->setPacketsInUse(3u);
event->kernelCount = 3;
event->increaseKernelCount();
event->setPacketsInUse(3u);
ASSERT_EQ(9u, event->getPacketsInUse());

View File

@@ -6,11 +6,13 @@
*/
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h"
#include "level_zero/core/source/kernel/kernel_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
@@ -210,5 +212,217 @@ HWTEST2_F(AppendFillTest,
delete[] nonMultipleDstPtr;
}
// Core-family matcher for the register-profiling fill tests: IsWithinGfxCore over IGFX_GEN9_CORE .. IGFX_GEN12LP_CORE.
using IsBetweenGen9AndGen12lp = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_GEN12LP_CORE>;
// Immediate-pattern memory fill with a timestamp event: two walkers are expected, yet the event
// must report one packet and one kernel, with start registers validated from the beginning of
// the stream and end registers validated from the second walker.
HWTEST2_F(AppendFillTest,
          givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesRegistersThenSinglePacketUsesRegisterProfiling, IsBetweenGen9AndGen12lp) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset();
    uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset();
    uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
    uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();

    auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern,
                                           sizeof(immediatePattern),
                                           immediateAllocSize, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(1u, event->getPacketsInUse());
    EXPECT_EQ(1u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
    auto begin = cmdList.begin();
    ASSERT_EQ(2u, itorWalkers.size());
    auto secondWalker = itorWalkers[1];

    // Start timestamps: looked up from the beginning of the parsed stream.
    validateTimestampRegisters<FamilyType>(cmdList,
                                           begin,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
                                           false);

    // End timestamps: looked up starting at the last walker.
    validateTimestampRegisters<FamilyType>(cmdList,
                                           secondWalker,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
                                           false);
}
// Pattern-buffer memory fill with a timestamp event: two walkers are expected, yet the event must
// report one packet and one kernel, with start registers validated from the beginning of the
// stream and end registers validated from the second walker.
HWTEST2_F(AppendFillTest,
          givenCallToAppendMemoryFillWhenTimestampEventUsesRegistersThenSinglePacketUsesRegisterProfiling, IsBetweenGen9AndGen12lp) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset();
    uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset();
    uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
    uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();

    auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(1u, event->getPacketsInUse());
    EXPECT_EQ(1u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
    auto begin = cmdList.begin();
    ASSERT_EQ(2u, itorWalkers.size());
    auto secondWalker = itorWalkers[1];

    // Start timestamps: looked up from the beginning of the parsed stream.
    validateTimestampRegisters<FamilyType>(cmdList,
                                           begin,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
                                           false);

    // End timestamps: looked up starting at the last walker.
    validateTimestampRegisters<FamilyType>(cmdList,
                                           secondWalker,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
                                           false);
}
// Immediate-pattern memory fill with a timestamp event on COMPUTE_WALKER platforms: each of the
// two walkers must carry its own timestamp post-sync, the first targeting the event base address
// and the second targeting the next packet, so the event reports two packets and two kernels.
HWTEST2_F(AppendFillTest,
          givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfiling, IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    // One packet per kernel: the second kernel writes one packet size past the event base.
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + event->getSinglePacketSize();

    auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern,
                                           sizeof(immediatePattern),
                                           immediateAllocSize, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(2u, event->getPacketsInUse());
    EXPECT_EQ(2u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(2u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
// Pattern-buffer memory fill with a timestamp event on COMPUTE_WALKER platforms: each of the two
// walkers must carry its own timestamp post-sync, the first targeting the event base address and
// the second targeting the next packet, so the event reports two packets and two kernels.
HWTEST2_F(AppendFillTest,
          givenCallToAppendMemoryFillWhenTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfiling, IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    // One packet per kernel: the second kernel writes one packet size past the event base.
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + event->getSinglePacketSize();

    auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(2u, event->getPacketsInUse());
    EXPECT_EQ(2u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(2u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
} // namespace ult
} // namespace L0

View File

@@ -278,5 +278,295 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowe
pCommandList->reset();
}
// Fixture alias: tests in the AppendMemoryCopyXeHpAndLater suite run on Test<DeviceFixture>.
using AppendMemoryCopyXeHpAndLater = Test<DeviceFixture>;
// A memory copy dispatched as three COMPUTE_WALKERs with a timestamp event: every walker must
// carry a timestamp post-sync pointing at its own consecutive packet, so the event reports three
// packets and three kernels.
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernels,
          IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    // NOTE(review): presumably the unaligned pointers and size are what split the copy into
    // three kernels; the kernel count itself is asserted below.
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    // One packet per kernel, laid out back to back from the event base address.
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(3u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
// Same three-kernel copy as the single-tile case, but with partitionCount set to 2: each kernel is
// expected to take two packets, so the post-sync addresses advance by two packet sizes per kernel
// and the event reports six packets for three kernels.
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels,
          IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;
    commandList.partitionCount = 2;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    // NOTE(review): presumably the unaligned pointers and size are what split the copy into
    // three kernels; the kernel count itself is asserted below.
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    // Two packets per kernel, so consecutive kernels are two packet sizes apart.
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(6u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
// Three-kernel copy signalling a host-scope timestamp event. Each kernel is expected to use two
// packets: the walker's timestamp post-sync lands in the first one, and a PIPE_CONTROL with an
// immediate-data write of Event::STATE_SIGNALED (DC flush enabled) lands in the packet right after
// it. The event therefore reports six packets for three kernels.
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernelsAndL3FlushWaHandled,
          isXeHpOrXeHpgCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    // NOTE(review): presumably the unaligned pointers and size are what split the copy into
    // three kernels; the kernel count itself is asserted below.
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    // Two packets per kernel, so consecutive walkers are two packet sizes apart.
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(6u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    auto itorPipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    // First immediate write is expected in the packet following the first walker's packet,
    // shifted by the context-end offset when the event uses it.
    uint64_t eventGpuAddress = firstKernelEventAddress + event->getSinglePacketSize();
    if (event->isUsingContextEndOffset()) {
        eventGpuAddress += event->getContextEndOffset();
    }

    uint32_t postSyncPipeControls = 0;
    for (auto it : itorPipeControls) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
            EXPECT_TRUE(cmd->getDcFlushEnable());
            EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
            postSyncPipeControls++;
            // Next kernel's immediate write sits two packets further on.
            eventGpuAddress += (2 * event->getSinglePacketSize());
        }
    }
    EXPECT_EQ(3u, postSyncPipeControls);
}
// Multi-tile (partitionCount == 2) memory copy with a kernel-timestamp event.
// Checks that the copy is issued as three kernels, that the event reports 12 packets / 3 kernels,
// that every COMPUTE_WALKER writes its timestamp post-sync to the expected packet address, and that
// three partition-aware PIPE_CONTROLs write Event::STATE_SIGNALED to the expected addresses.
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenMultiTileCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernelsAndL3FlushWaHandled,
          isXeHpOrXeHpgCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;

    commandList.partitionCount = 2;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal checks: everything below dereferences the pool and the event.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event);

    // Expected walker post-sync destinations: consecutive kernels are four packets apart.
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 8 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(12u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    auto itorPipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    // The first signalling PIPE_CONTROL is expected two packets past the event base,
    // shifted by the context-end offset when the event uses one.
    uint64_t eventGpuAddress = firstKernelEventAddress + 2 * event->getSinglePacketSize();
    if (event->isUsingContextEndOffset()) {
        eventGpuAddress += event->getContextEndOffset();
    }

    uint32_t postSyncPipeControls = 0;
    for (auto it : itorPipeControls) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        // Only PIPE_CONTROLs that write immediate data are counted and checked.
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
            EXPECT_TRUE(cmd->getDcFlushEnable());
            EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
            postSyncPipeControls++;
            // The next kernel's signalling address is four packets further on.
            eventGpuAddress += (4 * event->getSinglePacketSize());
        }
    }
    EXPECT_EQ(3u, postSyncPipeControls);
}
} // namespace ult
} // namespace L0

View File

@@ -551,15 +551,27 @@ TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndO
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_NE(nullptr, eventPool);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
auto &l0HwHelper = L0HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
auto event = std::unique_ptr<L0::Event>(l0HwHelper.createEvent(eventPool.get(), &eventDesc, device));
ASSERT_NE(nullptr, event);
if (L0HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).multiTileCapablePlatform()) {
if (l0HwHelper.multiTileCapablePlatform()) {
EXPECT_TRUE(event->isUsingContextEndOffset());
} else {
EXPECT_FALSE(event->isUsingContextEndOffset());
}
uint32_t *eventCompletionMemory = reinterpret_cast<uint32_t *>(event->getHostAddress());
if (event->isUsingContextEndOffset()) {
eventCompletionMemory = ptrOffset(eventCompletionMemory, event->getContextEndOffset());
}
uint32_t maxPacketsCount = EventPacketsCount::maxKernelSplit * NEO::TimestampPacketSizeControl::preferredPacketCount;
for (uint32_t i = 0; i < maxPacketsCount; i++) {
EXPECT_EQ(Event::STATE_INITIAL, *eventCompletionMemory);
eventCompletionMemory = ptrOffset(eventCompletionMemory, event->getSinglePacketSize());
}
result = event->queryStatus();
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
@@ -1064,7 +1076,7 @@ TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCor
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
}
EXPECT_EQ(1u, event->kernelCount);
EXPECT_EQ(1u, event->getKernelCount());
}
TEST_F(TimestampEventCreate, givenSingleTimestampEventThenAllocationSizeCreatedForAllTimestamps) {
@@ -1093,13 +1105,13 @@ TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectO
gpuAddr += (4u * event->getSinglePacketSize());
event->kernelCount = 2;
event->increaseKernelCount();
event->setPacketsInUse(2u);
EXPECT_EQ(6u, event->getPacketsInUse());
EXPECT_EQ(gpuAddr, event->getPacketAddress(device));
gpuAddr += (2u * event->getSinglePacketSize());
event->kernelCount = 3;
event->increaseKernelCount();
EXPECT_EQ(gpuAddr, event->getPacketAddress(device));
EXPECT_EQ(7u, event->getPacketsInUse());
}
@@ -1122,7 +1134,7 @@ TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrec
}
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
}
EXPECT_EQ(1u, event->kernelCount);
EXPECT_EQ(1u, event->getKernelCount());
}
TEST_F(TimestampEventCreate, givenpCountZeroCallingQueryTimestampExpThenpCountSetProperly) {

View File

@@ -385,6 +385,42 @@ HWTEST_F(PipeControlHelperTests, WhenIsDcFlushAllowedIsCalledThenCorrectResultIs
EXPECT_EQ(hwInfoConfig.isDcFlushAllowed(), MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo));
}
// Emits a PIPE_CONTROL with a WRITE_TIMESTAMP post-sync operation into a scratch stream and checks
// that the command found at the start of the stream carries the requested address, immediate data
// and post-sync operation.
HWTEST_F(PipeControlHelperTests, WhenPipeControlPostSyncTimestampUsedThenCorrectPostSyncUsed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Array form of unique_ptr so the storage is released with delete[]; the scalar form
    // would pair new[] with delete, which is undefined behaviour.
    constexpr size_t bufferSize = 128;
    auto buffer = std::make_unique<uint8_t[]>(bufferSize);

    LinearStream stream(buffer.get(), bufferSize);
    uint64_t address = 0x1234567887654320;
    uint64_t immediateData = 0x0;

    PipeControlArgs args;
    MemorySynchronizationCommands<FamilyType>::addPipeControlWithPostSync(
        stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, args);

    auto pipeControl = genCmdCast<PIPE_CONTROL *>(stream.getCpuBase());
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(address, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    EXPECT_EQ(immediateData, pipeControl->getImmediateData());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation());
}
// Emits a PIPE_CONTROL with a WRITE_IMMEDIATE_DATA post-sync operation into a scratch stream and
// checks that the command found at the start of the stream carries the requested address,
// immediate data and post-sync operation.
HWTEST_F(PipeControlHelperTests, WhenPipeControlPostSyncWriteImmediateDataUsedThenCorrectPostSyncUsed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Array form of unique_ptr so the storage is released with delete[]; the scalar form
    // would pair new[] with delete, which is undefined behaviour.
    constexpr size_t bufferSize = 128;
    auto buffer = std::make_unique<uint8_t[]>(bufferSize);

    LinearStream stream(buffer.get(), bufferSize);
    uint64_t address = 0x1234567887654320;
    uint64_t immediateData = 0x1234;

    PipeControlArgs args;
    MemorySynchronizationCommands<FamilyType>::addPipeControlWithPostSync(
        stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, args);

    auto pipeControl = genCmdCast<PIPE_CONTROL *>(stream.getCpuBase());
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(address, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    EXPECT_EQ(immediateData, pipeControl->getImmediateData());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
}
TEST(HwInfoTest, givenHwInfoWhenChosenEngineTypeQueriedThenDefaultIsReturned) {
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS;

View File

@@ -75,6 +75,10 @@ struct UnitTestHelper {
static void adjustKernelDescriptorForImplicitArgs(KernelDescriptor &kernelDescriptor);
static std::vector<bool> getProgrammedLargeGrfValues(CommandStreamReceiver &csr, LinearStream &linearStream);
static bool getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem);
static bool timestampRegisterHighAddress();
};
} // namespace NEO

View File

@@ -70,4 +70,9 @@ inline uint64_t UnitTestHelper<GfxFamily>::getPipeControlPostSyncAddress(const t
return (gpuAddressHigh << 32) | gpuAddress;
}
// This definition of timestampRegisterHighAddress() unconditionally answers false.
template <typename GfxFamily>
bool UnitTestHelper<GfxFamily>::timestampRegisterHighAddress() {
    constexpr bool usesHighAddress = false;
    return usesHighAddress;
}
} // namespace NEO

View File

@@ -72,4 +72,9 @@ std::vector<bool> UnitTestHelper<GfxFamily>::getProgrammedLargeGrfValues(Command
return {};
}
// This definition never reads the MI_STORE_REGISTER_MEM command it is given and always returns false.
template <typename GfxFamily>
inline bool UnitTestHelper<GfxFamily>::getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem) {
    constexpr bool workloadPartitionEnabled = false;
    return workloadPartitionEnabled;
}
} // namespace NEO

View File

@@ -100,4 +100,9 @@ std::vector<bool> UnitTestHelper<GfxFamily>::getProgrammedLargeGrfValues(Command
return largeGrfValues;
}
// Returns the value of the command's getWorkloadPartitionIdOffsetEnable() accessor.
template <typename GfxFamily>
inline bool UnitTestHelper<GfxFamily>::getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem) {
    const bool partitionIdOffsetEnabled = storeRegisterMem.getWorkloadPartitionIdOffsetEnable();
    return partitionIdOffsetEnabled;
}
} // namespace NEO

View File

@@ -34,6 +34,7 @@ using IsAtMostXeHpgCore = IsAtMostGfxCore<IGFX_XE_HPG_CORE>;
// Core-family selector aliases. The IsAtLeastGfxCore / IsAtMostGfxCore / IsAnyGfxCores templates
// are defined elsewhere; these aliases only bind them to specific IGFX_* core ids.
// NOTE(review): presumably consumed as the last HWTEST2_F argument to choose which cores a test runs on — confirm.
using IsAtLeastXeHpcCore = IsAtLeastGfxCore<IGFX_XE_HPC_CORE>;
using IsAtMostXeHpcCore = IsAtMostGfxCore<IGFX_XE_HPC_CORE>;
// Pairwise combinations of the XE_HP, XE_HPG and XE_HPC core ids.
using isXeHpOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPG_CORE>;
using isXeHpOrXeHpcCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;
using isXeHpcOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HPC_CORE, IGFX_XE_HPG_CORE>;