mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Use single event for multiple kernels
Related-To: NEO-6871 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a3745c28a3
commit
819d648997
@@ -181,4 +181,16 @@ void CommandList::handleIndirectAllocationResidency() {
|
||||
}
|
||||
}
|
||||
|
||||
// Configures a signal event for partitioned execution of this command list.
//
// Nothing is changed unless all of the following hold: the command list has
// more than one partition, signalEvent is a non-null handle, and the event
// behind the handle reports isEventTimestampFlagSet(). In that case the
// event's packets-in-use count is set to partitionCount.
//
// signalEvent: event handle to inspect; may be null.
// Returns true when the event was configured for partitioned use, false otherwise.
bool CommandList::setupTimestampEventForMultiTile(ze_event_handle_t signalEvent) {
    const bool multiplePartitions = this->partitionCount > 1;
    if (!multiplePartitions || !signalEvent) {
        return false;
    }

    auto timestampCandidate = Event::fromHandle(signalEvent);
    if (!timestampCandidate->isEventTimestampFlagSet()) {
        return false;
    }

    timestampCandidate->setPacketsInUse(this->partitionCount);
    return true;
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -264,6 +264,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
protected:
|
||||
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
|
||||
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
|
||||
bool setupTimestampEventForMultiTile(ze_event_handle_t signalEvent);
|
||||
|
||||
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
|
||||
std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations;
|
||||
|
||||
@@ -230,7 +230,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]);
|
||||
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions);
|
||||
void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb, bool workloadPartition);
|
||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask);
|
||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition);
|
||||
void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker, bool workloadPartition);
|
||||
void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker);
|
||||
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);
|
||||
|
||||
@@ -364,9 +364,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||
return ret;
|
||||
}
|
||||
|
||||
appendEventForProfiling(hSignalEvent, true, false);
|
||||
bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent);
|
||||
|
||||
appendEventForProfiling(hSignalEvent, true, workloadPartition);
|
||||
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
||||
appendSignalEventPostWalker(hSignalEvent, false);
|
||||
appendSignalEventPostWalker(hSignalEvent, workloadPartition);
|
||||
|
||||
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
|
||||
executeCommandListImmediate(true);
|
||||
@@ -800,22 +802,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemAdvise(ze_device_hand
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent) {
|
||||
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(hEvent, true, false);
|
||||
} else {
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(void *dstPtr,
|
||||
NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
@@ -1075,11 +1061,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
if (isStateless) {
|
||||
func = Builtin::CopyBufferToBufferSideStateless;
|
||||
}
|
||||
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||
if (isCopyOnly()) {
|
||||
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||
dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
||||
srcAllocationStruct.alignedAllocationPtr,
|
||||
srcAllocationStruct.alloc, srcAllocationStruct.offset, leftSize)
|
||||
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||
srcAllocationStruct.alloc, srcAllocationStruct.offset, leftSize);
|
||||
} else {
|
||||
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||
dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||
srcAllocationStruct.alloc, srcAllocationStruct.offset,
|
||||
@@ -1088,17 +1076,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
hSignalEvent,
|
||||
isStateless);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
|
||||
Builtin func = Builtin::CopyBufferToBufferMiddle;
|
||||
if (isStateless) {
|
||||
func = Builtin::CopyBufferToBufferMiddleStateless;
|
||||
}
|
||||
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||
if (isCopyOnly()) {
|
||||
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
||||
srcAllocationStruct.alignedAllocationPtr,
|
||||
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, middleSizeBytes)
|
||||
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, middleSizeBytes);
|
||||
} else {
|
||||
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
|
||||
@@ -1108,17 +1099,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
hSignalEvent,
|
||||
isStateless);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
||||
Builtin func = Builtin::CopyBufferToBufferSide;
|
||||
if (isStateless) {
|
||||
func = Builtin::CopyBufferToBufferSideStateless;
|
||||
}
|
||||
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||
if (isCopyOnly()) {
|
||||
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
||||
srcAllocationStruct.alignedAllocationPtr,
|
||||
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, rightSize)
|
||||
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, rightSize);
|
||||
} else {
|
||||
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
|
||||
@@ -1127,6 +1121,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
hSignalEvent,
|
||||
isStateless);
|
||||
}
|
||||
}
|
||||
|
||||
appendEventForProfilingAllWalkers(hSignalEvent, false);
|
||||
|
||||
@@ -1557,6 +1552,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
|
||||
patternGfxAlloc);
|
||||
builtinFunctionRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
|
||||
|
||||
res = appendLaunchKernelSplit(builtinFunctionRemainder->toHandle(), &dispatchFuncArgs, hSignalEvent);
|
||||
if (res) {
|
||||
return res;
|
||||
@@ -1951,7 +1947,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_h
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
|
||||
auto baseAddr = event->getGpuAddress(this->device);
|
||||
auto baseAddr = event->getPacketAddress(this->device);
|
||||
auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset();
|
||||
auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset();
|
||||
|
||||
@@ -1966,7 +1962,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_h
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress, workloadPartition);
|
||||
}
|
||||
|
||||
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask);
|
||||
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -2018,6 +2014,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
}
|
||||
}
|
||||
|
||||
bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent);
|
||||
appendEventForProfiling(hSignalEvent, true, workloadPartition);
|
||||
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
@@ -2031,17 +2030,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
} else {
|
||||
NEO::PipeControlArgs args;
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlWithPostSync(
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||
*commandContainer.getCommandStream(),
|
||||
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
|
||||
reinterpret_cast<uint64_t>(dstptr),
|
||||
0,
|
||||
hwInfo,
|
||||
args);
|
||||
}
|
||||
|
||||
if (hSignalEvent) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(hSignalEvent, false);
|
||||
}
|
||||
appendSignalEventPostWalker(hSignalEvent, workloadPartition);
|
||||
|
||||
auto allocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(uint64_t), false);
|
||||
commandContainer.addToResidencyContainer(allocationStruct.alloc);
|
||||
@@ -2263,7 +2261,7 @@ void CommandListCoreFamily<gfxCoreFamily>::programStateBaseAddress(NEO::CommandC
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask) {}
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition) {}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_t hSignalEvent,
|
||||
@@ -2274,15 +2272,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
bool workloadPartition = false;
|
||||
if (this->partitionCount > 1 &&
|
||||
hSignalEvent) {
|
||||
auto event = Event::fromHandle(hSignalEvent);
|
||||
if (event->isEventTimestampFlagSet()) {
|
||||
event->setPacketsInUse(this->partitionCount);
|
||||
workloadPartition = true;
|
||||
}
|
||||
}
|
||||
bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent);
|
||||
appendEventForProfiling(hSignalEvent, true, workloadPartition);
|
||||
|
||||
if (isCopyOnly()) {
|
||||
|
||||
@@ -201,4 +201,20 @@ inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileB
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent) {
|
||||
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(hEvent, true, false);
|
||||
} else {
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -344,4 +344,30 @@ inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileB
|
||||
false);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pThreadGroupDimensions,
|
||||
ze_event_handle_t hEvent) {
|
||||
if (hEvent) {
|
||||
Event::fromHandle(hEvent)->increaseKernelCount();
|
||||
}
|
||||
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, false, false, false);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||
if (isCopyOnly()) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(hEvent, true, false);
|
||||
} else {
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
}
|
||||
} else {
|
||||
if (hEvent && beforeWalker) {
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
event->zeroKernelCount();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -96,6 +96,17 @@ struct Event : _ze_event_handle_t {
|
||||
return isTimestampEvent || usingContextEndOffset;
|
||||
}
|
||||
|
||||
void increaseKernelCount() {
|
||||
kernelCount++;
|
||||
UNRECOVERABLE_IF(kernelCount > EventPacketsCount::maxKernelSplit);
|
||||
}
|
||||
uint32_t getKernelCount() const {
|
||||
return kernelCount;
|
||||
}
|
||||
void zeroKernelCount() {
|
||||
kernelCount = 0;
|
||||
}
|
||||
|
||||
uint64_t globalStartTS;
|
||||
uint64_t globalEndTS;
|
||||
uint64_t contextStartTS;
|
||||
@@ -110,8 +121,6 @@ struct Event : _ze_event_handle_t {
|
||||
ze_event_scope_flags_t signalScope = 0u;
|
||||
ze_event_scope_flags_t waitScope = 0u;
|
||||
|
||||
uint32_t kernelCount = 1u;
|
||||
|
||||
bool l3FlushWaApplied = false;
|
||||
|
||||
protected:
|
||||
@@ -122,6 +131,9 @@ struct Event : _ze_event_handle_t {
|
||||
size_t timestampSizeInDw = 0u;
|
||||
size_t singlePacketSize = 0u;
|
||||
size_t eventPoolOffset = 0u;
|
||||
|
||||
uint32_t kernelCount = 1u;
|
||||
|
||||
bool isTimestampEvent = false;
|
||||
bool usingContextEndOffset = false;
|
||||
};
|
||||
@@ -180,8 +192,7 @@ struct EventImp : public Event {
|
||||
|
||||
protected:
|
||||
ze_result_t calculateProfilingData();
|
||||
ze_result_t queryStatusKernelTimestamp();
|
||||
ze_result_t queryStatusNonTimestamp();
|
||||
ze_result_t queryStatusEventPackets();
|
||||
ze_result_t hostEventSetValue(TagSizeT eventValue);
|
||||
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
|
||||
void assignKernelEventCompletionData(void *address);
|
||||
|
||||
@@ -104,33 +104,13 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
|
||||
ze_result_t EventImp<TagSizeT>::queryStatusEventPackets() {
|
||||
assignKernelEventCompletionData(hostAddress);
|
||||
uint32_t queryVal = Event::STATE_CLEARED;
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
|
||||
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
|
||||
bool ready = NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(
|
||||
static_cast<TagSizeT const *>(kernelEventCompletionData[i].getContextEndAddress(packetId)),
|
||||
queryVal,
|
||||
std::not_equal_to<TagSizeT>());
|
||||
if (!ready) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
}
|
||||
}
|
||||
this->csr->getInternalAllocationStorage()->cleanAllocationList(this->csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
|
||||
assignKernelEventCompletionData(hostAddress);
|
||||
uint32_t queryVal = Event::STATE_CLEARED;
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
|
||||
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
|
||||
void const *queryAddress = usingContextEndOffset
|
||||
void const *queryAddress = isUsingContextEndOffset()
|
||||
? kernelEventCompletionData[i].getContextEndAddress(packetId)
|
||||
: kernelEventCompletionData[i].getContextStartAddress(packetId);
|
||||
bool ready = NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(
|
||||
@@ -156,11 +136,7 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
|
||||
*hostAddr = metricStreamer->getNotificationState();
|
||||
}
|
||||
this->csr->downloadAllocations();
|
||||
if (isEventTimestampFlagSet()) {
|
||||
return queryStatusKernelTimestamp();
|
||||
} else {
|
||||
return queryStatusNonTimestamp();
|
||||
}
|
||||
return queryStatusEventPackets();
|
||||
}
|
||||
|
||||
template <typename TagSizeT>
|
||||
@@ -274,12 +250,10 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
|
||||
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::reset() {
|
||||
if (isEventTimestampFlagSet()) {
|
||||
kernelCount = EventPacketsCount::maxKernelSplit;
|
||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||
kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
|
||||
}
|
||||
}
|
||||
hostEventSetValue(Event::STATE_INITIAL);
|
||||
resetPackets();
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/helpers/variable_backup.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
@@ -90,5 +92,96 @@ struct MultiTileCommandListFixture : public SingleRootMultiSubDeviceFixture {
|
||||
std::unique_ptr<VariableBackup<bool>> osLocalMemoryBackup;
|
||||
};
|
||||
|
||||
template <typename FamilyType>
|
||||
void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
GenCmdList::iterator &startIt,
|
||||
uint32_t firstLoadRegisterRegSrcAddress,
|
||||
uint64_t firstStoreRegMemAddress,
|
||||
uint32_t secondLoadRegisterRegSrcAddress,
|
||||
uint64_t secondStoreRegMemAddress,
|
||||
bool workloadPartition) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
|
||||
auto itor = find<MI_LOAD_REGISTER_REG *>(startIt, cmdList.end());
|
||||
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
}
|
||||
itor++;
|
||||
startIt = itor;
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -347,5 +347,76 @@ struct MockCommandList : public CommandList {
|
||||
uint8_t *batchBuffer = nullptr;
|
||||
NEO::GraphicsAllocation *mockAllocation = nullptr;
|
||||
};
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
class MockAppendMemoryCopy : public CommandListCoreFamily<gfxCoreFamily> {
|
||||
public:
|
||||
using BaseClass = CommandListCoreFamily<gfxCoreFamily>;
|
||||
|
||||
ADDMETHOD(appendMemoryCopyKernelWithGA, ze_result_t, false, ZE_RESULT_SUCCESS,
|
||||
(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
uint64_t dstOffset, void *srcPtr,
|
||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||
uint64_t srcOffset, uint64_t size,
|
||||
uint64_t elementSize, Builtin builtin,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
bool isStateless),
|
||||
(dstPtr, dstPtrAlloc, dstOffset, srcPtr, srcPtrAlloc, srcOffset, size, elementSize, builtin, hSignalEvent, isStateless));
|
||||
|
||||
ADDMETHOD_NOBASE(appendMemoryCopyBlit, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(uintptr_t dstPtr,
|
||||
NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
uint64_t dstOffset, uintptr_t srcPtr,
|
||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||
uint64_t srcOffset,
|
||||
uint64_t size));
|
||||
|
||||
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override {
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize, allowHostCopy);
|
||||
}
|
||||
|
||||
ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||
uint32_t dstPitch, size_t dstOffset,
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
size_t srcOffset, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
|
||||
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||
uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch, size_t srcOffset,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override {
|
||||
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
|
||||
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation,
|
||||
NEO::GraphicsAllocation *dstAllocation,
|
||||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
ze_copy_region_t srcRegion,
|
||||
ze_copy_region_t dstRegion, const Vec3<size_t> ©Size,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||
srcBlitCopyRegionOffset = srcOffset;
|
||||
dstBlitCopyRegionOffset = dstOffset;
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
uintptr_t srcAlignedPtr;
|
||||
uintptr_t dstAlignedPtr;
|
||||
size_t srcBlitCopyRegionOffset = 0;
|
||||
size_t dstBlitCopyRegionOffset = 0;
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -501,13 +501,6 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenS
|
||||
itor++;
|
||||
itor = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
itor++;
|
||||
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
|
||||
if (MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo)) {
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
} else {
|
||||
EXPECT_EQ(cmdList.end(), itor);
|
||||
}
|
||||
}
|
||||
|
||||
using platformSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
|
||||
@@ -540,22 +533,18 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto iterator = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
bool postSyncFound = false;
|
||||
uint32_t postSyncFound = 0;
|
||||
ASSERT_NE(0u, iterator.size());
|
||||
uint32_t numPCs = 0;
|
||||
for (auto it : iterator) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
numPCs++;
|
||||
if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) &&
|
||||
(cmd->getImmediateData() == Event::STATE_SIGNALED) &&
|
||||
(cmd->getDcFlushEnable())) {
|
||||
postSyncFound = true;
|
||||
break;
|
||||
postSyncFound++;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_TRUE(postSyncFound);
|
||||
EXPECT_EQ(numPCs, iterator.size());
|
||||
EXPECT_EQ(1u, postSyncFound);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeSetToSubDeviceThenB2BPipeControlIsAddedWithDcFlushForLastPC, platformSupport) {
|
||||
@@ -585,22 +574,18 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto iterator = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
bool postSyncFound = false;
|
||||
uint32_t postSyncFound = 0;
|
||||
ASSERT_NE(0u, iterator.size());
|
||||
uint32_t numPCs = 0;
|
||||
for (auto it : iterator) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
numPCs++;
|
||||
if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) &&
|
||||
(cmd->getImmediateData() == Event::STATE_SIGNALED) &&
|
||||
(!cmd->getDcFlushEnable())) {
|
||||
postSyncFound = true;
|
||||
break;
|
||||
postSyncFound++;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_TRUE(postSyncFound);
|
||||
EXPECT_EQ(numPCs, iterator.size() - 1);
|
||||
EXPECT_EQ(1u, postSyncFound);
|
||||
|
||||
auto it = *(iterator.end() - 1);
|
||||
auto cmd1 = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
|
||||
@@ -77,12 +77,19 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalle
|
||||
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset),
|
||||
commandContainer.getCommandStream()->getUsed() - commandStreamOffset));
|
||||
|
||||
auto iterator = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*iterator);
|
||||
auto pcList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(0u, pcList.size());
|
||||
bool foundTimestampPipeControl = false;
|
||||
for (auto it : pcList) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
|
||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||
EXPECT_FALSE(cmd->getDcFlushEnable());
|
||||
EXPECT_EQ(timestampAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_EQ(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, cmd->getPostSyncOperation());
|
||||
foundTimestampPipeControl = true;
|
||||
}
|
||||
}
|
||||
EXPECT_TRUE(foundTimestampPipeControl);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalledThenTimestampAllocationIsInsideResidencyContainer, IsAtLeastSkl) {
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
@@ -382,82 +381,6 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
|
||||
EXPECT_EQ(1u, postSyncFound);
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
uint64_t firstRegisterAddress, uint64_t secondRegisterAddress) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
|
||||
auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(firstRegisterAddress, cmdMem->getMemoryAddress());
|
||||
EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(secondRegisterAddress, cmdMem->getMemoryAddress());
|
||||
EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListAppendBarrier,
|
||||
GivenTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndTimestampOperations, IsWithinXeGfxFamily) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
@@ -533,7 +456,12 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
cmdBuffer,
|
||||
timestampRegisters));
|
||||
validateTimestampRegisters<FamilyType>(cmdList, globalStartAddress, contextStartAddress);
|
||||
auto begin = cmdList.begin();
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
true);
|
||||
|
||||
auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + timestampRegisters;
|
||||
|
||||
@@ -557,7 +485,12 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
cmdBuffer,
|
||||
timestampRegisters));
|
||||
validateTimestampRegisters<FamilyType>(cmdList, globalEndAddress, contextEndAddress);
|
||||
begin = cmdList.begin();
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
true);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
|
||||
@@ -17,70 +18,6 @@ namespace ult {
|
||||
|
||||
using AppendMemoryCopy = Test<DeviceFixture>;
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
class MockAppendMemoryCopy : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
|
||||
public:
|
||||
ADDMETHOD_NOBASE(appendMemoryCopyKernelWithGA, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
uint64_t dstOffset, void *srcPtr,
|
||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||
uint64_t srcOffset, uint64_t size,
|
||||
uint64_t elementSize, Builtin builtin,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
bool isStateless));
|
||||
ADDMETHOD_NOBASE(appendMemoryCopyBlit, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(uintptr_t dstPtr,
|
||||
NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
uint64_t dstOffset, uintptr_t srcPtr,
|
||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||
uint64_t srcOffset,
|
||||
uint64_t size));
|
||||
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override {
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize, allowHostCopy);
|
||||
}
|
||||
ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||
uint32_t dstPitch, size_t dstOffset,
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
size_t srcOffset, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
|
||||
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||
uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
|
||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||
uint32_t srcSlicePitch, size_t srcOffset,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override {
|
||||
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
|
||||
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation,
|
||||
NEO::GraphicsAllocation *dstAllocation,
|
||||
size_t srcOffset,
|
||||
size_t dstOffset,
|
||||
ze_copy_region_t srcRegion,
|
||||
ze_copy_region_t dstRegion, const Vec3<size_t> ©Size,
|
||||
size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch,
|
||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||
srcBlitCopyRegionOffset = srcOffset;
|
||||
dstBlitCopyRegionOffset = dstOffset;
|
||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
uintptr_t srcAlignedPtr;
|
||||
uintptr_t dstAlignedPtr;
|
||||
size_t srcBlitCopyRegionOffset = 0;
|
||||
size_t dstBlitCopyRegionOffset = 0;
|
||||
};
|
||||
|
||||
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, IsAtLeastSkl) {
|
||||
MockAppendMemoryCopy<gfxCoreFamily> cmdList;
|
||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
@@ -343,12 +280,14 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyT
|
||||
}
|
||||
|
||||
using SupportedPlatforms = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;
|
||||
HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, SupportedPlatforms) {
|
||||
HWTEST2_F(AppendMemoryCopy,
|
||||
givenCommandListUsesTimestampPassedToMemoryCopyWhenTwoKernelsAreUsedThenAppendProfilingCalledForSinglePacket, SupportedPlatforms) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
|
||||
MockAppendMemoryCopy<gfxCoreFamily> commandList;
|
||||
commandList.appendMemoryCopyKernelWithGACallBase = true;
|
||||
|
||||
commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
void *srcPtr = reinterpret_cast<void *>(0x1234);
|
||||
void *dstPtr = reinterpret_cast<void *>(0x2345);
|
||||
@@ -365,65 +304,97 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenA
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset();
|
||||
uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset();
|
||||
uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
|
||||
uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();
|
||||
|
||||
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
|
||||
EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalled, 0u);
|
||||
EXPECT_EQ(commandList.appendMemoryCopyBlitCalled, 0u);
|
||||
EXPECT_EQ(2u, commandList.appendMemoryCopyKernelWithGACalled);
|
||||
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
|
||||
EXPECT_EQ(1u, event->getPacketsInUse());
|
||||
EXPECT_EQ(1u, event->getKernelCount());
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||
commandList.commandContainer.getCommandStream()->getUsed()));
|
||||
auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
{
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
|
||||
auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
|
||||
auto begin = cmdList.begin();
|
||||
ASSERT_EQ(2u, itorWalkers.size());
|
||||
auto secondWalker = itorWalkers[1];
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
false);
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
secondWalker,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
false);
|
||||
}
|
||||
|
||||
itor++;
|
||||
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
{
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
HWTEST2_F(AppendMemoryCopy,
|
||||
givenCommandListUsesTimestampPassedToMemoryCopyWhenThreeKernelsAreUsedThenAppendProfilingCalledForSinglePacket, SupportedPlatforms) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
|
||||
MockAppendMemoryCopy<gfxCoreFamily> commandList;
|
||||
commandList.appendMemoryCopyKernelWithGACallBase = true;
|
||||
|
||||
commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
void *srcPtr = reinterpret_cast<void *>(0x1231);
|
||||
void *dstPtr = reinterpret_cast<void *>(0x200002345);
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset();
|
||||
uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset();
|
||||
uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
|
||||
uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();
|
||||
|
||||
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
|
||||
EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
|
||||
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
|
||||
EXPECT_EQ(1u, event->getPacketsInUse());
|
||||
EXPECT_EQ(1u, event->getKernelCount());
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||
commandList.commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
|
||||
auto begin = cmdList.begin();
|
||||
ASSERT_EQ(3u, itorWalkers.size());
|
||||
auto thirdWalker = itorWalkers[2];
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
false);
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
thirdWalker,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
false);
|
||||
}
|
||||
|
||||
itor++;
|
||||
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
{
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
|
||||
EXPECT_FALSE(cmd->getDcFlushEnable());
|
||||
}
|
||||
|
||||
itor++;
|
||||
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
{
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
}
|
||||
|
||||
itor++;
|
||||
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
{
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
}
|
||||
|
||||
auto temp = itor;
|
||||
auto numPCs = findAll<PIPE_CONTROL *>(temp, cmdList.end());
|
||||
//we should have only one PC with dcFlush added
|
||||
ASSERT_EQ(1u, numPCs.size());
|
||||
|
||||
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
{
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
|
||||
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable());
|
||||
}
|
||||
}
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -355,5 +355,79 @@ HWTEST2_F(CommandListAppendSignalEvent,
|
||||
EXPECT_EQ(1u, postSyncFound);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendSignalEvent,
|
||||
givenMultiTileCommandListWhenAppendWriteGlobalTimestampCalledWithSignalEventThenWorkPartitionedRegistersAreUsed, IsAtLeastXeHpCore) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
auto &commandContainer = commandList->commandContainer;
|
||||
|
||||
uint64_t timestampAddress = 0x12345678555500;
|
||||
uint64_t *dstptr = reinterpret_cast<uint64_t *>(timestampAddress);
|
||||
|
||||
constexpr uint32_t packets = 2u;
|
||||
|
||||
event->setEventTimestampFlag(true);
|
||||
commandList->partitionCount = packets;
|
||||
|
||||
commandList->appendWriteGlobalTimestamp(dstptr, event->toHandle(), 0, nullptr);
|
||||
EXPECT_EQ(packets, event->getPacketsInUse());
|
||||
|
||||
auto eventGpuAddress = event->getGpuAddress(device);
|
||||
uint64_t contextStartAddress = eventGpuAddress + event->getContextStartOffset();
|
||||
uint64_t globalStartAddress = eventGpuAddress + event->getGlobalStartOffset();
|
||||
uint64_t contextEndAddress = eventGpuAddress + event->getContextEndOffset();
|
||||
uint64_t globalEndAddress = eventGpuAddress + event->getGlobalEndOffset();
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itorPC);
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
|
||||
while (cmd->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
|
||||
itorPC++;
|
||||
itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itorPC);
|
||||
cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
|
||||
}
|
||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||
EXPECT_FALSE(cmd->getDcFlushEnable());
|
||||
EXPECT_EQ(timestampAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
|
||||
auto startCmdList = cmdList.begin();
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
true);
|
||||
|
||||
if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
|
||||
uint64_t globalStartAddressHigh = globalStartAddress + sizeof(uint32_t);
|
||||
uint64_t contextStartAddressHigh = contextStartAddress + sizeof(uint32_t);
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
REG_GLOBAL_TIMESTAMP_UN, globalStartAddressHigh,
|
||||
0x23AC, contextStartAddressHigh,
|
||||
true);
|
||||
}
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
true);
|
||||
|
||||
if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
|
||||
uint64_t globalEndAddressHigh = globalEndAddress + sizeof(uint32_t);
|
||||
uint64_t contextEndAddressHigh = contextEndAddress + sizeof(uint32_t);
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
REG_GLOBAL_TIMESTAMP_UN, globalEndAddressHigh,
|
||||
0x23AC, contextEndAddressHigh,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -210,9 +210,9 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnTimestampEventWithThre
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
event->setPacketsInUse(3u);
|
||||
event->kernelCount = 2;
|
||||
event->increaseKernelCount();
|
||||
event->setPacketsInUse(3u);
|
||||
event->kernelCount = 3;
|
||||
event->increaseKernelCount();
|
||||
event->setPacketsInUse(3u);
|
||||
ASSERT_EQ(9u, event->getPacketsInUse());
|
||||
|
||||
|
||||
@@ -6,11 +6,13 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h"
|
||||
#include "level_zero/core/source/kernel/kernel_imp.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
@@ -210,5 +212,217 @@ HWTEST2_F(AppendFillTest,
|
||||
delete[] nonMultipleDstPtr;
|
||||
}
|
||||
|
||||
using IsBetweenGen9AndGen12lp = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_GEN12LP_CORE>;
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesRegistersThenSinglePacketUsesRegisterProfiling, IsBetweenGen9AndGen12lp) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset();
|
||||
uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset();
|
||||
uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
|
||||
uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
|
||||
result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern,
|
||||
sizeof(immediatePattern),
|
||||
immediateAllocSize, event->toHandle(), 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1u, event->getPacketsInUse());
|
||||
EXPECT_EQ(1u, event->getKernelCount());
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||
commandList->commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
|
||||
auto begin = cmdList.begin();
|
||||
ASSERT_EQ(2u, itorWalkers.size());
|
||||
auto secondWalker = itorWalkers[1];
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
false);
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
secondWalker,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
false);
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenCallToAppendMemoryFillWhenTimestampEventUsesRegistersThenSinglePacketUsesRegisterProfiling, IsBetweenGen9AndGen12lp) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset();
|
||||
uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset();
|
||||
uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
|
||||
uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
|
||||
result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1u, event->getPacketsInUse());
|
||||
EXPECT_EQ(1u, event->getKernelCount());
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||
commandList->commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
|
||||
auto begin = cmdList.begin();
|
||||
ASSERT_EQ(2u, itorWalkers.size());
|
||||
auto secondWalker = itorWalkers[1];
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
false);
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
secondWalker,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
false);
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfiling, IsAtLeastXeHpCore) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
|
||||
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
uint64_t firstKernelEventAddress = event->getGpuAddress(device);
|
||||
uint64_t secondKernelEventAddress = event->getGpuAddress(device) + event->getSinglePacketSize();
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
|
||||
result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern,
|
||||
sizeof(immediatePattern),
|
||||
immediateAllocSize, event->toHandle(), 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(2u, event->getPacketsInUse());
|
||||
EXPECT_EQ(2u, event->getKernelCount());
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||
commandList->commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(2u, itorWalkers.size());
|
||||
auto firstWalker = itorWalkers[0];
|
||||
auto secondWalker = itorWalkers[1];
|
||||
|
||||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
|
||||
EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
|
||||
walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
|
||||
EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
|
||||
}
|
||||
|
||||
// Appends a memory fill signalling a kernel-timestamp event and checks that the
// command stream holds two COMPUTE_WALKER commands, each programmed to write its
// timestamp through the walker post-sync into its own packet of the event.
HWTEST2_F(AppendFillTest,
          givenCallToAppendMemoryFillWhenTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfiling, IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    // Stop here instead of dereferencing a null pool/event below.
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event);

    // Expected post-sync destinations: consecutive packets of the same event.
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + event->getSinglePacketSize();

    auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(2u, event->getPacketsInUse());
    EXPECT_EQ(2u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(2u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -278,5 +278,295 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowe
|
||||
pCommandList->reset();
|
||||
}
|
||||
|
||||
// Fixture alias for the memory-copy tests below: they run on top of DeviceFixture.
using AppendMemoryCopyXeHpAndLater = Test<DeviceFixture>;
|
||||
|
||||
// Appends a memory copy signalling a kernel-timestamp event on a command list with
// the default partition count and checks that three COMPUTE_WALKER commands are
// emitted, each writing its timestamp via post-sync to consecutive event packets.
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernels,
          IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    // Stop here instead of dereferencing a null pool/event below.
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event);

    // One packet per kernel is expected, so destinations are one packet apart.
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(3u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
|
||||
|
||||
// Same scenario as the single-tile memory-copy test, but with partitionCount set
// to 2: three COMPUTE_WALKER commands are expected, six packets in use, and each
// walker's post-sync destination is two packets after the previous one.
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels,
          IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;
    commandList.partitionCount = 2;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    // Stop here instead of dereferencing a null pool/event below.
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event);

    // Two packets per kernel are expected (3 kernels -> 6 packets in use).
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(6u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
|
||||
|
||||
// Memory copy signalling a kernel-timestamp event created with host signal scope,
// on a command list with the default partition count. Besides the three
// COMPUTE_WALKER post-sync timestamp writes, the test expects three PIPE_CONTROL
// commands that write Event::STATE_SIGNALED as immediate data, with DC flush
// enabled and workload partition offset disabled, each targeting the packet
// right after the corresponding walker's packet.
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernelsAndL3FlushWaHandled,
          isXeHpOrXeHpgCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    // Stop here instead of dereferencing a null pool/event below.
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event);

    // Two packets per kernel are expected (3 kernels -> 6 packets in use).
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(6u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    auto itorPipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    // First expected PIPE_CONTROL target: the packet following the first walker's packet.
    uint64_t eventGpuAddress = firstKernelEventAddress + event->getSinglePacketSize();
    if (event->isUsingContextEndOffset()) {
        eventGpuAddress += event->getContextEndOffset();
    }

    uint32_t postSyncPipeControls = 0;
    for (auto it : itorPipeControls) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        // Only PIPE_CONTROLs that write immediate data are checked and counted.
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
            EXPECT_TRUE(cmd->getDcFlushEnable());
            EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
            postSyncPipeControls++;
            // Advance to the matching packet of the next kernel.
            eventGpuAddress += (2 * event->getSinglePacketSize());
        }
    }
    EXPECT_EQ(3u, postSyncPipeControls);
}
|
||||
|
||||
// Multi-tile variant (partitionCount = 2) of the host-signal-scope memory-copy
// test. Twelve packets are expected in use, walker post-sync destinations are
// four packets apart, and the three immediate-data PIPE_CONTROL commands have
// workload partition offset enabled and target the address two packets after
// the corresponding walker's destination.
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenMultiTileCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernelsAndL3FlushWaHandled,
          isXeHpOrXeHpgCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;
    commandList.partitionCount = 2;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    // Stop here instead of dereferencing a null pool/event below.
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event);

    // Four packets per kernel are expected (3 kernels -> 12 packets in use).
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 8 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(12u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    auto itorPipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    // First expected PIPE_CONTROL target: two packets after the first walker's destination.
    uint64_t eventGpuAddress = firstKernelEventAddress + 2 * event->getSinglePacketSize();
    if (event->isUsingContextEndOffset()) {
        eventGpuAddress += event->getContextEndOffset();
    }

    uint32_t postSyncPipeControls = 0;
    for (auto it : itorPipeControls) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        // Only PIPE_CONTROLs that write immediate data are checked and counted.
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
            EXPECT_TRUE(cmd->getDcFlushEnable());
            EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
            postSyncPipeControls++;
            // Advance to the matching packet of the next kernel.
            eventGpuAddress += (4 * event->getSinglePacketSize());
        }
    }
    EXPECT_EQ(3u, postSyncPipeControls);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -551,15 +551,27 @@ TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndO
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ASSERT_NE(nullptr, eventPool);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
|
||||
auto &l0HwHelper = L0HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
|
||||
auto event = std::unique_ptr<L0::Event>(l0HwHelper.createEvent(eventPool.get(), &eventDesc, device));
|
||||
ASSERT_NE(nullptr, event);
|
||||
|
||||
if (L0HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).multiTileCapablePlatform()) {
|
||||
if (l0HwHelper.multiTileCapablePlatform()) {
|
||||
EXPECT_TRUE(event->isUsingContextEndOffset());
|
||||
} else {
|
||||
EXPECT_FALSE(event->isUsingContextEndOffset());
|
||||
}
|
||||
|
||||
uint32_t *eventCompletionMemory = reinterpret_cast<uint32_t *>(event->getHostAddress());
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
eventCompletionMemory = ptrOffset(eventCompletionMemory, event->getContextEndOffset());
|
||||
}
|
||||
uint32_t maxPacketsCount = EventPacketsCount::maxKernelSplit * NEO::TimestampPacketSizeControl::preferredPacketCount;
|
||||
for (uint32_t i = 0; i < maxPacketsCount; i++) {
|
||||
EXPECT_EQ(Event::STATE_INITIAL, *eventCompletionMemory);
|
||||
eventCompletionMemory = ptrOffset(eventCompletionMemory, event->getSinglePacketSize());
|
||||
}
|
||||
|
||||
result = event->queryStatus();
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
|
||||
|
||||
@@ -1064,7 +1076,7 @@ TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCor
|
||||
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
|
||||
}
|
||||
|
||||
EXPECT_EQ(1u, event->kernelCount);
|
||||
EXPECT_EQ(1u, event->getKernelCount());
|
||||
}
|
||||
|
||||
TEST_F(TimestampEventCreate, givenSingleTimestampEventThenAllocationSizeCreatedForAllTimestamps) {
|
||||
@@ -1093,13 +1105,13 @@ TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectO
|
||||
|
||||
gpuAddr += (4u * event->getSinglePacketSize());
|
||||
|
||||
event->kernelCount = 2;
|
||||
event->increaseKernelCount();
|
||||
event->setPacketsInUse(2u);
|
||||
EXPECT_EQ(6u, event->getPacketsInUse());
|
||||
EXPECT_EQ(gpuAddr, event->getPacketAddress(device));
|
||||
|
||||
gpuAddr += (2u * event->getSinglePacketSize());
|
||||
event->kernelCount = 3;
|
||||
event->increaseKernelCount();
|
||||
EXPECT_EQ(gpuAddr, event->getPacketAddress(device));
|
||||
EXPECT_EQ(7u, event->getPacketsInUse());
|
||||
}
|
||||
@@ -1122,7 +1134,7 @@ TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrec
|
||||
}
|
||||
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
|
||||
}
|
||||
EXPECT_EQ(1u, event->kernelCount);
|
||||
EXPECT_EQ(1u, event->getKernelCount());
|
||||
}
|
||||
|
||||
TEST_F(TimestampEventCreate, givenpCountZeroCallingQueryTimestampExpThenpCountSetProperly) {
|
||||
|
||||
@@ -385,6 +385,42 @@ HWTEST_F(PipeControlHelperTests, WhenIsDcFlushAllowedIsCalledThenCorrectResultIs
|
||||
EXPECT_EQ(hwInfoConfig.isDcFlushAllowed(), MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo));
|
||||
}
|
||||
|
||||
// Programs a PIPE_CONTROL with the write-timestamp post-sync operation into a
// small local stream and checks the address, immediate data and post-sync
// operation read back from the command at the start of the stream.
HWTEST_F(PipeControlHelperTests, WhenPipeControlPostSyncTimestampUsedThenCorrectPostSyncUsed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Array form of unique_ptr so the storage is released with delete[];
    // unique_ptr<uint8_t> over new uint8_t[N] would call plain delete.
    constexpr size_t bufferSize = 128;
    auto buffer = std::make_unique<uint8_t[]>(bufferSize);

    LinearStream stream(buffer.get(), bufferSize);
    uint64_t address = 0x1234567887654320;
    uint64_t immediateData = 0x0;

    PipeControlArgs args;
    MemorySynchronizationCommands<FamilyType>::addPipeControlWithPostSync(
        stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, args);
    auto pipeControl = genCmdCast<PIPE_CONTROL *>(stream.getCpuBase());
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(address, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    EXPECT_EQ(immediateData, pipeControl->getImmediateData());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation());
}
|
||||
|
||||
// Programs a PIPE_CONTROL with the write-immediate-data post-sync operation into
// a small local stream and checks the address, immediate data and post-sync
// operation read back from the command at the start of the stream.
HWTEST_F(PipeControlHelperTests, WhenPipeControlPostSyncWriteImmediateDataUsedThenCorrectPostSyncUsed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Array form of unique_ptr so the storage is released with delete[];
    // unique_ptr<uint8_t> over new uint8_t[N] would call plain delete.
    constexpr size_t bufferSize = 128;
    auto buffer = std::make_unique<uint8_t[]>(bufferSize);

    LinearStream stream(buffer.get(), bufferSize);
    uint64_t address = 0x1234567887654320;
    uint64_t immediateData = 0x1234;

    PipeControlArgs args;
    MemorySynchronizationCommands<FamilyType>::addPipeControlWithPostSync(
        stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, args);
    auto pipeControl = genCmdCast<PIPE_CONTROL *>(stream.getCpuBase());
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(address, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    EXPECT_EQ(immediateData, pipeControl->getImmediateData());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
}
|
||||
|
||||
TEST(HwInfoTest, givenHwInfoWhenChosenEngineTypeQueriedThenDefaultIsReturned) {
|
||||
HardwareInfo hwInfo = *defaultHwInfo;
|
||||
hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS;
|
||||
|
||||
@@ -75,6 +75,10 @@ struct UnitTestHelper {
|
||||
static void adjustKernelDescriptorForImplicitArgs(KernelDescriptor &kernelDescriptor);
|
||||
|
||||
static std::vector<bool> getProgrammedLargeGrfValues(CommandStreamReceiver &csr, LinearStream &linearStream);
|
||||
|
||||
static bool getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem);
|
||||
|
||||
static bool timestampRegisterHighAddress();
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -70,4 +70,9 @@ inline uint64_t UnitTestHelper<GfxFamily>::getPipeControlPostSyncAddress(const t
|
||||
return (gpuAddressHigh << 32) | gpuAddress;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool UnitTestHelper<GfxFamily>::timestampRegisterHighAddress() {
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -72,4 +72,9 @@ std::vector<bool> UnitTestHelper<GfxFamily>::getProgrammedLargeGrfValues(Command
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline bool UnitTestHelper<GfxFamily>::getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem) {
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -100,4 +100,9 @@ std::vector<bool> UnitTestHelper<GfxFamily>::getProgrammedLargeGrfValues(Command
|
||||
return largeGrfValues;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline bool UnitTestHelper<GfxFamily>::getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem) {
|
||||
return storeRegisterMem.getWorkloadPartitionIdOffsetEnable();
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -34,6 +34,7 @@ using IsAtMostXeHpgCore = IsAtMostGfxCore<IGFX_XE_HPG_CORE>;
|
||||
using IsAtLeastXeHpcCore = IsAtLeastGfxCore<IGFX_XE_HPC_CORE>;
|
||||
using IsAtMostXeHpcCore = IsAtMostGfxCore<IGFX_XE_HPC_CORE>;
|
||||
|
||||
using isXeHpOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPG_CORE>;
|
||||
using isXeHpOrXeHpcCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;
|
||||
using isXeHpcOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HPC_CORE, IGFX_XE_HPG_CORE>;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user