mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-22 10:17:01 +08:00
Use single event for multiple kernels
Related-To: NEO-6871 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a3745c28a3
commit
819d648997
@@ -181,4 +181,16 @@ void CommandList::handleIndirectAllocationResidency() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool CommandList::setupTimestampEventForMultiTile(ze_event_handle_t signalEvent) {
|
||||||
|
if (this->partitionCount > 1 &&
|
||||||
|
signalEvent) {
|
||||||
|
auto event = Event::fromHandle(signalEvent);
|
||||||
|
if (event->isEventTimestampFlagSet()) {
|
||||||
|
event->setPacketsInUse(this->partitionCount);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -264,6 +264,7 @@ struct CommandList : _ze_command_list_handle_t {
|
|||||||
protected:
|
protected:
|
||||||
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
|
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
|
||||||
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
|
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
|
||||||
|
bool setupTimestampEventForMultiTile(ze_event_handle_t signalEvent);
|
||||||
|
|
||||||
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
|
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
|
||||||
std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations;
|
std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations;
|
||||||
|
|||||||
@@ -230,7 +230,7 @@ struct CommandListCoreFamily : CommandListImp {
|
|||||||
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]);
|
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]);
|
||||||
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions);
|
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions);
|
||||||
void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb, bool workloadPartition);
|
void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb, bool workloadPartition);
|
||||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask);
|
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition);
|
||||||
void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker, bool workloadPartition);
|
void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker, bool workloadPartition);
|
||||||
void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker);
|
void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker);
|
||||||
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);
|
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);
|
||||||
|
|||||||
@@ -364,9 +364,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
appendEventForProfiling(hSignalEvent, true, false);
|
bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent);
|
||||||
|
|
||||||
|
appendEventForProfiling(hSignalEvent, true, workloadPartition);
|
||||||
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
||||||
appendSignalEventPostWalker(hSignalEvent, false);
|
appendSignalEventPostWalker(hSignalEvent, workloadPartition);
|
||||||
|
|
||||||
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
|
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
|
||||||
executeCommandListImmediate(true);
|
executeCommandListImmediate(true);
|
||||||
@@ -800,22 +802,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemAdvise(ze_device_hand
|
|||||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
|
||||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
|
|
||||||
const ze_group_count_t *pThreadGroupDimensions,
|
|
||||||
ze_event_handle_t hEvent) {
|
|
||||||
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
|
||||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
|
|
||||||
if (beforeWalker) {
|
|
||||||
appendEventForProfiling(hEvent, true, false);
|
|
||||||
} else {
|
|
||||||
appendSignalEventPostWalker(hEvent, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(void *dstPtr,
|
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(void *dstPtr,
|
||||||
NEO::GraphicsAllocation *dstPtrAlloc,
|
NEO::GraphicsAllocation *dstPtrAlloc,
|
||||||
@@ -1075,18 +1061,21 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||||||
if (isStateless) {
|
if (isStateless) {
|
||||||
func = Builtin::CopyBufferToBufferSideStateless;
|
func = Builtin::CopyBufferToBufferSideStateless;
|
||||||
}
|
}
|
||||||
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
if (isCopyOnly()) {
|
||||||
dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||||
srcAllocationStruct.alignedAllocationPtr,
|
dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
||||||
srcAllocationStruct.alloc, srcAllocationStruct.offset, leftSize)
|
srcAllocationStruct.alignedAllocationPtr,
|
||||||
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
srcAllocationStruct.alloc, srcAllocationStruct.offset, leftSize);
|
||||||
dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
} else {
|
||||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||||
srcAllocationStruct.alloc, srcAllocationStruct.offset,
|
dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
||||||
leftSize, 1UL,
|
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||||
func,
|
srcAllocationStruct.alloc, srcAllocationStruct.offset,
|
||||||
hSignalEvent,
|
leftSize, 1UL,
|
||||||
isStateless);
|
func,
|
||||||
|
hSignalEvent,
|
||||||
|
isStateless);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
|
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
|
||||||
@@ -1094,19 +1083,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||||||
if (isStateless) {
|
if (isStateless) {
|
||||||
func = Builtin::CopyBufferToBufferMiddleStateless;
|
func = Builtin::CopyBufferToBufferMiddleStateless;
|
||||||
}
|
}
|
||||||
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
if (isCopyOnly()) {
|
||||||
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||||
srcAllocationStruct.alignedAllocationPtr,
|
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
||||||
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, middleSizeBytes)
|
srcAllocationStruct.alignedAllocationPtr,
|
||||||
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, middleSizeBytes);
|
||||||
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
} else {
|
||||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||||
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
|
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
||||||
middleSizeBytes,
|
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||||
middleElSize,
|
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
|
||||||
func,
|
middleSizeBytes,
|
||||||
hSignalEvent,
|
middleElSize,
|
||||||
isStateless);
|
func,
|
||||||
|
hSignalEvent,
|
||||||
|
isStateless);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
||||||
@@ -1114,18 +1106,21 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||||||
if (isStateless) {
|
if (isStateless) {
|
||||||
func = Builtin::CopyBufferToBufferSideStateless;
|
func = Builtin::CopyBufferToBufferSideStateless;
|
||||||
}
|
}
|
||||||
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
if (isCopyOnly()) {
|
||||||
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||||
srcAllocationStruct.alignedAllocationPtr,
|
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
||||||
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, rightSize)
|
srcAllocationStruct.alignedAllocationPtr,
|
||||||
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, rightSize);
|
||||||
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
} else {
|
||||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||||
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
|
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
||||||
rightSize, 1UL,
|
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||||
func,
|
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
|
||||||
hSignalEvent,
|
rightSize, 1UL,
|
||||||
isStateless);
|
func,
|
||||||
|
hSignalEvent,
|
||||||
|
isStateless);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
appendEventForProfilingAllWalkers(hSignalEvent, false);
|
appendEventForProfilingAllWalkers(hSignalEvent, false);
|
||||||
@@ -1557,6 +1552,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
|
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
|
||||||
patternGfxAlloc);
|
patternGfxAlloc);
|
||||||
builtinFunctionRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
|
builtinFunctionRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
|
||||||
|
|
||||||
res = appendLaunchKernelSplit(builtinFunctionRemainder->toHandle(), &dispatchFuncArgs, hSignalEvent);
|
res = appendLaunchKernelSplit(builtinFunctionRemainder->toHandle(), &dispatchFuncArgs, hSignalEvent);
|
||||||
if (res) {
|
if (res) {
|
||||||
return res;
|
return res;
|
||||||
@@ -1951,7 +1947,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_h
|
|||||||
constexpr uint32_t mask = 0xfffffffe;
|
constexpr uint32_t mask = 0xfffffffe;
|
||||||
auto event = Event::fromHandle(hEvent);
|
auto event = Event::fromHandle(hEvent);
|
||||||
|
|
||||||
auto baseAddr = event->getGpuAddress(this->device);
|
auto baseAddr = event->getPacketAddress(this->device);
|
||||||
auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset();
|
auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset();
|
||||||
auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset();
|
auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset();
|
||||||
|
|
||||||
@@ -1966,7 +1962,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_h
|
|||||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress, workloadPartition);
|
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress, workloadPartition);
|
||||||
}
|
}
|
||||||
|
|
||||||
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask);
|
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -2018,6 +2014,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent);
|
||||||
|
appendEventForProfiling(hSignalEvent, true, workloadPartition);
|
||||||
|
|
||||||
const auto &hwInfo = this->device->getHwInfo();
|
const auto &hwInfo = this->device->getHwInfo();
|
||||||
if (isCopyOnly()) {
|
if (isCopyOnly()) {
|
||||||
NEO::MiFlushArgs args;
|
NEO::MiFlushArgs args;
|
||||||
@@ -2031,17 +2030,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
|||||||
} else {
|
} else {
|
||||||
NEO::PipeControlArgs args;
|
NEO::PipeControlArgs args;
|
||||||
|
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlWithPostSync(
|
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||||
*commandContainer.getCommandStream(),
|
*commandContainer.getCommandStream(),
|
||||||
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
|
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
|
||||||
reinterpret_cast<uint64_t>(dstptr),
|
reinterpret_cast<uint64_t>(dstptr),
|
||||||
0,
|
0,
|
||||||
|
hwInfo,
|
||||||
args);
|
args);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hSignalEvent) {
|
appendSignalEventPostWalker(hSignalEvent, workloadPartition);
|
||||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(hSignalEvent, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto allocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(uint64_t), false);
|
auto allocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(uint64_t), false);
|
||||||
commandContainer.addToResidencyContainer(allocationStruct.alloc);
|
commandContainer.addToResidencyContainer(allocationStruct.alloc);
|
||||||
@@ -2263,7 +2261,7 @@ void CommandListCoreFamily<gfxCoreFamily>::programStateBaseAddress(NEO::CommandC
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask) {}
|
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition) {}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_t hSignalEvent,
|
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_t hSignalEvent,
|
||||||
@@ -2274,15 +2272,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
|||||||
if (ret) {
|
if (ret) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
bool workloadPartition = false;
|
bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent);
|
||||||
if (this->partitionCount > 1 &&
|
|
||||||
hSignalEvent) {
|
|
||||||
auto event = Event::fromHandle(hSignalEvent);
|
|
||||||
if (event->isEventTimestampFlagSet()) {
|
|
||||||
event->setPacketsInUse(this->partitionCount);
|
|
||||||
workloadPartition = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
appendEventForProfiling(hSignalEvent, true, workloadPartition);
|
appendEventForProfiling(hSignalEvent, true, workloadPartition);
|
||||||
|
|
||||||
if (isCopyOnly()) {
|
if (isCopyOnly()) {
|
||||||
|
|||||||
@@ -201,4 +201,20 @@ inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileB
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
|
||||||
|
const ze_group_count_t *pThreadGroupDimensions,
|
||||||
|
ze_event_handle_t hEvent) {
|
||||||
|
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||||
|
if (beforeWalker) {
|
||||||
|
appendEventForProfiling(hEvent, true, false);
|
||||||
|
} else {
|
||||||
|
appendSignalEventPostWalker(hEvent, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -344,4 +344,30 @@ inline size_t CommandListCoreFamily<gfxCoreFamily>::estimateBufferSizeMultiTileB
|
|||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
|
||||||
|
const ze_group_count_t *pThreadGroupDimensions,
|
||||||
|
ze_event_handle_t hEvent) {
|
||||||
|
if (hEvent) {
|
||||||
|
Event::fromHandle(hEvent)->increaseKernelCount();
|
||||||
|
}
|
||||||
|
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, false, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||||
|
if (isCopyOnly()) {
|
||||||
|
if (beforeWalker) {
|
||||||
|
appendEventForProfiling(hEvent, true, false);
|
||||||
|
} else {
|
||||||
|
appendSignalEventPostWalker(hEvent, false);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (hEvent && beforeWalker) {
|
||||||
|
auto event = Event::fromHandle(hEvent);
|
||||||
|
event->zeroKernelCount();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -96,6 +96,17 @@ struct Event : _ze_event_handle_t {
|
|||||||
return isTimestampEvent || usingContextEndOffset;
|
return isTimestampEvent || usingContextEndOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void increaseKernelCount() {
|
||||||
|
kernelCount++;
|
||||||
|
UNRECOVERABLE_IF(kernelCount > EventPacketsCount::maxKernelSplit);
|
||||||
|
}
|
||||||
|
uint32_t getKernelCount() const {
|
||||||
|
return kernelCount;
|
||||||
|
}
|
||||||
|
void zeroKernelCount() {
|
||||||
|
kernelCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t globalStartTS;
|
uint64_t globalStartTS;
|
||||||
uint64_t globalEndTS;
|
uint64_t globalEndTS;
|
||||||
uint64_t contextStartTS;
|
uint64_t contextStartTS;
|
||||||
@@ -110,8 +121,6 @@ struct Event : _ze_event_handle_t {
|
|||||||
ze_event_scope_flags_t signalScope = 0u;
|
ze_event_scope_flags_t signalScope = 0u;
|
||||||
ze_event_scope_flags_t waitScope = 0u;
|
ze_event_scope_flags_t waitScope = 0u;
|
||||||
|
|
||||||
uint32_t kernelCount = 1u;
|
|
||||||
|
|
||||||
bool l3FlushWaApplied = false;
|
bool l3FlushWaApplied = false;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@@ -122,6 +131,9 @@ struct Event : _ze_event_handle_t {
|
|||||||
size_t timestampSizeInDw = 0u;
|
size_t timestampSizeInDw = 0u;
|
||||||
size_t singlePacketSize = 0u;
|
size_t singlePacketSize = 0u;
|
||||||
size_t eventPoolOffset = 0u;
|
size_t eventPoolOffset = 0u;
|
||||||
|
|
||||||
|
uint32_t kernelCount = 1u;
|
||||||
|
|
||||||
bool isTimestampEvent = false;
|
bool isTimestampEvent = false;
|
||||||
bool usingContextEndOffset = false;
|
bool usingContextEndOffset = false;
|
||||||
};
|
};
|
||||||
@@ -180,8 +192,7 @@ struct EventImp : public Event {
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
ze_result_t calculateProfilingData();
|
ze_result_t calculateProfilingData();
|
||||||
ze_result_t queryStatusKernelTimestamp();
|
ze_result_t queryStatusEventPackets();
|
||||||
ze_result_t queryStatusNonTimestamp();
|
|
||||||
ze_result_t hostEventSetValue(TagSizeT eventValue);
|
ze_result_t hostEventSetValue(TagSizeT eventValue);
|
||||||
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
|
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
|
||||||
void assignKernelEventCompletionData(void *address);
|
void assignKernelEventCompletionData(void *address);
|
||||||
|
|||||||
@@ -104,33 +104,13 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename TagSizeT>
|
template <typename TagSizeT>
|
||||||
ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
|
ze_result_t EventImp<TagSizeT>::queryStatusEventPackets() {
|
||||||
assignKernelEventCompletionData(hostAddress);
|
assignKernelEventCompletionData(hostAddress);
|
||||||
uint32_t queryVal = Event::STATE_CLEARED;
|
uint32_t queryVal = Event::STATE_CLEARED;
|
||||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||||
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
|
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
|
||||||
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
|
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
|
||||||
bool ready = NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(
|
void const *queryAddress = isUsingContextEndOffset()
|
||||||
static_cast<TagSizeT const *>(kernelEventCompletionData[i].getContextEndAddress(packetId)),
|
|
||||||
queryVal,
|
|
||||||
std::not_equal_to<TagSizeT>());
|
|
||||||
if (!ready) {
|
|
||||||
return ZE_RESULT_NOT_READY;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
this->csr->getInternalAllocationStorage()->cleanAllocationList(this->csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
|
||||||
return ZE_RESULT_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TagSizeT>
|
|
||||||
ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
|
|
||||||
assignKernelEventCompletionData(hostAddress);
|
|
||||||
uint32_t queryVal = Event::STATE_CLEARED;
|
|
||||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
|
||||||
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
|
|
||||||
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
|
|
||||||
void const *queryAddress = usingContextEndOffset
|
|
||||||
? kernelEventCompletionData[i].getContextEndAddress(packetId)
|
? kernelEventCompletionData[i].getContextEndAddress(packetId)
|
||||||
: kernelEventCompletionData[i].getContextStartAddress(packetId);
|
: kernelEventCompletionData[i].getContextStartAddress(packetId);
|
||||||
bool ready = NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(
|
bool ready = NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(
|
||||||
@@ -156,11 +136,7 @@ ze_result_t EventImp<TagSizeT>::queryStatus() {
|
|||||||
*hostAddr = metricStreamer->getNotificationState();
|
*hostAddr = metricStreamer->getNotificationState();
|
||||||
}
|
}
|
||||||
this->csr->downloadAllocations();
|
this->csr->downloadAllocations();
|
||||||
if (isEventTimestampFlagSet()) {
|
return queryStatusEventPackets();
|
||||||
return queryStatusKernelTimestamp();
|
|
||||||
} else {
|
|
||||||
return queryStatusNonTimestamp();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename TagSizeT>
|
template <typename TagSizeT>
|
||||||
@@ -274,11 +250,9 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
|
|||||||
|
|
||||||
template <typename TagSizeT>
|
template <typename TagSizeT>
|
||||||
ze_result_t EventImp<TagSizeT>::reset() {
|
ze_result_t EventImp<TagSizeT>::reset() {
|
||||||
if (isEventTimestampFlagSet()) {
|
kernelCount = EventPacketsCount::maxKernelSplit;
|
||||||
kernelCount = EventPacketsCount::maxKernelSplit;
|
for (uint32_t i = 0; i < kernelCount; i++) {
|
||||||
for (uint32_t i = 0; i < kernelCount; i++) {
|
kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
|
||||||
kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
hostEventSetValue(Event::STATE_INITIAL);
|
hostEventSetValue(Event::STATE_INITIAL);
|
||||||
resetPackets();
|
resetPackets();
|
||||||
|
|||||||
@@ -8,6 +8,8 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "shared/source/command_container/implicit_scaling.h"
|
#include "shared/source/command_container/implicit_scaling.h"
|
||||||
|
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||||
|
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||||
#include "shared/test/common/helpers/variable_backup.h"
|
#include "shared/test/common/helpers/variable_backup.h"
|
||||||
#include "shared/test/common/test_macros/test.h"
|
#include "shared/test/common/test_macros/test.h"
|
||||||
|
|
||||||
@@ -90,5 +92,96 @@ struct MultiTileCommandListFixture : public SingleRootMultiSubDeviceFixture {
|
|||||||
std::unique_ptr<VariableBackup<bool>> osLocalMemoryBackup;
|
std::unique_ptr<VariableBackup<bool>> osLocalMemoryBackup;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename FamilyType>
|
||||||
|
void validateTimestampRegisters(GenCmdList &cmdList,
|
||||||
|
GenCmdList::iterator &startIt,
|
||||||
|
uint32_t firstLoadRegisterRegSrcAddress,
|
||||||
|
uint64_t firstStoreRegMemAddress,
|
||||||
|
uint32_t secondLoadRegisterRegSrcAddress,
|
||||||
|
uint64_t secondStoreRegMemAddress,
|
||||||
|
bool workloadPartition) {
|
||||||
|
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||||
|
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||||
|
using MI_MATH = typename FamilyType::MI_MATH;
|
||||||
|
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||||
|
|
||||||
|
constexpr uint32_t mask = 0xfffffffe;
|
||||||
|
|
||||||
|
auto itor = find<MI_LOAD_REGISTER_REG *>(startIt, cmdList.end());
|
||||||
|
|
||||||
|
{
|
||||||
|
ASSERT_NE(cmdList.end(), itor);
|
||||||
|
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||||
|
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||||
|
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||||
|
}
|
||||||
|
|
||||||
|
itor++;
|
||||||
|
{
|
||||||
|
ASSERT_NE(cmdList.end(), itor);
|
||||||
|
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||||
|
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||||
|
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||||
|
}
|
||||||
|
|
||||||
|
itor++;
|
||||||
|
{
|
||||||
|
ASSERT_NE(cmdList.end(), itor);
|
||||||
|
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||||
|
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
itor++;
|
||||||
|
{
|
||||||
|
ASSERT_NE(cmdList.end(), itor);
|
||||||
|
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||||
|
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||||
|
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||||
|
if (workloadPartition) {
|
||||||
|
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||||
|
} else {
|
||||||
|
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
itor++;
|
||||||
|
{
|
||||||
|
ASSERT_NE(cmdList.end(), itor);
|
||||||
|
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||||
|
EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||||
|
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||||
|
}
|
||||||
|
|
||||||
|
itor++;
|
||||||
|
{
|
||||||
|
ASSERT_NE(cmdList.end(), itor);
|
||||||
|
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||||
|
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||||
|
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||||
|
}
|
||||||
|
|
||||||
|
itor++;
|
||||||
|
{
|
||||||
|
ASSERT_NE(cmdList.end(), itor);
|
||||||
|
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||||
|
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
itor++;
|
||||||
|
{
|
||||||
|
ASSERT_NE(cmdList.end(), itor);
|
||||||
|
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||||
|
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||||
|
EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||||
|
if (workloadPartition) {
|
||||||
|
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||||
|
} else {
|
||||||
|
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
itor++;
|
||||||
|
startIt = itor;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -347,5 +347,76 @@ struct MockCommandList : public CommandList {
|
|||||||
uint8_t *batchBuffer = nullptr;
|
uint8_t *batchBuffer = nullptr;
|
||||||
NEO::GraphicsAllocation *mockAllocation = nullptr;
|
NEO::GraphicsAllocation *mockAllocation = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
class MockAppendMemoryCopy : public CommandListCoreFamily<gfxCoreFamily> {
|
||||||
|
public:
|
||||||
|
using BaseClass = CommandListCoreFamily<gfxCoreFamily>;
|
||||||
|
|
||||||
|
ADDMETHOD(appendMemoryCopyKernelWithGA, ze_result_t, false, ZE_RESULT_SUCCESS,
|
||||||
|
(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
|
||||||
|
uint64_t dstOffset, void *srcPtr,
|
||||||
|
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||||
|
uint64_t srcOffset, uint64_t size,
|
||||||
|
uint64_t elementSize, Builtin builtin,
|
||||||
|
ze_event_handle_t hSignalEvent,
|
||||||
|
bool isStateless),
|
||||||
|
(dstPtr, dstPtrAlloc, dstOffset, srcPtr, srcPtrAlloc, srcOffset, size, elementSize, builtin, hSignalEvent, isStateless));
|
||||||
|
|
||||||
|
ADDMETHOD_NOBASE(appendMemoryCopyBlit, ze_result_t, ZE_RESULT_SUCCESS,
|
||||||
|
(uintptr_t dstPtr,
|
||||||
|
NEO::GraphicsAllocation *dstPtrAlloc,
|
||||||
|
uint64_t dstOffset, uintptr_t srcPtr,
|
||||||
|
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||||
|
uint64_t srcOffset,
|
||||||
|
uint64_t size));
|
||||||
|
|
||||||
|
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override {
|
||||||
|
return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize, allowHostCopy);
|
||||||
|
}
|
||||||
|
|
||||||
|
ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
||||||
|
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||||
|
uint32_t dstPitch, size_t dstOffset,
|
||||||
|
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||||
|
size_t srcOffset, ze_event_handle_t hSignalEvent,
|
||||||
|
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||||
|
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
|
||||||
|
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
|
||||||
|
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||||
|
}
|
||||||
|
|
||||||
|
ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
||||||
|
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||||
|
uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
|
||||||
|
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||||
|
uint32_t srcSlicePitch, size_t srcOffset,
|
||||||
|
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||||
|
ze_event_handle_t *phWaitEvents) override {
|
||||||
|
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
|
||||||
|
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
|
||||||
|
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||||
|
}
|
||||||
|
|
||||||
|
ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation,
|
||||||
|
NEO::GraphicsAllocation *dstAllocation,
|
||||||
|
size_t srcOffset,
|
||||||
|
size_t dstOffset,
|
||||||
|
ze_copy_region_t srcRegion,
|
||||||
|
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
|
||||||
|
size_t srcRowPitch, size_t srcSlicePitch,
|
||||||
|
size_t dstRowPitch, size_t dstSlicePitch,
|
||||||
|
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
|
||||||
|
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||||
|
srcBlitCopyRegionOffset = srcOffset;
|
||||||
|
dstBlitCopyRegionOffset = dstOffset;
|
||||||
|
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||||
|
}
|
||||||
|
uintptr_t srcAlignedPtr;
|
||||||
|
uintptr_t dstAlignedPtr;
|
||||||
|
size_t srcBlitCopyRegionOffset = 0;
|
||||||
|
size_t dstBlitCopyRegionOffset = 0;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -501,13 +501,6 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenS
|
|||||||
itor++;
|
itor++;
|
||||||
itor = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
|
itor = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
|
||||||
EXPECT_NE(cmdList.end(), itor);
|
EXPECT_NE(cmdList.end(), itor);
|
||||||
itor++;
|
|
||||||
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
|
|
||||||
if (MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo)) {
|
|
||||||
EXPECT_NE(cmdList.end(), itor);
|
|
||||||
} else {
|
|
||||||
EXPECT_EQ(cmdList.end(), itor);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
using platformSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
|
using platformSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
|
||||||
@@ -540,22 +533,18 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS
|
|||||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||||
|
|
||||||
auto iterator = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
auto iterator = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||||
bool postSyncFound = false;
|
uint32_t postSyncFound = 0;
|
||||||
ASSERT_NE(0u, iterator.size());
|
ASSERT_NE(0u, iterator.size());
|
||||||
uint32_t numPCs = 0;
|
|
||||||
for (auto it : iterator) {
|
for (auto it : iterator) {
|
||||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||||
numPCs++;
|
|
||||||
if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) &&
|
if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) &&
|
||||||
(cmd->getImmediateData() == Event::STATE_SIGNALED) &&
|
(cmd->getImmediateData() == Event::STATE_SIGNALED) &&
|
||||||
(cmd->getDcFlushEnable())) {
|
(cmd->getDcFlushEnable())) {
|
||||||
postSyncFound = true;
|
postSyncFound++;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT_TRUE(postSyncFound);
|
EXPECT_EQ(1u, postSyncFound);
|
||||||
EXPECT_EQ(numPCs, iterator.size());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeSetToSubDeviceThenB2BPipeControlIsAddedWithDcFlushForLastPC, platformSupport) {
|
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeSetToSubDeviceThenB2BPipeControlIsAddedWithDcFlushForLastPC, platformSupport) {
|
||||||
@@ -585,22 +574,18 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS
|
|||||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||||
|
|
||||||
auto iterator = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
auto iterator = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||||
bool postSyncFound = false;
|
uint32_t postSyncFound = 0;
|
||||||
ASSERT_NE(0u, iterator.size());
|
ASSERT_NE(0u, iterator.size());
|
||||||
uint32_t numPCs = 0;
|
|
||||||
for (auto it : iterator) {
|
for (auto it : iterator) {
|
||||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||||
numPCs++;
|
|
||||||
if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) &&
|
if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) &&
|
||||||
(cmd->getImmediateData() == Event::STATE_SIGNALED) &&
|
(cmd->getImmediateData() == Event::STATE_SIGNALED) &&
|
||||||
(!cmd->getDcFlushEnable())) {
|
(!cmd->getDcFlushEnable())) {
|
||||||
postSyncFound = true;
|
postSyncFound++;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT_TRUE(postSyncFound);
|
EXPECT_EQ(1u, postSyncFound);
|
||||||
EXPECT_EQ(numPCs, iterator.size() - 1);
|
|
||||||
|
|
||||||
auto it = *(iterator.end() - 1);
|
auto it = *(iterator.end() - 1);
|
||||||
auto cmd1 = genCmdCast<PIPE_CONTROL *>(*it);
|
auto cmd1 = genCmdCast<PIPE_CONTROL *>(*it);
|
||||||
|
|||||||
@@ -77,12 +77,19 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalle
|
|||||||
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset),
|
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset),
|
||||||
commandContainer.getCommandStream()->getUsed() - commandStreamOffset));
|
commandContainer.getCommandStream()->getUsed() - commandStreamOffset));
|
||||||
|
|
||||||
auto iterator = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
auto pcList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*iterator);
|
ASSERT_NE(0u, pcList.size());
|
||||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
bool foundTimestampPipeControl = false;
|
||||||
EXPECT_FALSE(cmd->getDcFlushEnable());
|
for (auto it : pcList) {
|
||||||
EXPECT_EQ(timestampAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||||
EXPECT_EQ(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, cmd->getPostSyncOperation());
|
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
|
||||||
|
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||||
|
EXPECT_FALSE(cmd->getDcFlushEnable());
|
||||||
|
EXPECT_EQ(timestampAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||||
|
foundTimestampPipeControl = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EXPECT_TRUE(foundTimestampPipeControl);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalledThenTimestampAllocationIsInsideResidencyContainer, IsAtLeastSkl) {
|
HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalledThenTimestampAllocationIsInsideResidencyContainer, IsAtLeastSkl) {
|
||||||
|
|||||||
@@ -7,7 +7,6 @@
|
|||||||
|
|
||||||
#include "shared/source/command_container/command_encoder.h"
|
#include "shared/source/command_container/command_encoder.h"
|
||||||
#include "shared/source/helpers/hw_helper.h"
|
#include "shared/source/helpers/hw_helper.h"
|
||||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
|
||||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||||
#include "shared/test/common/test_macros/test.h"
|
#include "shared/test/common/test_macros/test.h"
|
||||||
|
|
||||||
@@ -382,82 +381,6 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
|
|||||||
EXPECT_EQ(1u, postSyncFound);
|
EXPECT_EQ(1u, postSyncFound);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename FamilyType>
|
|
||||||
void validateTimestampRegisters(GenCmdList &cmdList,
|
|
||||||
uint64_t firstRegisterAddress, uint64_t secondRegisterAddress) {
|
|
||||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
|
||||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
|
||||||
using MI_MATH = typename FamilyType::MI_MATH;
|
|
||||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
|
||||||
|
|
||||||
constexpr uint32_t mask = 0xfffffffe;
|
|
||||||
|
|
||||||
auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
|
|
||||||
|
|
||||||
{
|
|
||||||
ASSERT_NE(cmdList.end(), itor);
|
|
||||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
|
||||||
EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmdLoadReg->getSourceRegisterAddress());
|
|
||||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
|
||||||
{
|
|
||||||
ASSERT_NE(cmdList.end(), itor);
|
|
||||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
|
||||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
|
||||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
|
||||||
{
|
|
||||||
ASSERT_NE(cmdList.end(), itor);
|
|
||||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
|
||||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
|
||||||
{
|
|
||||||
ASSERT_NE(cmdList.end(), itor);
|
|
||||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
|
||||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
|
||||||
EXPECT_EQ(firstRegisterAddress, cmdMem->getMemoryAddress());
|
|
||||||
EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
|
||||||
{
|
|
||||||
ASSERT_NE(cmdList.end(), itor);
|
|
||||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
|
||||||
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmdLoadReg->getSourceRegisterAddress());
|
|
||||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
|
||||||
{
|
|
||||||
ASSERT_NE(cmdList.end(), itor);
|
|
||||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
|
||||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
|
||||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
|
||||||
{
|
|
||||||
ASSERT_NE(cmdList.end(), itor);
|
|
||||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
|
||||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
|
||||||
{
|
|
||||||
ASSERT_NE(cmdList.end(), itor);
|
|
||||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
|
||||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
|
||||||
EXPECT_EQ(secondRegisterAddress, cmdMem->getMemoryAddress());
|
|
||||||
EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(MultiTileCommandListAppendBarrier,
|
HWTEST2_F(MultiTileCommandListAppendBarrier,
|
||||||
GivenTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndTimestampOperations, IsWithinXeGfxFamily) {
|
GivenTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndTimestampOperations, IsWithinXeGfxFamily) {
|
||||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||||
@@ -533,7 +456,12 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
|
|||||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
cmdBuffer,
|
cmdBuffer,
|
||||||
timestampRegisters));
|
timestampRegisters));
|
||||||
validateTimestampRegisters<FamilyType>(cmdList, globalStartAddress, contextStartAddress);
|
auto begin = cmdList.begin();
|
||||||
|
validateTimestampRegisters<FamilyType>(cmdList,
|
||||||
|
begin,
|
||||||
|
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||||
|
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||||
|
true);
|
||||||
|
|
||||||
auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + timestampRegisters;
|
auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + timestampRegisters;
|
||||||
|
|
||||||
@@ -557,7 +485,12 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
|
|||||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
cmdBuffer,
|
cmdBuffer,
|
||||||
timestampRegisters));
|
timestampRegisters));
|
||||||
validateTimestampRegisters<FamilyType>(cmdList, globalEndAddress, contextEndAddress);
|
begin = cmdList.begin();
|
||||||
|
validateTimestampRegisters<FamilyType>(cmdList,
|
||||||
|
begin,
|
||||||
|
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||||
|
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||||
|
true);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||||
#include "shared/test/common/test_macros/test.h"
|
#include "shared/test/common/test_macros/test.h"
|
||||||
|
|
||||||
|
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
|
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
|
||||||
@@ -17,70 +18,6 @@ namespace ult {
|
|||||||
|
|
||||||
using AppendMemoryCopy = Test<DeviceFixture>;
|
using AppendMemoryCopy = Test<DeviceFixture>;
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
|
||||||
class MockAppendMemoryCopy : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
|
|
||||||
public:
|
|
||||||
ADDMETHOD_NOBASE(appendMemoryCopyKernelWithGA, ze_result_t, ZE_RESULT_SUCCESS,
|
|
||||||
(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
|
|
||||||
uint64_t dstOffset, void *srcPtr,
|
|
||||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
|
||||||
uint64_t srcOffset, uint64_t size,
|
|
||||||
uint64_t elementSize, Builtin builtin,
|
|
||||||
ze_event_handle_t hSignalEvent,
|
|
||||||
bool isStateless));
|
|
||||||
ADDMETHOD_NOBASE(appendMemoryCopyBlit, ze_result_t, ZE_RESULT_SUCCESS,
|
|
||||||
(uintptr_t dstPtr,
|
|
||||||
NEO::GraphicsAllocation *dstPtrAlloc,
|
|
||||||
uint64_t dstOffset, uintptr_t srcPtr,
|
|
||||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
|
||||||
uint64_t srcOffset,
|
|
||||||
uint64_t size));
|
|
||||||
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override {
|
|
||||||
return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize, allowHostCopy);
|
|
||||||
}
|
|
||||||
ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
|
||||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
|
||||||
uint32_t dstPitch, size_t dstOffset,
|
|
||||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
|
||||||
size_t srcOffset, ze_event_handle_t hSignalEvent,
|
|
||||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
|
||||||
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
|
|
||||||
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
|
|
||||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
|
|
||||||
}
|
|
||||||
|
|
||||||
ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
|
|
||||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
|
||||||
uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
|
|
||||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
|
||||||
uint32_t srcSlicePitch, size_t srcOffset,
|
|
||||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
|
||||||
ze_event_handle_t *phWaitEvents) override {
|
|
||||||
srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
|
|
||||||
dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
|
|
||||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
|
|
||||||
}
|
|
||||||
|
|
||||||
ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation,
|
|
||||||
NEO::GraphicsAllocation *dstAllocation,
|
|
||||||
size_t srcOffset,
|
|
||||||
size_t dstOffset,
|
|
||||||
ze_copy_region_t srcRegion,
|
|
||||||
ze_copy_region_t dstRegion, const Vec3<size_t> &copySize,
|
|
||||||
size_t srcRowPitch, size_t srcSlicePitch,
|
|
||||||
size_t dstRowPitch, size_t dstSlicePitch,
|
|
||||||
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize, ze_event_handle_t hSignalEvent,
|
|
||||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
|
||||||
srcBlitCopyRegionOffset = srcOffset;
|
|
||||||
dstBlitCopyRegionOffset = dstOffset;
|
|
||||||
return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, phWaitEvents);
|
|
||||||
}
|
|
||||||
uintptr_t srcAlignedPtr;
|
|
||||||
uintptr_t dstAlignedPtr;
|
|
||||||
size_t srcBlitCopyRegionOffset = 0;
|
|
||||||
size_t dstBlitCopyRegionOffset = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, IsAtLeastSkl) {
|
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, IsAtLeastSkl) {
|
||||||
MockAppendMemoryCopy<gfxCoreFamily> cmdList;
|
MockAppendMemoryCopy<gfxCoreFamily> cmdList;
|
||||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||||
@@ -343,12 +280,14 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyT
|
|||||||
}
|
}
|
||||||
|
|
||||||
using SupportedPlatforms = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;
|
using SupportedPlatforms = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;
|
||||||
HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, SupportedPlatforms) {
|
HWTEST2_F(AppendMemoryCopy,
|
||||||
|
givenCommandListUsesTimestampPassedToMemoryCopyWhenTwoKernelsAreUsedThenAppendProfilingCalledForSinglePacket, SupportedPlatforms) {
|
||||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
|
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
|
||||||
|
|
||||||
MockAppendMemoryCopy<gfxCoreFamily> commandList;
|
MockAppendMemoryCopy<gfxCoreFamily> commandList;
|
||||||
|
commandList.appendMemoryCopyKernelWithGACallBase = true;
|
||||||
|
|
||||||
commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||||
void *srcPtr = reinterpret_cast<void *>(0x1234);
|
void *srcPtr = reinterpret_cast<void *>(0x1234);
|
||||||
void *dstPtr = reinterpret_cast<void *>(0x2345);
|
void *dstPtr = reinterpret_cast<void *>(0x2345);
|
||||||
@@ -365,65 +304,97 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenA
|
|||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||||
|
|
||||||
|
uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset();
|
||||||
|
uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset();
|
||||||
|
uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
|
||||||
|
uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();
|
||||||
|
|
||||||
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
|
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
|
||||||
EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalled, 0u);
|
EXPECT_EQ(2u, commandList.appendMemoryCopyKernelWithGACalled);
|
||||||
EXPECT_EQ(commandList.appendMemoryCopyBlitCalled, 0u);
|
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
|
||||||
EXPECT_EQ(1u, event->getPacketsInUse());
|
EXPECT_EQ(1u, event->getPacketsInUse());
|
||||||
|
EXPECT_EQ(1u, event->getKernelCount());
|
||||||
|
|
||||||
GenCmdList cmdList;
|
GenCmdList cmdList;
|
||||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||||
cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
|
cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||||
commandList.commandContainer.getCommandStream()->getUsed()));
|
commandList.commandContainer.getCommandStream()->getUsed()));
|
||||||
auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
|
|
||||||
EXPECT_NE(cmdList.end(), itor);
|
|
||||||
{
|
|
||||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
|
||||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
|
||||||
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
|
auto begin = cmdList.begin();
|
||||||
EXPECT_NE(cmdList.end(), itor);
|
ASSERT_EQ(2u, itorWalkers.size());
|
||||||
{
|
auto secondWalker = itorWalkers[1];
|
||||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
|
||||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
validateTimestampRegisters<FamilyType>(cmdList,
|
||||||
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
|
begin,
|
||||||
EXPECT_NE(cmdList.end(), itor);
|
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||||
{
|
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
|
false);
|
||||||
EXPECT_FALSE(cmd->getDcFlushEnable());
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
validateTimestampRegisters<FamilyType>(cmdList,
|
||||||
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
|
secondWalker,
|
||||||
EXPECT_NE(cmdList.end(), itor);
|
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||||
{
|
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
false);
|
||||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
|
||||||
}
|
|
||||||
|
|
||||||
itor++;
|
|
||||||
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
|
|
||||||
EXPECT_NE(cmdList.end(), itor);
|
|
||||||
{
|
|
||||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
|
||||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto temp = itor;
|
|
||||||
auto numPCs = findAll<PIPE_CONTROL *>(temp, cmdList.end());
|
|
||||||
//we should have only one PC with dcFlush added
|
|
||||||
ASSERT_EQ(1u, numPCs.size());
|
|
||||||
|
|
||||||
itor = find<PIPE_CONTROL *>(itor, cmdList.end());
|
|
||||||
EXPECT_NE(cmdList.end(), itor);
|
|
||||||
{
|
|
||||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
|
|
||||||
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(AppendMemoryCopy,
|
||||||
|
givenCommandListUsesTimestampPassedToMemoryCopyWhenThreeKernelsAreUsedThenAppendProfilingCalledForSinglePacket, SupportedPlatforms) {
|
||||||
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
|
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
|
||||||
|
|
||||||
|
MockAppendMemoryCopy<gfxCoreFamily> commandList;
|
||||||
|
commandList.appendMemoryCopyKernelWithGACallBase = true;
|
||||||
|
|
||||||
|
commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||||
|
void *srcPtr = reinterpret_cast<void *>(0x1231);
|
||||||
|
void *dstPtr = reinterpret_cast<void *>(0x200002345);
|
||||||
|
|
||||||
|
ze_event_pool_desc_t eventPoolDesc = {};
|
||||||
|
eventPoolDesc.count = 1;
|
||||||
|
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||||
|
|
||||||
|
ze_event_desc_t eventDesc = {};
|
||||||
|
eventDesc.index = 0;
|
||||||
|
|
||||||
|
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||||
|
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||||
|
|
||||||
|
uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset();
|
||||||
|
uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset();
|
||||||
|
uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset();
|
||||||
|
uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset();
|
||||||
|
|
||||||
|
commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
|
||||||
|
EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
|
||||||
|
EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
|
||||||
|
EXPECT_EQ(1u, event->getPacketsInUse());
|
||||||
|
EXPECT_EQ(1u, event->getKernelCount());
|
||||||
|
|
||||||
|
GenCmdList cmdList;
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||||
|
cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||||
|
commandList.commandContainer.getCommandStream()->getUsed()));
|
||||||
|
|
||||||
|
auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
|
||||||
|
auto begin = cmdList.begin();
|
||||||
|
ASSERT_EQ(3u, itorWalkers.size());
|
||||||
|
auto thirdWalker = itorWalkers[2];
|
||||||
|
|
||||||
|
validateTimestampRegisters<FamilyType>(cmdList,
|
||||||
|
begin,
|
||||||
|
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||||
|
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||||
|
false);
|
||||||
|
|
||||||
|
validateTimestampRegisters<FamilyType>(cmdList,
|
||||||
|
thirdWalker,
|
||||||
|
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||||
|
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -355,5 +355,79 @@ HWTEST2_F(CommandListAppendSignalEvent,
|
|||||||
EXPECT_EQ(1u, postSyncFound);
|
EXPECT_EQ(1u, postSyncFound);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends a global-timestamp write on a command list whose partitionCount is 2, signalling a
// timestamp event, and checks that:
//  - the event reports two packets in use,
//  - a PIPE_CONTROL with a WRITE_TIMESTAMP post-sync targets the requested destination address,
//  - validateTimestampRegisters accepts the start and end register stores into the event, with the
//    helper's last argument set to true (presumably "workload partition expected" - confirm
//    against the helper's definition).
HWTEST2_F(CommandListAppendSignalEvent,
          givenMultiTileCommandListWhenAppendWriteGlobalTimestampCalledWithSignalEventThenWorkPartitionedRegistersAreUsed, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
    auto &commandContainer = commandList->commandContainer;

    uint64_t timestampAddress = 0x12345678555500;
    uint64_t *dstptr = reinterpret_cast<uint64_t *>(timestampAddress);

    constexpr uint32_t packets = 2u;

    event->setEventTimestampFlag(true);
    commandList->partitionCount = packets;

    commandList->appendWriteGlobalTimestamp(dstptr, event->toHandle(), 0, nullptr);
    EXPECT_EQ(packets, event->getPacketsInUse());

    auto eventGpuAddress = event->getGpuAddress(device);
    uint64_t contextStartAddress = eventGpuAddress + event->getContextStartOffset();
    uint64_t globalStartAddress = eventGpuAddress + event->getGlobalStartOffset();
    uint64_t contextEndAddress = eventGpuAddress + event->getContextEndOffset();
    uint64_t globalEndAddress = eventGpuAddress + event->getGlobalEndOffset();

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    // The iterator is dereferenced right after each lookup, so a miss must abort the test
    // instead of continuing with cmdList.end().
    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorPC);
    auto cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
    while (cmd->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
        ++itorPC;
        itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
        ASSERT_NE(cmdList.end(), itorPC);
        cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
    }
    EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
    EXPECT_FALSE(cmd->getDcFlushEnable());
    EXPECT_EQ(timestampAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));

    // The same iterator is handed to every validation call below.
    auto startCmdList = cmdList.begin();
    validateTimestampRegisters<FamilyType>(cmdList,
                                           startCmdList,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
                                           true);

    if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
        // High halves are expected one dword past the low halves.
        // NOTE(review): 0x23AC looks like the high-dword counterpart of
        // GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW - confirm and replace with a named constant if one exists.
        uint64_t globalStartAddressHigh = globalStartAddress + sizeof(uint32_t);
        uint64_t contextStartAddressHigh = contextStartAddress + sizeof(uint32_t);
        validateTimestampRegisters<FamilyType>(cmdList,
                                               startCmdList,
                                               REG_GLOBAL_TIMESTAMP_UN, globalStartAddressHigh,
                                               0x23AC, contextStartAddressHigh,
                                               true);
    }

    validateTimestampRegisters<FamilyType>(cmdList,
                                           startCmdList,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
                                           true);

    if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
        uint64_t globalEndAddressHigh = globalEndAddress + sizeof(uint32_t);
        uint64_t contextEndAddressHigh = contextEndAddress + sizeof(uint32_t);
        validateTimestampRegisters<FamilyType>(cmdList,
                                               startCmdList,
                                               REG_GLOBAL_TIMESTAMP_UN, globalEndAddressHigh,
                                               0x23AC, contextEndAddressHigh,
                                               true);
    }
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -210,9 +210,9 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnTimestampEventWithThre
|
|||||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||||
|
|
||||||
event->setPacketsInUse(3u);
|
event->setPacketsInUse(3u);
|
||||||
event->kernelCount = 2;
|
event->increaseKernelCount();
|
||||||
event->setPacketsInUse(3u);
|
event->setPacketsInUse(3u);
|
||||||
event->kernelCount = 3;
|
event->increaseKernelCount();
|
||||||
event->setPacketsInUse(3u);
|
event->setPacketsInUse(3u);
|
||||||
ASSERT_EQ(9u, event->getPacketsInUse());
|
ASSERT_EQ(9u, event->getPacketsInUse());
|
||||||
|
|
||||||
|
|||||||
@@ -6,11 +6,13 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "shared/source/memory_manager/memory_manager.h"
|
#include "shared/source/memory_manager/memory_manager.h"
|
||||||
|
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||||
#include "shared/test/common/test_macros/test.h"
|
#include "shared/test/common/test_macros/test.h"
|
||||||
|
|
||||||
#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h"
|
#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h"
|
||||||
#include "level_zero/core/source/kernel/kernel_imp.h"
|
#include "level_zero/core/source/kernel/kernel_imp.h"
|
||||||
|
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||||
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
|
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
|
||||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||||
@@ -210,5 +212,217 @@ HWTEST2_F(AppendFillTest,
|
|||||||
delete[] nonMultipleDstPtr;
|
delete[] nonMultipleDstPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Platform matcher passed as the last HWTEST2_F argument by the register-based fill tests below;
// it selects gfx cores in the IGFX_GEN9_CORE..IGFX_GEN12LP_CORE range
// (bounds presumably inclusive - confirm against IsWithinGfxCore).
using IsBetweenGen9AndGen12lp = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_GEN12LP_CORE>;
|
||||||
|
|
||||||
|
// Fills memory with the fixture's immediate pattern while signalling a kernel-timestamp event and
// checks that the event ends up with one packet and one kernel count, that two GPGPU_WALKER
// commands were emitted, and that validateTimestampRegisters accepts the start registers searched
// from the beginning of the command list and the end registers searched from the second walker.
HWTEST2_F(AppendFillTest,
          givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesRegistersThenSinglePacketUsesRegisterProfiling, IsBetweenGen9AndGen12lp) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal checks: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    const uint64_t eventBaseAddress = event->getGpuAddress(device);
    uint64_t globalStartAddress = eventBaseAddress + event->getGlobalStartOffset();
    uint64_t contextStartAddress = eventBaseAddress + event->getContextStartOffset();
    uint64_t globalEndAddress = eventBaseAddress + event->getGlobalEndOffset();
    uint64_t contextEndAddress = eventBaseAddress + event->getContextEndOffset();

    auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern,
                                           sizeof(immediatePattern),
                                           immediateAllocSize, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(1u, event->getPacketsInUse());
    EXPECT_EQ(1u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
    auto begin = cmdList.begin();
    ASSERT_EQ(2u, itorWalkers.size());
    auto secondWalker = itorWalkers[1];

    validateTimestampRegisters<FamilyType>(cmdList,
                                           begin,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
                                           false);

    validateTimestampRegisters<FamilyType>(cmdList,
                                           secondWalker,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
                                           false);
}
|
||||||
|
|
||||||
|
// Fills memory with the fixture's (non-immediate) pattern while signalling a kernel-timestamp
// event and checks that the event ends up with one packet and one kernel count, that two
// GPGPU_WALKER commands were emitted, and that validateTimestampRegisters accepts the start
// registers searched from the beginning of the command list and the end registers searched from
// the second walker.
HWTEST2_F(AppendFillTest,
          givenCallToAppendMemoryFillWhenTimestampEventUsesRegistersThenSinglePacketUsesRegisterProfiling, IsBetweenGen9AndGen12lp) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal checks: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    const uint64_t eventBaseAddress = event->getGpuAddress(device);
    uint64_t globalStartAddress = eventBaseAddress + event->getGlobalStartOffset();
    uint64_t contextStartAddress = eventBaseAddress + event->getContextStartOffset();
    uint64_t globalEndAddress = eventBaseAddress + event->getGlobalEndOffset();
    uint64_t contextEndAddress = eventBaseAddress + event->getContextEndOffset();

    auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(1u, event->getPacketsInUse());
    EXPECT_EQ(1u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
    auto begin = cmdList.begin();
    ASSERT_EQ(2u, itorWalkers.size());
    auto secondWalker = itorWalkers[1];

    validateTimestampRegisters<FamilyType>(cmdList,
                                           begin,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
                                           false);

    validateTimestampRegisters<FamilyType>(cmdList,
                                           secondWalker,
                                           REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
                                           GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
                                           false);
}
|
||||||
|
|
||||||
|
// Fills memory with the fixture's immediate pattern while signalling a kernel-timestamp event and
// checks that the event reports two packets and two kernels, that two COMPUTE_WALKER commands
// were emitted, and that each walker's post-sync is a WRITE_TIMESTAMP aimed at consecutive
// packets of the event (base address, then base + one packet size).
HWTEST2_F(AppendFillTest,
          givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfiling, IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal checks: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    const uint64_t eventBaseAddress = event->getGpuAddress(device);
    uint64_t firstKernelEventAddress = eventBaseAddress;
    uint64_t secondKernelEventAddress = eventBaseAddress + event->getSinglePacketSize();

    auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern,
                                           sizeof(immediatePattern),
                                           immediateAllocSize, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(2u, event->getPacketsInUse());
    EXPECT_EQ(2u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(2u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
|
||||||
|
|
||||||
|
// Fills memory with the fixture's (non-immediate) pattern while signalling a kernel-timestamp
// event and checks that the event reports two packets and two kernels, that two COMPUTE_WALKER
// commands were emitted, and that each walker's post-sync is a WRITE_TIMESTAMP aimed at
// consecutive packets of the event (base address, then base + one packet size).
HWTEST2_F(AppendFillTest,
          givenCallToAppendMemoryFillWhenTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfiling, IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal checks: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    const uint64_t eventBaseAddress = event->getGpuAddress(device);
    uint64_t firstKernelEventAddress = eventBaseAddress;
    uint64_t secondKernelEventAddress = eventBaseAddress + event->getSinglePacketSize();

    auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(2u, event->getPacketsInUse());
    EXPECT_EQ(2u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(2u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -278,5 +278,295 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowe
|
|||||||
pCommandList->reset();
|
pCommandList->reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fixture alias for the memory-copy timestamp tests below; each of them is additionally
// restricted to specific cores through its HWTEST2_F matcher argument.
using AppendMemoryCopyXeHpAndLater = Test<DeviceFixture>;
|
||||||
|
|
||||||
|
// Appends a memory copy that the mock splits into three kernel launches, signalling a
// kernel-timestamp event, and checks that the event reports three packets and three kernels and
// that each of the three COMPUTE_WALKER commands carries a WRITE_TIMESTAMP post-sync aimed at
// consecutive packets of the event (base + 0, 1 and 2 packet sizes).
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernels,
          IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal checks: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    const uint64_t eventBaseAddress = event->getGpuAddress(device);
    uint64_t firstKernelEventAddress = eventBaseAddress;
    uint64_t secondKernelEventAddress = eventBaseAddress + event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = eventBaseAddress + 2 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(3u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
|
||||||
|
|
||||||
|
// Same scenario as the single-tile memory-copy test, but with partitionCount set to 2 before the
// command list is initialized. The event is expected to report six packets for three kernels, and
// the three COMPUTE_WALKER post-sync destinations are expected two packet sizes apart
// (base + 0, 2 and 4 packet sizes).
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels,
          IsAtLeastXeHpCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;
    commandList.partitionCount = 2;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal checks: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    const uint64_t eventBaseAddress = event->getGpuAddress(device);
    uint64_t firstKernelEventAddress = eventBaseAddress;
    uint64_t secondKernelEventAddress = eventBaseAddress + 2 * event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = eventBaseAddress + 4 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(6u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());
}
|
||||||
|
|
||||||
|
// Memory copy split into three kernels, signalling a kernel-timestamp event created with
// ZE_EVENT_SCOPE_FLAG_HOST signal scope. Expectations:
//  - six packets / three kernels on the event, with the COMPUTE_WALKER post-sync destinations two
//    packet sizes apart (base + 0, 2 and 4 packet sizes);
//  - exactly three PIPE_CONTROLs with a WRITE_IMMEDIATE_DATA post-sync, each writing
//    Event::STATE_SIGNALED with CS stall and DC flush enabled and partition offset disabled, aimed
//    at the packet following each walker's packet (plus the context-end offset when the event
//    uses it).
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernelsAndL3FlushWaHandled,
          isXeHpOrXeHpgCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal checks: the pool and the event are dereferenced unconditionally below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    const uint64_t eventBaseAddress = event->getGpuAddress(device);
    uint64_t firstKernelEventAddress = eventBaseAddress;
    uint64_t secondKernelEventAddress = eventBaseAddress + 2 * event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = eventBaseAddress + 4 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(6u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    auto itorPipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    // First expected immediate-write target: the packet right after the first walker's packet.
    uint64_t eventGpuAddress = firstKernelEventAddress + event->getSinglePacketSize();
    if (event->isUsingContextEndOffset()) {
        eventGpuAddress += event->getContextEndOffset();
    }

    uint32_t postSyncPipeControls = 0;
    for (auto it : itorPipeControls) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
            EXPECT_TRUE(cmd->getDcFlushEnable());
            EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
            postSyncPipeControls++;
            // Each kernel occupies two packets here, so the next target is two packets further.
            eventGpuAddress += (2 * event->getSinglePacketSize());
        }
    }
    EXPECT_EQ(3u, postSyncPipeControls);
}
|
||||||
|
|
||||||
|
// Appends a memory copy on a command list with partitionCount == 2, signalling a
// kernel-timestamp event created with host signal scope, and checks that:
//  - the copy is performed by three kernel appends and no blit,
//  - the event reports 12 packets in use across 3 kernels,
//  - every COMPUTE_WALKER writes its timestamp post-sync to the expected event address,
//  - three PIPE_CONTROLs with WRITE_IMMEDIATE_DATA post-sync signal the event with
//    CS stall, workload-partition offset and DC flush enabled.
HWTEST2_F(AppendMemoryCopyXeHpAndLater,
          givenMultiTileCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernelsAndL3FlushWaHandled,
          isXeHpOrXeHpgCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER;
    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.appendMemoryCopyKernelWithGACallBase = true;
    commandList.partitionCount = 2;

    commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    void *srcPtr = reinterpret_cast<void *>(0x1231);
    void *dstPtr = reinterpret_cast<void *>(0x200002345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    // Stop here instead of dereferencing a null pool below.
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    // The event is dereferenced immediately afterwards; fail the test rather than crash.
    ASSERT_NE(nullptr, event);

    // Expected timestamp destinations: consecutive kernels are spaced 4 packet sizes apart.
    uint64_t firstKernelEventAddress = event->getGpuAddress(device);
    uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize();
    uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 8 * event->getSinglePacketSize();

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr);
    EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled);
    EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled);
    EXPECT_EQ(12u, event->getPacketsInUse());
    EXPECT_EQ(3u, event->getKernelCount());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    auto itorWalkers = findAll<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(3u, itorWalkers.size());
    auto firstWalker = itorWalkers[0];
    auto secondWalker = itorWalkers[1];
    auto thirdWalker = itorWalkers[2];

    auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*firstWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*secondWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    walkerCmd = genCmdCast<COMPUTE_WALKER *>(*thirdWalker);
    EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation());
    EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress());

    auto itorPipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    // First signalling PIPE_CONTROL is expected 2 packet sizes past the first kernel's
    // address (plus the context-end offset when the event uses it); each following one
    // is expected 4 packet sizes further.
    uint64_t eventGpuAddress = firstKernelEventAddress + 2 * event->getSinglePacketSize();
    if (event->isUsingContextEndOffset()) {
        eventGpuAddress += event->getContextEndOffset();
    }
    uint32_t postSyncPipeControls = 0;
    for (auto it : itorPipeControls) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        // Only PIPE_CONTROLs that write immediate data are counted and verified.
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
            EXPECT_TRUE(cmd->getDcFlushEnable());
            EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
            postSyncPipeControls++;
            eventGpuAddress += (4 * event->getSinglePacketSize());
        }
    }
    EXPECT_EQ(3u, postSyncPipeControls);
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -551,15 +551,27 @@ TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndO
|
|||||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
ASSERT_NE(nullptr, eventPool);
|
ASSERT_NE(nullptr, eventPool);
|
||||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
|
||||||
|
auto &l0HwHelper = L0HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
|
||||||
|
auto event = std::unique_ptr<L0::Event>(l0HwHelper.createEvent(eventPool.get(), &eventDesc, device));
|
||||||
ASSERT_NE(nullptr, event);
|
ASSERT_NE(nullptr, event);
|
||||||
|
|
||||||
if (L0HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).multiTileCapablePlatform()) {
|
if (l0HwHelper.multiTileCapablePlatform()) {
|
||||||
EXPECT_TRUE(event->isUsingContextEndOffset());
|
EXPECT_TRUE(event->isUsingContextEndOffset());
|
||||||
} else {
|
} else {
|
||||||
EXPECT_FALSE(event->isUsingContextEndOffset());
|
EXPECT_FALSE(event->isUsingContextEndOffset());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t *eventCompletionMemory = reinterpret_cast<uint32_t *>(event->getHostAddress());
|
||||||
|
if (event->isUsingContextEndOffset()) {
|
||||||
|
eventCompletionMemory = ptrOffset(eventCompletionMemory, event->getContextEndOffset());
|
||||||
|
}
|
||||||
|
uint32_t maxPacketsCount = EventPacketsCount::maxKernelSplit * NEO::TimestampPacketSizeControl::preferredPacketCount;
|
||||||
|
for (uint32_t i = 0; i < maxPacketsCount; i++) {
|
||||||
|
EXPECT_EQ(Event::STATE_INITIAL, *eventCompletionMemory);
|
||||||
|
eventCompletionMemory = ptrOffset(eventCompletionMemory, event->getSinglePacketSize());
|
||||||
|
}
|
||||||
|
|
||||||
result = event->queryStatus();
|
result = event->queryStatus();
|
||||||
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
|
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
|
||||||
|
|
||||||
@@ -1064,7 +1076,7 @@ TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCor
|
|||||||
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
|
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPECT_EQ(1u, event->kernelCount);
|
EXPECT_EQ(1u, event->getKernelCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(TimestampEventCreate, givenSingleTimestampEventThenAllocationSizeCreatedForAllTimestamps) {
|
TEST_F(TimestampEventCreate, givenSingleTimestampEventThenAllocationSizeCreatedForAllTimestamps) {
|
||||||
@@ -1093,13 +1105,13 @@ TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectO
|
|||||||
|
|
||||||
gpuAddr += (4u * event->getSinglePacketSize());
|
gpuAddr += (4u * event->getSinglePacketSize());
|
||||||
|
|
||||||
event->kernelCount = 2;
|
event->increaseKernelCount();
|
||||||
event->setPacketsInUse(2u);
|
event->setPacketsInUse(2u);
|
||||||
EXPECT_EQ(6u, event->getPacketsInUse());
|
EXPECT_EQ(6u, event->getPacketsInUse());
|
||||||
EXPECT_EQ(gpuAddr, event->getPacketAddress(device));
|
EXPECT_EQ(gpuAddr, event->getPacketAddress(device));
|
||||||
|
|
||||||
gpuAddr += (2u * event->getSinglePacketSize());
|
gpuAddr += (2u * event->getSinglePacketSize());
|
||||||
event->kernelCount = 3;
|
event->increaseKernelCount();
|
||||||
EXPECT_EQ(gpuAddr, event->getPacketAddress(device));
|
EXPECT_EQ(gpuAddr, event->getPacketAddress(device));
|
||||||
EXPECT_EQ(7u, event->getPacketsInUse());
|
EXPECT_EQ(7u, event->getPacketsInUse());
|
||||||
}
|
}
|
||||||
@@ -1122,7 +1134,7 @@ TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrec
|
|||||||
}
|
}
|
||||||
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
|
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
|
||||||
}
|
}
|
||||||
EXPECT_EQ(1u, event->kernelCount);
|
EXPECT_EQ(1u, event->getKernelCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(TimestampEventCreate, givenpCountZeroCallingQueryTimestampExpThenpCountSetProperly) {
|
TEST_F(TimestampEventCreate, givenpCountZeroCallingQueryTimestampExpThenpCountSetProperly) {
|
||||||
|
|||||||
@@ -385,6 +385,42 @@ HWTEST_F(PipeControlHelperTests, WhenIsDcFlushAllowedIsCalledThenCorrectResultIs
|
|||||||
EXPECT_EQ(hwInfoConfig.isDcFlushAllowed(), MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo));
|
EXPECT_EQ(hwInfoConfig.isDcFlushAllowed(), MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Programs a PIPE_CONTROL with the WRITE_TIMESTAMP post-sync operation through
// addPipeControlWithPostSync and checks that the command at the start of the stream
// carries the requested address, immediate data and post-sync operation.
HWTEST_F(PipeControlHelperTests, WhenPipeControlPostSyncTimestampUsedThenCorrectPostSyncUsed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    constexpr size_t bufferSize = 128;
    // Array specialization: memory obtained with new[] must be released with delete[].
    std::unique_ptr<uint8_t[]> buffer(new uint8_t[bufferSize]);

    LinearStream stream(buffer.get(), bufferSize);
    uint64_t address = 0x1234567887654320;
    uint64_t immediateData = 0x0;

    PipeControlArgs args;
    MemorySynchronizationCommands<FamilyType>::addPipeControlWithPostSync(
        stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, args);
    auto pipeControl = genCmdCast<PIPE_CONTROL *>(stream.getCpuBase());
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(address, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    EXPECT_EQ(immediateData, pipeControl->getImmediateData());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation());
}
|
||||||
|
|
||||||
|
// Programs a PIPE_CONTROL with the WRITE_IMMEDIATE_DATA post-sync operation through
// addPipeControlWithPostSync and checks that the command at the start of the stream
// carries the requested address, immediate data and post-sync operation.
HWTEST_F(PipeControlHelperTests, WhenPipeControlPostSyncWriteImmediateDataUsedThenCorrectPostSyncUsed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    constexpr size_t bufferSize = 128;
    // Array specialization: memory obtained with new[] must be released with delete[].
    std::unique_ptr<uint8_t[]> buffer(new uint8_t[bufferSize]);

    LinearStream stream(buffer.get(), bufferSize);
    uint64_t address = 0x1234567887654320;
    uint64_t immediateData = 0x1234;

    PipeControlArgs args;
    MemorySynchronizationCommands<FamilyType>::addPipeControlWithPostSync(
        stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, args);
    auto pipeControl = genCmdCast<PIPE_CONTROL *>(stream.getCpuBase());
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(address, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
    EXPECT_EQ(immediateData, pipeControl->getImmediateData());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
}
|
||||||
|
|
||||||
TEST(HwInfoTest, givenHwInfoWhenChosenEngineTypeQueriedThenDefaultIsReturned) {
|
TEST(HwInfoTest, givenHwInfoWhenChosenEngineTypeQueriedThenDefaultIsReturned) {
|
||||||
HardwareInfo hwInfo = *defaultHwInfo;
|
HardwareInfo hwInfo = *defaultHwInfo;
|
||||||
hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS;
|
hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS;
|
||||||
|
|||||||
@@ -75,6 +75,10 @@ struct UnitTestHelper {
|
|||||||
static void adjustKernelDescriptorForImplicitArgs(KernelDescriptor &kernelDescriptor);
|
static void adjustKernelDescriptorForImplicitArgs(KernelDescriptor &kernelDescriptor);
|
||||||
|
|
||||||
static std::vector<bool> getProgrammedLargeGrfValues(CommandStreamReceiver &csr, LinearStream &linearStream);
|
static std::vector<bool> getProgrammedLargeGrfValues(CommandStreamReceiver &csr, LinearStream &linearStream);
|
||||||
|
|
||||||
|
static bool getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem);
|
||||||
|
|
||||||
|
static bool timestampRegisterHighAddress();
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -70,4 +70,9 @@ inline uint64_t UnitTestHelper<GfxFamily>::getPipeControlPostSyncAddress(const t
|
|||||||
return (gpuAddressHigh << 32) | gpuAddress;
|
return (gpuAddressHigh << 32) | gpuAddress;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
bool UnitTestHelper<GfxFamily>::timestampRegisterHighAddress() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -72,4 +72,9 @@ std::vector<bool> UnitTestHelper<GfxFamily>::getProgrammedLargeGrfValues(Command
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
inline bool UnitTestHelper<GfxFamily>::getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -100,4 +100,9 @@ std::vector<bool> UnitTestHelper<GfxFamily>::getProgrammedLargeGrfValues(Command
|
|||||||
return largeGrfValues;
|
return largeGrfValues;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
inline bool UnitTestHelper<GfxFamily>::getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem) {
|
||||||
|
return storeRegisterMem.getWorkloadPartitionIdOffsetEnable();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ using IsAtMostXeHpgCore = IsAtMostGfxCore<IGFX_XE_HPG_CORE>;
|
|||||||
using IsAtLeastXeHpcCore = IsAtLeastGfxCore<IGFX_XE_HPC_CORE>;
|
using IsAtLeastXeHpcCore = IsAtLeastGfxCore<IGFX_XE_HPC_CORE>;
|
||||||
using IsAtMostXeHpcCore = IsAtMostGfxCore<IGFX_XE_HPC_CORE>;
|
using IsAtMostXeHpcCore = IsAtMostGfxCore<IGFX_XE_HPC_CORE>;
|
||||||
|
|
||||||
|
// Alias of IsAnyGfxCores instantiated with IGFX_XE_HP_CORE and IGFX_XE_HPG_CORE.
using isXeHpOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPG_CORE>;
|
||||||
using isXeHpOrXeHpcCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;
|
using isXeHpOrXeHpcCore = IsAnyGfxCores<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;
|
||||||
using isXeHpcOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HPC_CORE, IGFX_XE_HPG_CORE>;
|
using isXeHpcOrXeHpgCore = IsAnyGfxCores<IGFX_XE_HPC_CORE, IGFX_XE_HPG_CORE>;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user