feature: add initial support for host mapped timestamps

Related-To: LOCI-4171

Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
Joshua Santosh Ranjan
2023-06-09 15:07:24 +00:00
committed by Compute-Runtime-Automation
parent 9214d0c635
commit 97b4d8bab5
22 changed files with 813 additions and 28 deletions

View File

@@ -447,6 +447,7 @@ zeGetEventProcAddrTable(
pDdiTable->pfnQueryStatus = L0::zeEventQueryStatus; pDdiTable->pfnQueryStatus = L0::zeEventQueryStatus;
pDdiTable->pfnHostReset = L0::zeEventHostReset; pDdiTable->pfnHostReset = L0::zeEventHostReset;
pDdiTable->pfnQueryKernelTimestamp = L0::zeEventQueryKernelTimestamp; pDdiTable->pfnQueryKernelTimestamp = L0::zeEventQueryKernelTimestamp;
pDdiTable->pfnQueryKernelTimestampsExt = L0::zeEventQueryKernelTimestampsExt;
driverDdiTable.coreDdiTable.Event = *pDdiTable; driverDdiTable.coreDdiTable.Event = *pDdiTable;
if (driverDdiTable.enableTracing) { if (driverDdiTable.enableTracing) {
pDdiTable->pfnCreate = zeEventCreateTracing; pDdiTable->pfnCreate = zeEventCreateTracing;

View File

@@ -100,6 +100,14 @@ ze_result_t zeEventQueryKernelTimestamp(
ze_kernel_timestamp_result_t *timestampType) { ze_kernel_timestamp_result_t *timestampType) {
return L0::Event::fromHandle(hEvent)->queryKernelTimestamp(timestampType); return L0::Event::fromHandle(hEvent)->queryKernelTimestamp(timestampType);
} }
// L0-namespace implementation of zeEventQueryKernelTimestampsExt: converts the
// event and device handles to their driver objects and forwards the query,
// together with the caller's count/result pointers, to the event.
ze_result_t zeEventQueryKernelTimestampsExt(
    ze_event_handle_t hEvent,
    ze_device_handle_t hDevice,
    uint32_t *pCount,
    ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) {
    auto *event = L0::Event::fromHandle(hEvent);
    auto *device = L0::Device::fromHandle(hDevice);
    return event->queryKernelTimestampsExt(device, pCount, pResults);
}
} // namespace L0 } // namespace L0
extern "C" { extern "C" {
@@ -223,6 +231,18 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestamp(
dstptr); dstptr);
} }
// Exported API symbol: a trampoline that passes all four arguments unchanged
// to L0::zeEventQueryKernelTimestampsExt and returns its result.
ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestampsExt(
    ze_event_handle_t hEvent,
    ze_device_handle_t hDevice,
    uint32_t *pCount,
    ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) {
    const ze_result_t result = L0::zeEventQueryKernelTimestampsExt(hEvent, hDevice, pCount, pResults);
    return result;
}
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendQueryKernelTimestamps( ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendQueryKernelTimestamps(
ze_command_list_handle_t hCommandList, ze_command_list_handle_t hCommandList,
uint32_t numEvents, uint32_t numEvents,

View File

@@ -396,6 +396,7 @@ struct CommandList : _ze_command_list_handle_t {
uint32_t commandListPerThreadPrivateScratchSize = 0u; uint32_t commandListPerThreadPrivateScratchSize = 0u;
uint32_t partitionCount = 1; uint32_t partitionCount = 1;
uint32_t defaultMocsIndex = 0; uint32_t defaultMocsIndex = 0;
uint64_t timestampRefreshIntervalInNanoSec = 0;
bool isFlushTaskSubmissionEnabled = false; bool isFlushTaskSubmissionEnabled = false;
bool isSyncModeQueue = false; bool isSyncModeQueue = false;

View File

@@ -18,6 +18,7 @@
#include "shared/source/helpers/blit_properties.h" #include "shared/source/helpers/blit_properties.h"
#include "shared/source/helpers/definitions/command_encoder_args.h" #include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/logical_state_helper.h" #include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/preamble.h" #include "shared/source/helpers/preamble.h"
@@ -133,6 +134,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
this->ownedPrivateAllocations.clear(); this->ownedPrivateAllocations.clear();
cmdListCurrentStartOffset = 0; cmdListCurrentStartOffset = 0;
mappedTsEventList.clear();
previousSynchronizedTimestamp = {};
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
@@ -232,6 +236,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
createLogicalStateHelper(); createLogicalStateHelper();
const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
if (hwInfo.capabilityTable.kernelTimestampValidBits < 64u) {
this->timestampRefreshIntervalInNanoSec = static_cast<uint64_t>(maxKernelTsValue * frequency);
} else {
this->timestampRefreshIntervalInNanoSec = maxKernelTsValue;
}
// Debug override: when CommandListTimestampRefreshIntervalInMilliSec is set
// (anything other than -1) it replaces the refresh interval computed above.
if (NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get() != -1) {
    constexpr uint64_t milliSecondsToNanoSeconds = 1000000u;
    const uint32_t refreshTime = NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get();
    // Widen before multiplying: a 32-bit product wraps for any interval above
    // ~4294 ms, which would silently shorten the requested refresh period.
    this->timestampRefreshIntervalInNanoSec = static_cast<uint64_t>(refreshTime) * milliSecondsToNanoSeconds;
}
return returnType; return returnType;
} }
@@ -320,7 +337,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions, auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
event, launchParams); event, launchParams);
addToMappedEventList(event);
if (NEO::DebugManager.flags.EnableSWTags.get()) { if (NEO::DebugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>( neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
*commandContainer.getCommandStream(), *commandContainer.getCommandStream(),
@@ -352,8 +369,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
launchParams.isCooperative = true; launchParams.isCooperative = true;
return appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
event, launchParams); ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
event, launchParams);
addToMappedEventList(event);
return ret;
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
@@ -382,6 +402,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
launchParams.isIndirect = true; launchParams.isIndirect = true;
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer, ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
nullptr, launchParams); nullptr, launchParams);
addToMappedEventList(event);
appendSignalEventPostWalker(event); appendSignalEventPostWalker(event);
return ret; return ret;
@@ -427,7 +448,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
return ret; return ret;
} }
} }
addToMappedEventList(event);
appendSignalEventPostWalker(event); appendSignalEventPostWalker(event);
return ret; return ret;
@@ -496,6 +517,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
appendEventForProfiling(signalEvent, true); appendEventForProfiling(signalEvent, true);
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges); applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
appendSignalEventPostWalker(signalEvent); appendSignalEventPostWalker(signalEvent);
addToMappedEventList(signalEvent);
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
@@ -562,9 +584,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
} }
if (isCopyOnly()) { if (isCopyOnly()) {
return appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(), auto status = appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(),
{0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch, {0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch,
rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, event); rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, event);
addToMappedEventList(Event::fromHandle(hEvent));
return status;
} }
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
@@ -639,9 +663,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true; launchParams.isBuiltInKernel = true;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents, auto status = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
launchParams, relaxedOrderingDispatch); event, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
addToMappedEventList(Event::fromHandle(hEvent));
return status;
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
@@ -706,9 +733,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
} }
if (isCopyOnly()) { if (isCopyOnly()) {
return appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc, auto status = appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc,
{pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch, {pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch,
rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, event); rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, event);
addToMappedEventList(event);
return status;
} }
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
@@ -791,6 +820,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR); (dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs, ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch); event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch);
addToMappedEventList(event);
addFlushRequiredCommand(allocationStruct.needsFlush, event); addFlushRequiredCommand(allocationStruct.needsFlush, event);
@@ -890,9 +920,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
auto dstSlicePitch = auto dstSlicePitch =
(dstImage->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : dstRegion.height) * dstRowPitch; (dstImage->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : dstRegion.height) * dstRowPitch;
return appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(), auto status = appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(),
{srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch, {srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, event); dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, event);
addToMappedEventList(event);
return status;
} }
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
@@ -929,9 +961,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true; launchParams.isBuiltInKernel = true;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &kernelArgs, auto status = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents, event, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch); launchParams, relaxedOrderingDispatch);
addToMappedEventList(event);
return status;
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
@@ -1154,7 +1188,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
commandContainer.addToResidencyContainer(clearColorAllocation); commandContainer.addToResidencyContainer(clearColorAllocation);
appendEventForProfiling(signalEvent, true); appendEventForProfiling(signalEvent, true);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), dummyBlitWa); NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), dummyBlitWa);
makeResidentDummyAllocation(); makeResidentDummyAllocation();
@@ -1366,6 +1399,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket); appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
addToMappedEventList(signalEvent);
if (this->inOrderExecutionEnabled && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) { if (this->inOrderExecutionEnabled && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) {
obtainNewTimestampPacketNode(); obtainNewTimestampPacketNode();
@@ -1459,6 +1493,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
return result; return result;
} }
addToMappedEventList(signalEvent);
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
if (this->inOrderExecutionEnabled && isCopyOnly() && inOrderCopyOnlySignalingAllowed) { if (this->inOrderExecutionEnabled && isCopyOnly() && inOrderCopyOnlySignalingAllowed) {
@@ -1680,7 +1715,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
} }
if (isCopyOnly()) { if (isCopyOnly()) {
return appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); auto status = appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
addToMappedEventList(signalEvent);
return status;
} }
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false); ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
@@ -1873,6 +1910,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
} }
} }
addToMappedEventList(signalEvent);
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket); appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent); addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
@@ -2407,6 +2445,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
} }
commandContainer.addToResidencyContainer(allocationStruct.alloc); commandContainer.addToResidencyContainer(allocationStruct.alloc);
addToMappedEventList(signalEvent);
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
@@ -2518,6 +2558,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
return appendResult; return appendResult;
} }
addToMappedEventList(Event::fromHandle(hSignalEvent));
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
@@ -2886,6 +2928,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
appendComputeBarrierCommand(); appendComputeBarrierCommand();
} }
addToMappedEventList(signalEvent);
appendSignalEventPostWalker(signalEvent); appendSignalEventPostWalker(signalEvent);
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }

View File

@@ -104,6 +104,8 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushBcsTask
hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies
); );
CommandListImp::storeReferenceTsToMappedEvents(true);
return csr->flushBcsTask(cmdStreamTask, taskStartOffset, dispatchBcsFlags, this->device->getHwInfo()); return csr->flushBcsTask(cmdStreamTask, taskStartOffset, dispatchBcsFlags, this->device->getHwInfo());
} }
@@ -198,6 +200,8 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
} }
} }
CommandListImp::storeReferenceTsToMappedEvents(true);
return this->csr->flushTask( return this->csr->flushTask(
cmdStreamTask, cmdStreamTask,
taskStartOffset, taskStartOffset,
@@ -875,6 +879,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
} }
if (signalEvent) { if (signalEvent) {
CommandListImp::addToMappedEventList(signalEvent);
CommandListImp::storeReferenceTsToMappedEvents(true);
signalEvent->setGpuStartTimestamp(); signalEvent->setGpuStartTimestamp();
} }

View File

@@ -19,6 +19,7 @@
#include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/os_time.h"
#include "shared/source/os_interface/sys_calls_common.h" #include "shared/source/os_interface/sys_calls_common.h"
#include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h"
@@ -237,4 +238,32 @@ void CommandListImp::enableInOrderExecution() {
inOrderExecutionEnabled = true; inOrderExecutionEnabled = true;
} }
void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) {
if (mappedTsEventList.size()) {
uint64_t currentCpuTimeStamp = 0;
device->getNEODevice()->getOSTime()->getCpuTime(&currentCpuTimeStamp);
const auto recalculate =
(currentCpuTimeStamp - previousSynchronizedTimestamp.cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
if (previousSynchronizedTimestamp.cpuTimeinNS == 0 || recalculate) {
device->getNEODevice()->getOSTime()->getCpuGpuTime(&previousSynchronizedTimestamp);
}
for (auto &event : mappedTsEventList) {
event->setReferenceTs(previousSynchronizedTimestamp);
}
if (isClearEnabled) {
mappedTsEventList.clear();
}
}
}
// Starts tracking `event` in mappedTsEventList. Null events and events without
// hasKerneMappedTsCapability are ignored, and an event already in the list is
// not added a second time.
void CommandListImp::addToMappedEventList(Event *event) {
    if (event == nullptr || !event->hasKerneMappedTsCapability) {
        return;
    }

    const auto listEnd = mappedTsEventList.end();
    const bool alreadyTracked = std::find(mappedTsEventList.begin(), listEnd, event) != listEnd;
    if (!alreadyTracked) {
        mappedTsEventList.push_back(event);
    }
}
} // namespace L0 } // namespace L0

View File

@@ -7,6 +7,7 @@
#pragma once #pragma once
#include "shared/source/helpers/timestamp_packet_container.h" #include "shared/source/helpers/timestamp_packet_container.h"
#include "shared/source/os_interface/os_time.h"
#include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdlist/cmdlist.h"
@@ -36,6 +37,9 @@ struct CommandListImp : CommandList {
void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties); void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties);
void enableInOrderExecution(); void enableInOrderExecution();
bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; } bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; }
// Stamps every tracked event with a CPU/GPU reference timestamp pair; when
// `clear` is true the tracking list is emptied afterwards.
void storeReferenceTsToMappedEvents(bool clear);
// Tracks `event` for reference-timestamp stamping. Null events, events without
// hasKerneMappedTsCapability, and events already tracked are ignored.
void addToMappedEventList(Event *event);
// Read-only view of the events currently tracked.
const std::vector<Event *> &peekMappedEventList() { return mappedTsEventList; }
protected: protected:
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper; std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
@@ -51,6 +55,8 @@ struct CommandListImp : CommandList {
static constexpr bool cmdListDefaultPipelineSelectModeSelected = true; static constexpr bool cmdListDefaultPipelineSelectModeSelected = true;
static constexpr bool cmdListDefaultMediaSamplerClockGate = false; static constexpr bool cmdListDefaultMediaSamplerClockGate = false;
static constexpr bool cmdListDefaultGlobalAtomics = false; static constexpr bool cmdListDefaultGlobalAtomics = false;
// Events waiting to receive a reference timestamp (filled by addToMappedEventList).
std::vector<Event *> mappedTsEventList{};
// CPU/GPU timestamp pair last captured by storeReferenceTsToMappedEvents; it is
// reused until the refresh interval elapses.
NEO::TimeStampData previousSynchronizedTimestamp{};
}; };
} // namespace L0 } // namespace L0

View File

@@ -222,6 +222,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
this->programStateSipEndWA(ctx.stateSipRequired, child); this->programStateSipEndWA(ctx.stateSipRequired, child);
this->assignCsrTaskCountToFenceIfAvailable(hFence); this->assignCsrTaskCountToFenceIfAvailable(hFence);
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, child); this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, child);
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = static_cast<CommandListImp *>(CommandList::fromHandle(commandListHandles[i]));
commandList->storeReferenceTsToMappedEvents(false);
}
auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, child); auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, child);
this->csr->setPreemptionMode(ctx.statePreemption); this->csr->setPreemptionMode(ctx.statePreemption);
@@ -288,6 +294,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(
this->assignCsrTaskCountToFenceIfAvailable(hFence); this->assignCsrTaskCountToFenceIfAvailable(hFence);
this->programLastCommandListReturnBbStart(child, ctx); this->programLastCommandListReturnBbStart(child, ctx);
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = static_cast<CommandListImp *>(CommandList::fromHandle(phCommandLists[i]));
commandList->storeReferenceTsToMappedEvents(false);
}
this->dispatchTaskCountPostSyncByMiFlushDw(ctx.isDispatchTaskCountPostSyncRequired, child); this->dispatchTaskCountPostSyncByMiFlushDw(ctx.isDispatchTaskCountPostSyncRequired, child);
this->makeCsrTagAllocationResident(); this->makeCsrTagAllocationResident();

View File

@@ -876,6 +876,9 @@ ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties)
NEO::Device *activeDevice = getActiveDevice(); NEO::Device *activeDevice = getActiveDevice();
auto &compilerProductHelper = activeDevice->getCompilerProductHelper(); auto &compilerProductHelper = activeDevice->getCompilerProductHelper();
zeDeviceIpVersion->ipVersion = compilerProductHelper.getHwIpVersion(hardwareInfo); zeDeviceIpVersion->ipVersion = compilerProductHelper.getHwIpVersion(hardwareInfo);
} else if (extendedProperties->stype == ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PROPERTIES) {
ze_event_query_kernel_timestamps_ext_properties_t *kernelTimestampExtProperties = reinterpret_cast<ze_event_query_kernel_timestamps_ext_properties_t *>(extendedProperties);
kernelTimestampExtProperties->flags = ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_KERNEL | ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_SYNCHRONIZED;
} }
extendedProperties = static_cast<ze_base_properties_t *>(extendedProperties->pNext); extendedProperties = static_cast<ze_base_properties_t *>(extendedProperties->pNext);
} }

View File

@@ -119,7 +119,8 @@ struct DriverHandleImp : public DriverHandle {
{ZE_DEVICE_IP_VERSION_EXT_NAME, ZE_DEVICE_IP_VERSION_VERSION_CURRENT}, {ZE_DEVICE_IP_VERSION_EXT_NAME, ZE_DEVICE_IP_VERSION_VERSION_CURRENT},
{ZE_CACHE_RESERVATION_EXT_NAME, ZE_CACHE_RESERVATION_EXT_VERSION_CURRENT}, {ZE_CACHE_RESERVATION_EXT_NAME, ZE_CACHE_RESERVATION_EXT_VERSION_CURRENT},
{ZE_IMAGE_VIEW_EXT_NAME, ZE_IMAGE_VIEW_EXP_VERSION_CURRENT}, {ZE_IMAGE_VIEW_EXT_NAME, ZE_IMAGE_VIEW_EXP_VERSION_CURRENT},
{ZE_IMAGE_VIEW_PLANAR_EXT_NAME, ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_CURRENT}}; {ZE_IMAGE_VIEW_PLANAR_EXT_NAME, ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_CURRENT},
{ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_NAME, ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_VERSION_CURRENT}};
uint64_t uuidTimestamp = 0u; uint64_t uuidTimestamp = 0u;

View File

@@ -9,6 +9,7 @@
#include "shared/source/helpers/timestamp_packet_constants.h" #include "shared/source/helpers/timestamp_packet_constants.h"
#include "shared/source/helpers/timestamp_packet_container.h" #include "shared/source/helpers/timestamp_packet_container.h"
#include "shared/source/memory_manager/multi_graphics_allocation.h" #include "shared/source/memory_manager/multi_graphics_allocation.h"
#include "shared/source/os_interface/os_time.h"
#include <level_zero/ze_api.h> #include <level_zero/ze_api.h>
@@ -69,6 +70,8 @@ struct Event : _ze_event_handle_t {
virtual ze_result_t reset() = 0; virtual ze_result_t reset() = 0;
virtual ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) = 0; virtual ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) = 0;
virtual ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) = 0; virtual ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) = 0;
virtual ze_result_t queryKernelTimestampsExt(Device *device, uint32_t *pCount, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) = 0;
enum State : uint32_t { enum State : uint32_t {
STATE_SIGNALED = 0u, STATE_SIGNALED = 0u,
HOST_CACHING_DISABLED_PERMANENT = std::numeric_limits<uint32_t>::max() - 2, HOST_CACHING_DISABLED_PERMANENT = std::numeric_limits<uint32_t>::max() - 2,
@@ -210,6 +213,10 @@ struct Event : _ze_event_handle_t {
void enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes); void enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes);
bool isInOrderExecEvent() const { return inOrderExecEvent; } bool isInOrderExecEvent() const { return inOrderExecEvent; }
const NEO::TimestampPacketContainer *getInOrderTimestampPacket() const { return inOrderTimestampPacket.get(); } const NEO::TimestampPacketContainer *getInOrderTimestampPacket() const { return inOrderTimestampPacket.get(); }
void setReferenceTs(NEO::TimeStampData &timestamp) {
referenceTs = timestamp;
}
bool hasKerneMappedTsCapability = false;
protected: protected:
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {} Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
@@ -218,6 +225,7 @@ struct Event : _ze_event_handle_t {
uint64_t globalEndTS = 1; uint64_t globalEndTS = 1;
uint64_t contextStartTS = 1; uint64_t contextStartTS = 1;
uint64_t contextEndTS = 1; uint64_t contextEndTS = 1;
NEO::TimeStampData referenceTs{};
std::chrono::microseconds gpuHangCheckPeriod{500'000}; std::chrono::microseconds gpuHangCheckPeriod{500'000};
std::bitset<EventPacketsCount::maxKernelSplit> l3FlushAppliedOnKernel; std::bitset<EventPacketsCount::maxKernelSplit> l3FlushAppliedOnKernel;
@@ -268,6 +276,9 @@ struct EventPool : _ze_event_pool_handle_t {
DriverHandleImp *driver, ContextImp *context, uint32_t numDevices, ze_device_handle_t *deviceHandles); DriverHandleImp *driver, ContextImp *context, uint32_t numDevices, ze_device_handle_t *deviceHandles);
EventPool(const ze_event_pool_desc_t *desc) : EventPool(desc->count) { EventPool(const ze_event_pool_desc_t *desc) : EventPool(desc->count) {
eventPoolFlags = desc->flags; eventPoolFlags = desc->flags;
if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) {
eventPoolFlags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
}
} }
virtual ~EventPool(); virtual ~EventPool();
MOCKABLE_VIRTUAL ze_result_t destroy(); MOCKABLE_VIRTUAL ze_result_t destroy();
@@ -299,6 +310,13 @@ struct EventPool : _ze_event_pool_handle_t {
return false; return false;
} }
// Reports whether eventPoolFlags has the
// ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP bit set.
bool isEventPoolKerneMappedTsFlagSet() const {
    return (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) != 0;
}
uint32_t getMaxKernelCount() const { uint32_t getMaxKernelCount() const {
return maxKernelCount; return maxKernelCount;
} }

View File

@@ -52,6 +52,7 @@ struct EventImp : public Event {
ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) override; ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) override;
ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) override; ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) override;
ze_result_t queryKernelTimestampsExt(Device *device, uint32_t *pCount, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) override;
void resetDeviceCompletionData(bool resetAllPackets); void resetDeviceCompletionData(bool resetAllPackets);
void resetKernelCountAndPacketUsedCount() override; void resetKernelCountAndPacketUsedCount() override;
@@ -74,6 +75,8 @@ struct EventImp : public Event {
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal); ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
MOCKABLE_VIRTUAL void assignKernelEventCompletionData(void *address); MOCKABLE_VIRTUAL void assignKernelEventCompletionData(void *address);
void setRemainingPackets(TagSizeT eventVal, void *nextPacketAddress, uint32_t packetsAlreadySet); void setRemainingPackets(TagSizeT eventVal, void *nextPacketAddress, uint32_t packetsAlreadySet);
void getSynchronizedKernelTimestamps(ze_synchronized_timestamp_result_ext_t *pSynchronizedTimestampsBuffer,
const uint32_t count, const ze_kernel_timestamp_result_t *pKernelTimestampsBuffer);
}; };
} // namespace L0 } // namespace L0

View File

@@ -9,6 +9,7 @@
#include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/sub_device.h" #include "shared/source/device/sub_device.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/os_interface/os_time.h" #include "shared/source/os_interface/os_time.h"
@@ -32,6 +33,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
event->setEventTimestampFlag(true); event->setEventTimestampFlag(true);
event->setSinglePacketSize(NEO::TimestampPackets<TagSizeT>::getSinglePacketSize()); event->setSinglePacketSize(NEO::TimestampPackets<TagSizeT>::getSinglePacketSize());
} }
event->hasKerneMappedTsCapability = eventPool->isEventPoolKerneMappedTsFlagSet();
auto &hwInfo = neoDevice->getHardwareInfo(); auto &hwInfo = neoDevice->getHardwareInfo();
event->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo); event->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo);
@@ -494,6 +496,85 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *cou
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
/*
 * Translates raw device kernel timestamps into timestamps on the host timescale.
 *
 * pSynchronizedTimestampsBuffer - destination, `count` entries are written
 * count                         - number of entries in both buffers
 * pKernelTimestampsBuffer       - source raw device timestamps, `count` entries are read
 *
 * Scheme (all values in nanoseconds once multiplied by the timer resolution):
 *   hostStart = (referenceHostTs - referenceDeviceTs) + kernelDeviceStart
 *   hostEnd   = hostStart + (kernelDeviceEnd - kernelDeviceStart)
 * The context start is set equal to the global start; only the context duration is
 * taken from the context timestamps.
 */
template <typename TagSizeT>
void EventImp<TagSizeT>::getSynchronizedKernelTimestamps(ze_synchronized_timestamp_result_ext_t *pSynchronizedTimestampsBuffer,
                                                         const uint32_t count, const ze_kernel_timestamp_result_t *pKernelTimestampsBuffer) {
    auto *neoDevice = device->getNEODevice();
    auto &coreHelper = neoDevice->getGfxCoreHelper();
    // Raw tick counts are multiplied by this value to obtain nanoseconds.
    const auto timerResolution = neoDevice->getDeviceInfo().profilingTimerResolution;
    const uint64_t kernelTsMask = maxNBitValue(neoDevice->getHardwareInfo().capabilityTable.kernelTimestampValidBits);

    // Tick distance between two kernel timestamps, restricted to the valid timestamp bits.
    // NOTE(review): on wrap-around this yields end + (max - start), which looks one tick
    // short of the true distance - confirm whether that is intended.
    const auto elapsedTicks = [kernelTsMask](uint64_t beginTs, uint64_t finishTs) -> uint64_t {
        beginTs &= kernelTsMask;
        finishTs &= kernelTsMask;
        return (beginTs > finishTs) ? finishTs + (kernelTsMask - beginTs)
                                    : finishTs - beginTs;
    };

    const auto &referenceHostTsInNs = referenceTs.cpuTimeinNS;
    auto referenceDeviceTsInNs = coreHelper.getGpuTimeStampInNS(referenceTs.gpuTimeStamp, timerResolution);

    // Offset between the host and the device clocks at the reference point.
    const int64_t hostDeviceDeltaInNs = referenceHostTsInNs - referenceDeviceTsInNs;

    const auto mapToHostTimescale = [&](ze_synchronized_timestamp_data_ext_t *hostTs, const ze_kernel_timestamp_data_t *rawTs) {
        const uint64_t kernelStartInNs = static_cast<uint64_t>(rawTs->kernelStart * timerResolution);

        int64_t shiftInNs = hostDeviceDeltaInNs;
        uint64_t hostStartInNs = kernelStartInNs + shiftInNs;
        if (hostStartInNs < referenceHostTsInNs) {
            // A start that lands before the reference point is treated as a wrapped device
            // counter: shift it forward by one full global timestamp period.
            shiftInNs += static_cast<uint64_t>(maxNBitValue(coreHelper.getGlobalTimeStampBits()) * timerResolution);
            hostStartInNs = kernelStartInNs + shiftInNs;
        }

        const uint64_t durationInTicks = elapsedTicks(rawTs->kernelStart, rawTs->kernelEnd);
        const uint64_t durationInNs = static_cast<uint64_t>(durationInTicks * timerResolution);

        hostTs->kernelStart = hostStartInNs;
        hostTs->kernelEnd = hostStartInNs + durationInNs;
    };

    for (uint32_t entry = 0; entry < count; ++entry) {
        auto &syncResult = pSynchronizedTimestampsBuffer[entry];
        const auto &rawResult = pKernelTimestampsBuffer[entry];

        mapToHostTimescale(&syncResult.global, &rawResult.global);

        // Context timestamps share the global start; only their duration is context specific.
        syncResult.context.kernelStart = syncResult.global.kernelStart;
        const uint64_t contextTicks = elapsedTicks(rawResult.context.kernelStart, rawResult.context.kernelEnd);
        const uint64_t contextDurationInNs = static_cast<uint64_t>(contextTicks * timerResolution);
        syncResult.context.kernelEnd = syncResult.context.kernelStart + contextDurationInNs;
    }
}
/*
 * Queries the kernel timestamps of this event and, when the event has the kernel mapped
 * timestamp capability, also the timestamps mapped onto the host timescale.
 *
 * device   - device the query is issued for; forwarded to queryTimestampsExp
 * pCount   - in/out entry count; when it points to 0 the call is forwarded with a null
 *            buffer and pResults is not touched
 * pResults - destination buffers; must not be null when *pCount is non-zero
 *
 * Returns the status of queryTimestampsExp, or ZE_RESULT_ERROR_INVALID_NULL_POINTER when
 * *pCount is non-zero and pResults is null.
 */
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryKernelTimestampsExt(Device *device, uint32_t *pCount, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) {
    if (*pCount == 0) {
        return queryTimestampsExp(device, pCount, nullptr);
    }

    // A non-zero count requires somewhere to write the results.
    if (pResults == nullptr) {
        return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
    }

    const ze_result_t status = queryTimestampsExp(device, pCount, pResults->pKernelTimestampsBuffer);

    // The synchronized buffer may legitimately be left null by the caller; only run the
    // conversion when both the source and the destination buffers are present.
    const bool buffersAvailable = (pResults->pSynchronizedTimestampsBuffer != nullptr) &&
                                  (pResults->pKernelTimestampsBuffer != nullptr);
    if (status == ZE_RESULT_SUCCESS && hasKerneMappedTsCapability && buffersAvailable) {
        getSynchronizedKernelTimestamps(pResults->pSynchronizedTimestampsBuffer, *pCount, pResults->pKernelTimestampsBuffer);
    }
    return status;
}
template <typename TagSizeT> template <typename TagSizeT>
uint32_t EventImp<TagSizeT>::getPacketsInUse() const { uint32_t EventImp<TagSizeT>::getPacketsInUse() const {
uint32_t packetsInUse = 0; uint32_t packetsInUse = 0;

View File

@@ -7,6 +7,7 @@
#include "zello_common.h" #include "zello_common.h"
#include <algorithm>
#include <cstring> #include <cstring>
#include <functional> #include <functional>
#include <map> #include <map>
@@ -27,6 +28,20 @@ inline std::vector<uint8_t> loadBinaryFile(const std::string &filePath) {
return binaryFile; return binaryFile;
} }
// Creates an immediate command list for `device` in `context`, using the queue ordinal
// returned by getCommandQueueOrdinal(device). `syncMode` is forwarded to selectQueueMode.
// Terminates through SUCCESS_OR_TERMINATE if the creation call does not succeed.
void createImmediateCommandList(ze_device_handle_t &device,
                                ze_context_handle_t &context,
                                bool syncMode,
                                ze_command_list_handle_t &cmdList) {
    ze_command_queue_desc_t queueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    queueDesc.index = 0;
    queueDesc.ordinal = getCommandQueueOrdinal(device);
    queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    queueDesc.flags = 0;
    queueDesc.pNext = nullptr;

    // The queue mode is applied last, on top of the otherwise fully populated descriptor.
    selectQueueMode(queueDesc, syncMode);

    SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &queueDesc, &cmdList));
}
void createCmdQueueAndCmdList(ze_context_handle_t &context, void createCmdQueueAndCmdList(ze_context_handle_t &context,
ze_device_handle_t &device, ze_device_handle_t &device,
ze_command_queue_handle_t &cmdQueue, ze_command_queue_handle_t &cmdQueue,
@@ -59,6 +74,21 @@ void createCmdQueueAndCmdList(ze_context_handle_t &context,
SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList));
} }
// Creates an immediate command list for `device` in `context` on the caller supplied
// queue group ordinal. `syncMode` is forwarded to selectQueueMode.
// Terminates through SUCCESS_OR_TERMINATE if the creation call does not succeed.
void createImmediateCommandList(ze_device_handle_t &device,
                                ze_context_handle_t &context,
                                uint32_t queueGroupOrdinal,
                                bool syncMode,
                                ze_command_list_handle_t &cmdList) {
    ze_command_queue_desc_t queueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    queueDesc.index = 0;
    queueDesc.ordinal = queueGroupOrdinal;
    queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    queueDesc.flags = 0;
    queueDesc.pNext = nullptr;

    // The queue mode is applied last, on top of the otherwise fully populated descriptor.
    selectQueueMode(queueDesc, syncMode);

    SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &queueDesc, &cmdList));
}
bool testWriteGlobalTimestamp(int argc, char *argv[], bool testWriteGlobalTimestamp(int argc, char *argv[],
ze_context_handle_t &context, ze_context_handle_t &context,
ze_driver_handle_t &driver, ze_driver_handle_t &driver,
@@ -394,7 +424,8 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[],
ze_event_pool_handle_t eventPool; ze_event_pool_handle_t eventPool;
ze_event_handle_t kernelTsEvent; ze_event_handle_t kernelTsEvent;
bool runTillDeviceTsOverflows = isParamEnabled(argc, argv, "-o", "--runTillOverflow"); bool runTillDeviceTsOverflows = isParamEnabled(argc, argv, "-d", "--runTillDeviceTsOverflow");
bool runTillKernelTsOverflows = isParamEnabled(argc, argv, "-k", "--runTillKernelTsOverflow");
// Create commandQueue and cmdList // Create commandQueue and cmdList
createCmdQueueAndCmdList(context, device, cmdQueue, cmdList); createCmdQueueAndCmdList(context, device, cmdQueue, cmdList);
@@ -489,7 +520,7 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[],
} }
}; };
uint64_t unusedHostTs, referenceDeviceTs; uint64_t unusedHostTs, referenceDeviceTs, referenceKernelTs = 0;
SUCCESS_OR_TERMINATE(zeDeviceGetGlobalTimestamps(device, &unusedHostTs, &referenceDeviceTs)); SUCCESS_OR_TERMINATE(zeDeviceGetGlobalTimestamps(device, &unusedHostTs, &referenceDeviceTs));
std::cout << "ReferenceDeviceTs: " << referenceDeviceTs << "\n"; std::cout << "ReferenceDeviceTs: " << referenceDeviceTs << "\n";
@@ -502,6 +533,12 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[],
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max())); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
ze_kernel_timestamp_result_t *kernelTsResults = reinterpret_cast<ze_kernel_timestamp_result_t *>(timestampBuffer); ze_kernel_timestamp_result_t *kernelTsResults = reinterpret_cast<ze_kernel_timestamp_result_t *>(timestampBuffer);
auto currMinKernelTs = std::min(kernelTsResults->global.kernelStart, kernelTsResults->global.kernelEnd);
if (referenceKernelTs == 0) {
referenceKernelTs = currMinKernelTs;
std::cout << "ReferencekernelTs: " << referenceKernelTs << "\n";
}
// High Level Approach: // High Level Approach:
// startTimeStamp = (submitHostTs - submitDeviceTs) + kernelDeviceTsStart // startTimeStamp = (submitHostTs - submitDeviceTs) + kernelDeviceTsStart
// deviceDuration = kernelDeviceTsEnd - kernelDeviceTsStart // deviceDuration = kernelDeviceTsEnd - kernelDeviceTsStart
@@ -528,10 +565,14 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[],
std::cout << " | submit[host,device]: [" << submitHostTs << ", " << submitDeviceTs << "]"; std::cout << " | submit[host,device]: [" << submitHostTs << ", " << submitDeviceTs << "]";
std::cout << " | deviceTsOnHostTimescale[start, end] : [" << startTimeStamp << ", " << endTimeStamp << " ] \n"; std::cout << " | deviceTsOnHostTimescale[start, end] : [" << startTimeStamp << ", " << endTimeStamp << " ] \n";
++iter; ++iter;
if (runTillDeviceTsOverflows) { if (runTillDeviceTsOverflows || runTillKernelTsOverflows) {
i = 0; i = 0;
if (referenceDeviceTs > submitDeviceTs) { if (runTillDeviceTsOverflows && referenceDeviceTs > submitDeviceTs) {
runTillDeviceTsOverflows = false; runTillKernelTsOverflows = runTillDeviceTsOverflows = false;
}
if (runTillKernelTsOverflows && referenceKernelTs > currMinKernelTs) {
runTillKernelTsOverflows = runTillDeviceTsOverflows = false;
} }
} }
} }
@@ -549,6 +590,216 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[],
return true; return true;
} }
/*
 * Black-box test for zeEventQueryKernelTimestampsExt with events created from a pool that
 * has ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP set.
 *
 * Each iteration runs a kernel launch (plus a barrier and a memory copy unless
 * -s/--useSingleCommand is given), queries raw and synchronized timestamps for every used
 * event and checks that the smallest synchronized timestamp of the iteration is not below
 * the largest one of the previous iteration.
 *
 * Command line switches read here:
 *   -o / --runTillOverflow   keep iterating until the raw global timestamp drops below the
 *                            first observed minimum
 *   -s / --useSingleCommand  signal only the first event
 *   -i / --useImmediate      use an immediate command list instead of queue + command list
 *   -l / --verboseLevel      1 (default) prints full details, 2 prints synchronized values only
 *
 * Returns true on success, false when the kernel could not be prepared, a query reports
 * zero timestamps, or the ordering check fails.
 */
bool testKernelMappedTimestampMap(int argc, char *argv[],
                                  ze_context_handle_t &context,
                                  ze_driver_handle_t &driver,
                                  ze_device_handle_t &device) {
    ze_command_queue_handle_t cmdQueue;
    ze_command_list_handle_t cmdList;
    ze_module_handle_t module;
    ze_kernel_handle_t kernel;
    void *srcBuffer = nullptr;
    void *dstBuffer = nullptr;
    void *timestampBuffer = nullptr;
    ze_event_pool_handle_t eventPool;
    constexpr uint32_t maxEventUsageCount = 3;
    uint32_t eventUsageCount = maxEventUsageCount;
    constexpr size_t allocSize = 4096;
    ze_group_count_t dispatchTraits;

    bool runTillDeviceTsOverflows = isParamEnabled(argc, argv, "-o", "--runTillOverflow");
    bool useSingleCommand = isParamEnabled(argc, argv, "-s", "--useSingleCommand");
    bool useImmediate = isParamEnabled(argc, argv, "-i", "--useImmediate");
    int defaultVerboseLevel = 1;
    int verboseLevel = getParamValue(argc, argv, "-l", "--verboseLevel", defaultVerboseLevel);

    if (useSingleCommand) {
        eventUsageCount = 1;
    }

    // All maxEventUsageCount events are always created, even if fewer end up being signaled.
    ze_event_handle_t kernelTsEvent[maxEventUsageCount];
    createEventPoolAndEvents(context, device, eventPool,
                             (ze_event_pool_flag_t)(ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP), maxEventUsageCount, kernelTsEvent,
                             ZE_EVENT_SCOPE_FLAG_DEVICE, ZE_EVENT_SCOPE_FLAG_HOST);

    // Create commandQueue and cmdList
    if (useImmediate) {
        createImmediateCommandList(device, context, false, cmdList);
    } else {
        createCmdQueueAndCmdList(context, device, cmdQueue, cmdList);
    }

    // Allocates the buffers, builds the copy kernel and fills dispatchTraits.
    // Returns false when the SPIR-V binary could not be loaded.
    auto prepareKernel = [&]() {
        // Create two shared buffers
        ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
        deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
        deviceDesc.ordinal = 0;

        ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
        hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;

        SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
        SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
        SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, sizeof(ze_kernel_timestamp_result_t), 1, &timestampBuffer));

        // Initialize memory
        constexpr uint8_t val = 55;
        memset(srcBuffer, val, allocSize);
        memset(dstBuffer, 0, allocSize);
        memset(timestampBuffer, 0, sizeof(ze_kernel_timestamp_result_t));

        // Create kernel
        auto spirvModule = loadBinaryFile("copy_buffer_to_buffer.spv");
        if (spirvModule.size() == 0) {
            return false;
        }

        ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
        moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
        moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(spirvModule.data());
        moduleDesc.inputSize = spirvModule.size();
        SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &moduleDesc, &module, nullptr));

        ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
        kernelDesc.pKernelName = "CopyBufferToBufferBytes";
        SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel));

        uint32_t groupSizeX = 32u;
        uint32_t groupSizeY = 1u;
        uint32_t groupSizeZ = 1u;
        SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, static_cast<uint32_t>(allocSize), 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
        SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));

        uint32_t offset = 0;
        SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
        SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
        SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset));
        SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset));
        SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset));

        dispatchTraits.groupCountX = static_cast<uint32_t>(allocSize) / groupSizeX;
        dispatchTraits.groupCountY = 1u;
        dispatchTraits.groupCountZ = 1u;
        return true;
    };

    uint64_t previousMaximumSyncTs = std::numeric_limits<uint64_t>::min();
    uint64_t referenceMinimumGlobalTs = 0;

    // Without a kernel the module/kernel handles stay uninitialized, so do not continue.
    if (!prepareKernel()) {
        std::cerr << "Failed to prepare kernel for testKernelMappedTimestampMap\n";
        return false;
    }

    // A regular command list is recorded once and re-executed in every iteration.
    if (!useImmediate) {
        SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, kernelTsEvent[0], 0, nullptr));
        if (!useSingleCommand) {
            SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, kernelTsEvent[1], 0u, nullptr));
            SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, kernelTsEvent[2], 0, nullptr));
        }
        SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
    }

    for (uint32_t i = 0; i < 10; i++) {
        if (!useImmediate) {
            SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
            SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
        } else {
            // Immediate Commandlist case
            SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, kernelTsEvent[0], 0, nullptr));
            if (!useSingleCommand) {
                SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, kernelTsEvent[1], 0u, nullptr));
                SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, kernelTsEvent[2], 0, nullptr));
            }
            SUCCESS_OR_TERMINATE(zeCommandListHostSynchronize(cmdList, std::numeric_limits<uint64_t>::max()));
        }

        uint64_t currentMinimumSyncTs = std::numeric_limits<uint64_t>::max();
        uint64_t currentMaximumSyncTs = std::numeric_limits<uint64_t>::min();
        uint64_t currentMinimumGlobalTs = std::numeric_limits<uint64_t>::max();

        for (uint32_t j = 0; j < eventUsageCount; j++) {
            uint32_t count = 0;
            if (verboseLevel == 1) {
                std::cout << "[iter(" << i << ")][event(" << j << ")]====>\n";
            }
            SUCCESS_OR_TERMINATE(zeEventQueryStatus(kernelTsEvent[j]));

            // First call with count == 0 retrieves the number of available timestamps.
            SUCCESS_OR_TERMINATE(zeEventQueryKernelTimestampsExt(kernelTsEvent[j], device, &count, nullptr));
            if (count == 0) {
                return false;
            }

            std::vector<ze_kernel_timestamp_result_t> timestampResult(count);
            std::vector<ze_synchronized_timestamp_result_ext_t> syncTimestampResult(count);
            ze_event_query_kernel_timestamps_results_ext_properties_t properties = {};
            properties.pNext = nullptr;
            properties.pKernelTimestampsBuffer = timestampResult.data();
            properties.pSynchronizedTimestampsBuffer = syncTimestampResult.data();
            SUCCESS_OR_TERMINATE(zeEventQueryKernelTimestampsExt(kernelTsEvent[j], device, &count, &properties));

            for (uint32_t k = 0; k < count; k++) {
                const auto &ts = properties.pKernelTimestampsBuffer[k];
                const auto &syncTs = properties.pSynchronizedTimestampsBuffer[k];
                currentMinimumSyncTs = std::min(currentMinimumSyncTs, syncTs.global.kernelStart);
                currentMinimumSyncTs = std::min(currentMinimumSyncTs, syncTs.global.kernelEnd);
                currentMaximumSyncTs = std::max(currentMaximumSyncTs, syncTs.global.kernelStart);
                currentMaximumSyncTs = std::max(currentMaximumSyncTs, syncTs.global.kernelEnd);
                currentMinimumGlobalTs = std::min(currentMinimumGlobalTs, ts.global.kernelStart);
                currentMinimumGlobalTs = std::min(currentMinimumGlobalTs, ts.global.kernelEnd);

                if (verboseLevel == 1) {
                    std::cout << "\t[packedId:" << k << " ]"
                              << "[global-ts(" << ts.global.kernelStart << " , " << ts.global.kernelEnd << " ) "
                              << "| syncTs( " << syncTs.global.kernelStart << " , " << syncTs.global.kernelEnd << " )] "
                              << "# [context-ts( " << ts.context.kernelStart << " , " << ts.context.kernelEnd << " ) "
                              << "| syncTs ( " << syncTs.context.kernelStart << " , " << syncTs.context.kernelEnd << " )]\n";
                }
                if (verboseLevel == 2) {
                    std::cout << "KernelSyncTs: " << syncTs.global.kernelStart << " , " << syncTs.global.kernelEnd
                              << " | ContextSyncTs: " << syncTs.context.kernelStart << " , " << syncTs.context.kernelEnd << "\n";
                }
            }
            SUCCESS_OR_TERMINATE(zeEventHostReset(kernelTsEvent[j]));
        }

        // Synchronized timestamps must not go backwards from one iteration to the next.
        if (currentMinimumSyncTs < previousMaximumSyncTs) {
            std::cout << "\n\n!!FAILED: Current Minimum Ts : " << currentMinimumSyncTs << " less than Previous Maximum Ts : " << previousMaximumSyncTs << "\n\n";
            return false;
        }

        previousMaximumSyncTs = currentMaximumSyncTs;
        if (!referenceMinimumGlobalTs) {
            referenceMinimumGlobalTs = currentMinimumGlobalTs;
        } else {
            if (runTillDeviceTsOverflows) {
                // A raw global timestamp below the reference means the device counter wrapped.
                if (currentMinimumGlobalTs < referenceMinimumGlobalTs) {
                    runTillDeviceTsOverflows = false;
                }
                // Restart the iteration counter so the loop keeps running until the wrap is seen.
                i = 0;
            }
        }
    }

    // Cleanup
    SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
    SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
    SUCCESS_OR_TERMINATE(zeMemFree(context, timestampBuffer));
    // Destroy every event that was created, not only the ones that were signaled.
    for (uint32_t j = 0; j < maxEventUsageCount; j++) {
        SUCCESS_OR_TERMINATE(zeEventDestroy(kernelTsEvent[j]));
    }
    SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPool));
    SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
    if (!useImmediate) {
        SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
    }
    SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel));
    SUCCESS_OR_TERMINATE(zeModuleDestroy(module));
    return true;
}
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
const std::string blackBoxName("Zello Timestamp"); const std::string blackBoxName("Zello Timestamp");
verbose = isVerbose(argc, argv); verbose = isVerbose(argc, argv);
@@ -563,6 +814,7 @@ int main(int argc, char *argv[]) {
supportedTests["testKernelTimestampAppendQueryWithDeviceProperties"] = testKernelTimestampAppendQueryWithDeviceProperties; supportedTests["testKernelTimestampAppendQueryWithDeviceProperties"] = testKernelTimestampAppendQueryWithDeviceProperties;
supportedTests["testWriteGlobalTimestamp"] = testWriteGlobalTimestamp; supportedTests["testWriteGlobalTimestamp"] = testWriteGlobalTimestamp;
supportedTests["testKernelTimestampHostQuery"] = testKernelTimestampHostQuery; supportedTests["testKernelTimestampHostQuery"] = testKernelTimestampHostQuery;
supportedTests["testKernelMappedTimestampMap"] = testKernelMappedTimestampMap;
const char *defaultString = "testKernelTimestampAppendQueryWithDeviceProperties"; const char *defaultString = "testKernelTimestampAppendQueryWithDeviceProperties";
const char *test = getParamValue(argc, argv, "-t", "--test", defaultString); const char *test = getParamValue(argc, argv, "-t", "--test", defaultString);

View File

@@ -76,6 +76,7 @@ struct Mock<Event> : public Event {
ADDMETHOD_NOBASE(reset, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(reset, ze_result_t, ZE_RESULT_SUCCESS, ());
ADDMETHOD_NOBASE(queryKernelTimestamp, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_timestamp_result_t * dstptr)); ADDMETHOD_NOBASE(queryKernelTimestamp, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_timestamp_result_t * dstptr));
ADDMETHOD_NOBASE(queryTimestampsExp, ze_result_t, ZE_RESULT_SUCCESS, (::L0::Device * device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps)); ADDMETHOD_NOBASE(queryTimestampsExp, ze_result_t, ZE_RESULT_SUCCESS, (::L0::Device * device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps));
ADDMETHOD_NOBASE(queryKernelTimestampsExt, ze_result_t, ZE_RESULT_SUCCESS, (::L0::Device * device, uint32_t *count, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults));
// Fake an allocation for event memory // Fake an allocation for event memory
alignas(16) uint32_t memory = -1; alignas(16) uint32_t memory = -1;
@@ -154,6 +155,9 @@ class MockEvent : public ::L0::Event {
ze_result_t queryTimestampsExp(L0::Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) override { ze_result_t queryTimestampsExp(L0::Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) override {
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
// Mock stub: ignores all arguments, writes nothing and always returns ZE_RESULT_SUCCESS.
ze_result_t queryKernelTimestampsExt(L0::Device *device, uint32_t *count, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) override {
return ZE_RESULT_SUCCESS;
}
uint32_t getPacketsUsedInLastKernel() override { return 1; } uint32_t getPacketsUsedInLastKernel() override { return 1; }
uint32_t getPacketsInUse() const override { return 1; } uint32_t getPacketsInUse() const override { return 1; }
void resetPackets(bool resetAllPackets) override {} void resetPackets(bool resetAllPackets) override {}

View File

@@ -3058,5 +3058,147 @@ HWTEST2_F(CommandListHostSynchronize, whenHostSychronizeIsCalledReturnInvalidArg
commandList->destroy(); commandList->destroy();
} }
using CommandListMappedTimestampTest = CommandListAppendLaunchKernel;
// An event from a pool created with ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP that is used
// as the signal event of an append call must show up in the command list's mapped event list.
HWTEST2_F(CommandListMappedTimestampTest, givenMappedTimestampSignalEventWhenAppendApiIsCalledThenTheEventIsAddedToMappedSignalList, IsAtLeastSkl) {
    createKernel();
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
    eventPoolDesc.count = 2;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.wait = 0;
    eventDesc.signal = 0;

    ze_result_t returnValue;
    std::unique_ptr<L0::EventPool> eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    std::unique_ptr<L0::Event> event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    ze_group_count_t groupCount{1, 1, 1};
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // Stop here if nothing was recorded; indexing an empty list would be undefined behaviour.
    ASSERT_EQ(1u, commandList->peekMappedEventList().size());
    EXPECT_EQ(event.get(), commandList->peekMappedEventList()[0]);
}
// A signal event whose pool lacks ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP must leave the
// command list's mapped event list empty after an append call.
HWTEST2_F(CommandListMappedTimestampTest, givenSignalEventWithoutMappedTimstampWhenAppendApiIsCalledThenTheEventIsNotAddedToMappedSignalList, IsAtLeastSkl) {
    createKernel();

    ze_event_desc_t eventDesc = {};
    eventDesc.signal = 0;
    eventDesc.wait = 0;
    eventDesc.index = 0;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 2;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_result_t returnValue;
    std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    std::unique_ptr<L0::Event> event(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    ze_group_count_t groupCount{1, 1, 1};
    returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, false);

    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    EXPECT_EQ(0u, commandList->peekMappedEventList().size());
}
// Using the same mapped-timestamp event as the signal event of two append calls must record
// it only once in the command list's mapped event list.
HWTEST2_F(CommandListMappedTimestampTest, givenMappedTimestampSignalEventWhenAppendApiIsCalledMultipleTimesThenTheEventIsAddedOnceToMappedSignalList, IsAtLeastSkl) {
    createKernel();
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
    eventPoolDesc.count = 2;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.wait = 0;
    eventDesc.signal = 0;

    ze_result_t returnValue;
    std::unique_ptr<L0::EventPool> eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    std::unique_ptr<L0::Event> event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    ze_group_count_t groupCount{1, 1, 1};
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    returnValue = commandList->appendBarrier(event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // Check the size first so element 0 is only read when it exists.
    ASSERT_EQ(1u, commandList->peekMappedEventList().size());
    EXPECT_EQ(event.get(), commandList->peekMappedEventList()[0]);
}
// storeReferenceTsToMappedEvents(true) must leave the mapped event list empty.
HWTEST2_F(CommandListMappedTimestampTest, givenEventIsAddedToMappedEventListWhenStoringReferenceTimestampWithClearThenEventsAreCleared, IsAtLeastSkl) {
    createKernel();

    ze_event_desc_t eventDesc = {};
    eventDesc.signal = 0;
    eventDesc.wait = 0;
    eventDesc.index = 0;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 2;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;

    ze_result_t returnValue;
    std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    std::unique_ptr<L0::Event> event(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    // Install the mock OS time before the reference timestamp is stored.
    neoDevice->setOSTime(new MockOSTime());

    commandList->addToMappedEventList(event.get());
    commandList->storeReferenceTsToMappedEvents(true);

    EXPECT_EQ(0u, commandList->peekMappedEventList().size());
}
// With CommandListTimestampRefreshIntervalInMilliSec set to 0, storing reference timestamps
// without the clear flag keeps the event in the mapped list; storing with the clear flag
// empties it.
HWTEST2_F(CommandListMappedTimestampTest, givenCommandListTimestampRefreshIntervalInMilliSecIsSetWhenStoringReferenceTimestampThenUpdatedRefreshIntervalIsUsed, IsPVC) {
    DebugManagerStateRestore restorer;
    createKernel();

    ze_event_desc_t eventDesc = {};
    eventDesc.signal = 0;
    eventDesc.wait = 0;
    eventDesc.index = 0;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 2;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;

    ze_result_t returnValue;
    std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    std::unique_ptr<L0::Event> event(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.set(0);

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    neoDevice->setOSTime(new MockOSTimeWithConfigurableCpuTimestamp());
    auto osTime = static_cast<MockOSTimeWithConfigurableCpuTimestamp *>(neoDevice->getOSTime());
    commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);

    // First store at mock CPU time 1, without clearing.
    commandList->addToMappedEventList(event.get());
    osTime->mockCpuTime = 1;
    commandList->storeReferenceTsToMappedEvents(false);
    EXPECT_EQ(1u, commandList->peekMappedEventList().size());

    // Second store, still without clearing.
    commandList->addToMappedEventList(event.get());
    commandList->storeReferenceTsToMappedEvents(false);

    // Final store at mock CPU time 2 with the clear flag set.
    osTime->mockCpuTime = 2;
    commandList->addToMappedEventList(event.get());
    commandList->storeReferenceTsToMappedEvents(true);
    EXPECT_EQ(0u, commandList->peekMappedEventList().size());
}
} // namespace ult } // namespace ult
} // namespace L0 } // namespace L0

View File

@@ -4513,5 +4513,20 @@ TEST(DeviceReturnSubDevicesAsApiDevicesTest, GivenReturnSubDevicesAsApiDevicesIs
multiDeviceFixture.tearDown(); multiDeviceFixture.tearDown();
} }
// zeDeviceGetProperties must report both the KERNEL and the SYNCHRONIZED flag in a chained
// ze_event_query_kernel_timestamps_ext_properties_t structure.
TEST_F(DeviceTest, GivenValidDeviceWhenQueryingKernelTimestampsProptertiesThenCorrectPropertiesIsReturned) {
    // Value-initialize both structures so the flag checks below cannot pass on leftover
    // stack contents if the driver does not write tsProps.flags.
    ze_device_properties_t devProps = {};
    ze_event_query_kernel_timestamps_ext_properties_t tsProps = {};

    devProps.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
    devProps.pNext = &tsProps;

    tsProps.stype = ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PROPERTIES;
    tsProps.pNext = nullptr;

    EXPECT_EQ(ZE_RESULT_SUCCESS, zeDeviceGetProperties(device, &devProps));
    EXPECT_NE(0u, tsProps.flags & ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_KERNEL);
    EXPECT_NE(0u, tsProps.flags & ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_SYNCHRONIZED);
}
} // namespace ult } // namespace ult
} // namespace L0 } // namespace L0

View File

@@ -2078,6 +2078,142 @@ TEST_F(EventQueryTimestampExpWithRootDeviceAndSubDevices, givenEventWhenQuerytim
} }
} }
using EventqueryKernelTimestampsExt = Test<EventUsedPacketSignalFixture<1, 1, 0, -1>>;
// A requested count larger than the number of packets in use must be reduced to that number.
TEST_F(EventqueryKernelTimestampsExt, givenpCountLargerThanSupportedWhenCallingQueryKernelTimestampsExtThenpCountSetProperly) {
    std::vector<ze_kernel_timestamp_result_t> kernelTsBuffer(2);

    ze_event_query_kernel_timestamps_results_ext_properties_t results{};
    results.pSynchronizedTimestampsBuffer = nullptr;
    results.pKernelTimestampsBuffer = kernelTsBuffer.data();

    event->setPacketsInUse(2u);

    uint32_t pCount = 10;
    const auto result = event->queryKernelTimestampsExt(device, &pCount, &results);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(2u, pCount);
}
// With the EnableStaticPartitioning debug flag set to 0 the query must report
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(EventqueryKernelTimestampsExt, givenEventWithStaticPartitionOffThenQueryKernelTimestampsExtReturnsUnsupported) {
    DebugManagerStateRestore restore;
    NEO::DebugManager.flags.EnableStaticPartitioning.set(0);

    std::vector<ze_kernel_timestamp_result_t> kernelTsBuffer(2);

    ze_event_query_kernel_timestamps_results_ext_properties_t results{};
    results.pSynchronizedTimestampsBuffer = nullptr;
    results.pKernelTimestampsBuffer = kernelTsBuffer.data();

    event->hasKerneMappedTsCapability = true;

    uint32_t pCount = 10;
    const auto result = event->queryKernelTimestampsExt(device, &pCount, &results);

    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
}
// Checks the host-synchronized timestamps produced by queryKernelTimestampsExt for an event
// with three packets:
//   - packet 0: global timestamps above the GPU reference timestamp (plain case),
//   - packet 1: global timestamps below the GPU reference timestamp,
//   - packet 2: global end timestamp smaller than the global start timestamp.
// Every reported value is accepted within +/- errorOffset of the expected value.
TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhenQueryKernelTimestampsExtIsCalledCorrectValuesAreReturned) {
    typename MockTimestampPackets32::Packet packetData[3];
    device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.kernelTimestampValidBits = 32;
    auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper();
    event->setPacketsInUse(3u);
    event->hasKerneMappedTsCapability = true;

    // Despite its name, this value is used below as "nanoseconds per tick": times in ns are
    // divided by it to obtain ticks and tick deltas are multiplied by it to obtain ns.
    const auto deviceTsFrequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
    const int64_t gpuReferenceTimeInNs = 2000;
    const int64_t cpuReferenceTimeInNs = 3000;
    const auto maxKernelTsValue = maxNBitValue(32);

    // Reference pair handed to the event: GPU reference expressed in ticks, CPU reference in ns.
    NEO::TimeStampData referenceTs{static_cast<uint64_t>(gpuReferenceTimeInNs / deviceTsFrequency), cpuReferenceTimeInNs};
    event->setReferenceTs(referenceTs);

    // Converts a time in nanoseconds into a 32-bit device tick count.
    auto timeToTimeStamp = [&](uint32_t timeInNs) {
        return static_cast<uint32_t>(timeInNs / deviceTsFrequency);
    };

    // Packet 0: global start/end (4000 ns / 5000 ns) are above the GPU reference (2000 ns).
    packetData[0].contextStart = 50u;
    packetData[0].contextEnd = 100u;
    packetData[0].globalStart = timeToTimeStamp(4000u);
    packetData[0].globalEnd = timeToTimeStamp(5000u);

    // Packet 1: device timestamp overflow case - global start/end (500 ns / 1500 ns) are below
    // the GPU reference (2000 ns); the expectations add one full global timestamp range.
    packetData[1].contextStart = 20u;
    packetData[1].contextEnd = 30u;
    packetData[1].globalStart = timeToTimeStamp(500u);
    packetData[1].globalEnd = timeToTimeStamp(1500u);

    // Packet 2: kernel timestamp overflow case - global end (500 ns) is below global start
    // (5000 ns); the expectations add one 32-bit kernel timestamp range to the end value.
    packetData[2].contextStart = 20u;
    packetData[2].contextEnd = 30u;
    packetData[2].globalStart = timeToTimeStamp(5000u);
    packetData[2].globalEnd = timeToTimeStamp(500u);

    event->hostAddress = packetData;

    // First call with no result structure: only the number of available entries is queried.
    uint32_t count = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestampsExt(device, &count, nullptr));
    EXPECT_EQ(count, 3u);

    std::vector<ze_kernel_timestamp_result_t> kernelTsBuffer(count);
    std::vector<ze_synchronized_timestamp_result_ext_t> synchronizedTsBuffer(count);
    ze_event_query_kernel_timestamps_results_ext_properties_t results{};
    results.pKernelTimestampsBuffer = kernelTsBuffer.data();
    results.pSynchronizedTimestampsBuffer = synchronizedTsBuffer.data();

    // Hand every packet to the event's completion data, stepping hostAddress forward by one
    // packet per iteration.
    for (uint32_t packetId = 0; packetId < count; packetId++) {
        event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
        event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets<uint32_t>::getSinglePacketSize());
    }

    EXPECT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestampsExt(device, &count, &results));

    // Tolerance applied to every comparison below.
    uint64_t errorOffset = 5;

    // Packet 1
    // Expected global values: packet time shifted by the CPU/GPU reference offset.
    auto expectedGlobalStart = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 4000u;
    auto expectedGlobalEnd = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 5000u;
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].global.kernelStart, expectedGlobalStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].global.kernelStart, expectedGlobalStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].global.kernelEnd, expectedGlobalEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].global.kernelEnd, expectedGlobalEnd + errorOffset);
    // Expected context values: start equals the global start, end adds the context duration in ns.
    auto expectedContextStart = expectedGlobalStart;
    auto expectedContextEnd = expectedContextStart + (packetData[0].contextEnd - packetData[0].contextStart) * deviceTsFrequency;
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].context.kernelStart, expectedContextStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].context.kernelStart, expectedContextStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].context.kernelEnd, expectedContextEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].context.kernelEnd, expectedContextEnd + errorOffset);

    // Packet 2
    // The start is expected one full global timestamp range later than the raw packet time.
    expectedGlobalStart = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 500u +
                          static_cast<uint64_t>(maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits()) * deviceTsFrequency);
    expectedGlobalEnd = expectedGlobalStart + (1500 - 500);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].global.kernelStart, expectedGlobalStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].global.kernelStart, expectedGlobalStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].global.kernelEnd, expectedGlobalEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].global.kernelEnd, expectedGlobalEnd + errorOffset);
    expectedContextStart = expectedGlobalStart;
    expectedContextEnd = expectedContextStart + (packetData[1].contextEnd - packetData[1].contextStart) * deviceTsFrequency;
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].context.kernelStart, expectedContextStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].context.kernelStart, expectedContextStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].context.kernelEnd, expectedContextEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].context.kernelEnd, expectedContextEnd + errorOffset);

    // Packet 3
    // The end is expected one 32-bit kernel timestamp range (in ns) after the raw end time.
    expectedGlobalStart = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 5000u;
    expectedGlobalEnd = expectedGlobalStart + (static_cast<uint64_t>(maxKernelTsValue * deviceTsFrequency) - 5000u + 500u);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].global.kernelStart, expectedGlobalStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].global.kernelStart, expectedGlobalStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].global.kernelEnd, expectedGlobalEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].global.kernelEnd, expectedGlobalEnd + errorOffset);
    expectedContextStart = expectedGlobalStart;
    // Context duration must come from this packet's own start/end pair.
    expectedContextEnd = expectedContextStart + (packetData[2].contextEnd - packetData[2].contextStart) * deviceTsFrequency;
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].context.kernelStart, expectedContextStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].context.kernelStart, expectedContextStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].context.kernelEnd, expectedContextEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].context.kernelEnd, expectedContextEnd + errorOffset);
}
HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) { HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) {
typename MockTimestampPackets32::Packet data = {}; typename MockTimestampPackets32::Packet data = {};
data.contextStart = 1u; data.contextStart = 1u;

View File

@@ -550,6 +550,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableStateComputeModeTracking, -1, "-1: default
DECLARE_DEBUG_VARIABLE(int32_t, EnableStateBaseAddressTracking, -1, "-1: default: enabled, 0: disabled, 1: enabled. This flag enables tracking state base address changes in command lists") DECLARE_DEBUG_VARIABLE(int32_t, EnableStateBaseAddressTracking, -1, "-1: default: enabled, 0: disabled, 1: enabled. This flag enables tracking state base address changes in command lists")
DECLARE_DEBUG_VARIABLE(int32_t, SelectCmdListHeapAddressModel, -1, "-1: default, 0: private heaps, 1: stateless, 2: bindless, 3: bindful. This flag selects default command list heap address model. Values should match HeapAddressModel enum") DECLARE_DEBUG_VARIABLE(int32_t, SelectCmdListHeapAddressModel, -1, "-1: default, 0: private heaps, 1: stateless, 2: bindless, 3: bindful. This flag selects default command list heap address model. Values should match HeapAddressModel enum")
DECLARE_DEBUG_VARIABLE(int32_t, EnableSetPair, -1, "Use SET_PAIR to pair two buffer objects behind the same file descriptor, -1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, EnableSetPair, -1, "Use SET_PAIR to pair two buffer objects behind the same file descriptor, -1: default, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, CommandListTimestampRefreshIntervalInMilliSec, -1, "-1: use driver default, This value sets the refresh interval for getting synchronized GPU and CPU timestamp")
/* Binary Cache */ /* Binary Cache */
DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation") DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation")

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2023 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -10,6 +10,7 @@
#include "shared/source/os_interface/linux/device_time_drm.h" #include "shared/source/os_interface/linux/device_time_drm.h"
#include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/os_interface.h"
#include <chrono>
#include <time.h> #include <time.h>
namespace NEO { namespace NEO {

View File

@@ -88,4 +88,14 @@ class MockOSTimeWithConstTimestamp : public OSTime {
return 0; return 0;
} }
}; };
// Mock OS time derived from MockOSTimeWithConstTimestamp whose reported CPU time is taken
// from a public, test-settable member instead of a fixed value.
class MockOSTimeWithConfigurableCpuTimestamp : public MockOSTimeWithConstTimestamp {
  public:
    // Writes the current mockCpuTime into *timeStamp and always reports success.
    bool getCpuTime(uint64_t *timeStamp) override {
        *timeStamp = this->mockCpuTime;
        return true;
    }

    // Value returned through getCpuTime(); defaults to 0 until a test overwrites it.
    uint64_t mockCpuTime{0};
};
} // namespace NEO } // namespace NEO

View File

@@ -536,4 +536,5 @@ VfBarResourceAllocationWa = 1
EnableDynamicPostSyncAllocLayout = -1 EnableDynamicPostSyncAllocLayout = -1
ForceNumberOfThreadsInGpgpuThreadGroup = -1 ForceNumberOfThreadsInGpgpuThreadGroup = -1
PrintTimestampPacketUsage = -1 PrintTimestampPacketUsage = -1
CommandListTimestampRefreshIntervalInMilliSec = -1
# Please don't edit below this line # Please don't edit below this line