mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 05:56:36 +08:00
feature: add initial support for host mapped timestamps
Related-To: LOCI-4171 Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
9214d0c635
commit
97b4d8bab5
@@ -447,6 +447,7 @@ zeGetEventProcAddrTable(
|
|||||||
pDdiTable->pfnQueryStatus = L0::zeEventQueryStatus;
|
pDdiTable->pfnQueryStatus = L0::zeEventQueryStatus;
|
||||||
pDdiTable->pfnHostReset = L0::zeEventHostReset;
|
pDdiTable->pfnHostReset = L0::zeEventHostReset;
|
||||||
pDdiTable->pfnQueryKernelTimestamp = L0::zeEventQueryKernelTimestamp;
|
pDdiTable->pfnQueryKernelTimestamp = L0::zeEventQueryKernelTimestamp;
|
||||||
|
pDdiTable->pfnQueryKernelTimestampsExt = L0::zeEventQueryKernelTimestampsExt;
|
||||||
driverDdiTable.coreDdiTable.Event = *pDdiTable;
|
driverDdiTable.coreDdiTable.Event = *pDdiTable;
|
||||||
if (driverDdiTable.enableTracing) {
|
if (driverDdiTable.enableTracing) {
|
||||||
pDdiTable->pfnCreate = zeEventCreateTracing;
|
pDdiTable->pfnCreate = zeEventCreateTracing;
|
||||||
|
|||||||
@@ -100,6 +100,14 @@ ze_result_t zeEventQueryKernelTimestamp(
|
|||||||
ze_kernel_timestamp_result_t *timestampType) {
|
ze_kernel_timestamp_result_t *timestampType) {
|
||||||
return L0::Event::fromHandle(hEvent)->queryKernelTimestamp(timestampType);
|
return L0::Event::fromHandle(hEvent)->queryKernelTimestamp(timestampType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Core-layer implementation of zeEventQueryKernelTimestampsExt: converts the
// event and device handles to their L0 objects and forwards the query,
// returning whatever Event::queryKernelTimestampsExt reports.
ze_result_t zeEventQueryKernelTimestampsExt(
    ze_event_handle_t hEvent,
    ze_device_handle_t hDevice,
    uint32_t *pCount,
    ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) {
    auto *event = L0::Event::fromHandle(hEvent);
    auto *device = L0::Device::fromHandle(hDevice);
    return event->queryKernelTimestampsExt(device, pCount, pResults);
}
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@@ -223,6 +231,18 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestamp(
|
|||||||
dstptr);
|
dstptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// C-linkage entry point for zeEventQueryKernelTimestampsExt; passes all
// arguments through unchanged to the L0 namespace implementation.
ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestampsExt(
    ze_event_handle_t hEvent,
    ze_device_handle_t hDevice,
    uint32_t *pCount,
    ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) {
    const ze_result_t queryResult = L0::zeEventQueryKernelTimestampsExt(hEvent, hDevice, pCount, pResults);
    return queryResult;
}
|
||||||
|
|
||||||
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendQueryKernelTimestamps(
|
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendQueryKernelTimestamps(
|
||||||
ze_command_list_handle_t hCommandList,
|
ze_command_list_handle_t hCommandList,
|
||||||
uint32_t numEvents,
|
uint32_t numEvents,
|
||||||
|
|||||||
@@ -396,6 +396,7 @@ struct CommandList : _ze_command_list_handle_t {
|
|||||||
uint32_t commandListPerThreadPrivateScratchSize = 0u;
|
uint32_t commandListPerThreadPrivateScratchSize = 0u;
|
||||||
uint32_t partitionCount = 1;
|
uint32_t partitionCount = 1;
|
||||||
uint32_t defaultMocsIndex = 0;
|
uint32_t defaultMocsIndex = 0;
|
||||||
|
uint64_t timestampRefreshIntervalInNanoSec = 0;
|
||||||
|
|
||||||
bool isFlushTaskSubmissionEnabled = false;
|
bool isFlushTaskSubmissionEnabled = false;
|
||||||
bool isSyncModeQueue = false;
|
bool isSyncModeQueue = false;
|
||||||
|
|||||||
@@ -18,6 +18,7 @@
|
|||||||
#include "shared/source/helpers/blit_properties.h"
|
#include "shared/source/helpers/blit_properties.h"
|
||||||
#include "shared/source/helpers/definitions/command_encoder_args.h"
|
#include "shared/source/helpers/definitions/command_encoder_args.h"
|
||||||
#include "shared/source/helpers/gfx_core_helper.h"
|
#include "shared/source/helpers/gfx_core_helper.h"
|
||||||
|
#include "shared/source/helpers/hw_info.h"
|
||||||
#include "shared/source/helpers/logical_state_helper.h"
|
#include "shared/source/helpers/logical_state_helper.h"
|
||||||
#include "shared/source/helpers/pipe_control_args.h"
|
#include "shared/source/helpers/pipe_control_args.h"
|
||||||
#include "shared/source/helpers/preamble.h"
|
#include "shared/source/helpers/preamble.h"
|
||||||
@@ -133,6 +134,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
|||||||
this->ownedPrivateAllocations.clear();
|
this->ownedPrivateAllocations.clear();
|
||||||
cmdListCurrentStartOffset = 0;
|
cmdListCurrentStartOffset = 0;
|
||||||
|
|
||||||
|
mappedTsEventList.clear();
|
||||||
|
previousSynchronizedTimestamp = {};
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -232,6 +236,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
|||||||
|
|
||||||
createLogicalStateHelper();
|
createLogicalStateHelper();
|
||||||
|
|
||||||
|
const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
|
||||||
|
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
|
||||||
|
if (hwInfo.capabilityTable.kernelTimestampValidBits < 64u) {
|
||||||
|
this->timestampRefreshIntervalInNanoSec = static_cast<uint64_t>(maxKernelTsValue * frequency);
|
||||||
|
} else {
|
||||||
|
this->timestampRefreshIntervalInNanoSec = maxKernelTsValue;
|
||||||
|
}
|
||||||
|
if (NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get() != -1) {
|
||||||
|
constexpr uint32_t milliSecondsToNanoSeconds = 1000000u;
|
||||||
|
const uint32_t refreshTime = NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get();
|
||||||
|
this->timestampRefreshIntervalInNanoSec = refreshTime * milliSecondsToNanoSeconds;
|
||||||
|
}
|
||||||
|
|
||||||
return returnType;
|
return returnType;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -320,7 +337,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
|
|||||||
|
|
||||||
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
|
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
|
||||||
event, launchParams);
|
event, launchParams);
|
||||||
|
addToMappedEventList(event);
|
||||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
||||||
*commandContainer.getCommandStream(),
|
*commandContainer.getCommandStream(),
|
||||||
@@ -352,8 +369,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
|
|||||||
|
|
||||||
CmdListKernelLaunchParams launchParams = {};
|
CmdListKernelLaunchParams launchParams = {};
|
||||||
launchParams.isCooperative = true;
|
launchParams.isCooperative = true;
|
||||||
return appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
|
|
||||||
event, launchParams);
|
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
|
||||||
|
event, launchParams);
|
||||||
|
addToMappedEventList(event);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -382,6 +402,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
|||||||
launchParams.isIndirect = true;
|
launchParams.isIndirect = true;
|
||||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
|
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
|
||||||
nullptr, launchParams);
|
nullptr, launchParams);
|
||||||
|
addToMappedEventList(event);
|
||||||
appendSignalEventPostWalker(event);
|
appendSignalEventPostWalker(event);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@@ -427,7 +448,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
addToMappedEventList(event);
|
||||||
appendSignalEventPostWalker(event);
|
appendSignalEventPostWalker(event);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@@ -496,6 +517,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
|||||||
appendEventForProfiling(signalEvent, true);
|
appendEventForProfiling(signalEvent, true);
|
||||||
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
||||||
appendSignalEventPostWalker(signalEvent);
|
appendSignalEventPostWalker(signalEvent);
|
||||||
|
addToMappedEventList(signalEvent);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
@@ -562,9 +584,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isCopyOnly()) {
|
if (isCopyOnly()) {
|
||||||
return appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(),
|
auto status = appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(),
|
||||||
{0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch,
|
{0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch,
|
||||||
rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, event);
|
rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, event);
|
||||||
|
addToMappedEventList(Event::fromHandle(hEvent));
|
||||||
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||||
@@ -639,9 +663,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
|
|||||||
|
|
||||||
CmdListKernelLaunchParams launchParams = {};
|
CmdListKernelLaunchParams launchParams = {};
|
||||||
launchParams.isBuiltInKernel = true;
|
launchParams.isBuiltInKernel = true;
|
||||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
|
|
||||||
event, numWaitEvents, phWaitEvents,
|
auto status = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
|
||||||
launchParams, relaxedOrderingDispatch);
|
event, numWaitEvents, phWaitEvents,
|
||||||
|
launchParams, relaxedOrderingDispatch);
|
||||||
|
addToMappedEventList(Event::fromHandle(hEvent));
|
||||||
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -706,9 +733,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isCopyOnly()) {
|
if (isCopyOnly()) {
|
||||||
return appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc,
|
auto status = appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc,
|
||||||
{pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch,
|
{pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch,
|
||||||
rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, event);
|
rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, event);
|
||||||
|
addToMappedEventList(event);
|
||||||
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||||
@@ -791,6 +820,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
|
|||||||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
|
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
|
||||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
|
ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
|
||||||
event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch);
|
event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch);
|
||||||
|
addToMappedEventList(event);
|
||||||
|
|
||||||
addFlushRequiredCommand(allocationStruct.needsFlush, event);
|
addFlushRequiredCommand(allocationStruct.needsFlush, event);
|
||||||
|
|
||||||
@@ -890,9 +920,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
|
|||||||
auto dstSlicePitch =
|
auto dstSlicePitch =
|
||||||
(dstImage->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : dstRegion.height) * dstRowPitch;
|
(dstImage->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : dstRegion.height) * dstRowPitch;
|
||||||
|
|
||||||
return appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(),
|
auto status = appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(),
|
||||||
{srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch,
|
{srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch,
|
||||||
dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, event);
|
dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, event);
|
||||||
|
addToMappedEventList(event);
|
||||||
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||||
@@ -929,9 +961,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
|
|||||||
|
|
||||||
CmdListKernelLaunchParams launchParams = {};
|
CmdListKernelLaunchParams launchParams = {};
|
||||||
launchParams.isBuiltInKernel = true;
|
launchParams.isBuiltInKernel = true;
|
||||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &kernelArgs,
|
auto status = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &kernelArgs,
|
||||||
event, numWaitEvents, phWaitEvents,
|
event, numWaitEvents, phWaitEvents,
|
||||||
launchParams, relaxedOrderingDispatch);
|
launchParams, relaxedOrderingDispatch);
|
||||||
|
addToMappedEventList(event);
|
||||||
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -1154,7 +1188,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
|
|||||||
commandContainer.addToResidencyContainer(clearColorAllocation);
|
commandContainer.addToResidencyContainer(clearColorAllocation);
|
||||||
|
|
||||||
appendEventForProfiling(signalEvent, true);
|
appendEventForProfiling(signalEvent, true);
|
||||||
|
|
||||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), dummyBlitWa);
|
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), dummyBlitWa);
|
||||||
makeResidentDummyAllocation();
|
makeResidentDummyAllocation();
|
||||||
|
|
||||||
@@ -1366,6 +1399,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||||||
|
|
||||||
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
|
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
|
||||||
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
||||||
|
addToMappedEventList(signalEvent);
|
||||||
|
|
||||||
if (this->inOrderExecutionEnabled && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) {
|
if (this->inOrderExecutionEnabled && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) {
|
||||||
obtainNewTimestampPacketNode();
|
obtainNewTimestampPacketNode();
|
||||||
@@ -1459,6 +1493,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addToMappedEventList(signalEvent);
|
||||||
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
||||||
|
|
||||||
if (this->inOrderExecutionEnabled && isCopyOnly() && inOrderCopyOnlySignalingAllowed) {
|
if (this->inOrderExecutionEnabled && isCopyOnly() && inOrderCopyOnlySignalingAllowed) {
|
||||||
@@ -1680,7 +1715,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isCopyOnly()) {
|
if (isCopyOnly()) {
|
||||||
return appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
auto status = appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||||
|
addToMappedEventList(signalEvent);
|
||||||
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
|
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
|
||||||
@@ -1873,6 +1910,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addToMappedEventList(signalEvent);
|
||||||
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
|
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
|
||||||
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
|
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
|
||||||
|
|
||||||
@@ -2407,6 +2445,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
|||||||
}
|
}
|
||||||
commandContainer.addToResidencyContainer(allocationStruct.alloc);
|
commandContainer.addToResidencyContainer(allocationStruct.alloc);
|
||||||
|
|
||||||
|
addToMappedEventList(signalEvent);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2518,6 +2558,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
|
|||||||
return appendResult;
|
return appendResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addToMappedEventList(Event::fromHandle(hSignalEvent));
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2886,6 +2928,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
|||||||
appendComputeBarrierCommand();
|
appendComputeBarrierCommand();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addToMappedEventList(signalEvent);
|
||||||
appendSignalEventPostWalker(signalEvent);
|
appendSignalEventPostWalker(signalEvent);
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -104,6 +104,8 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushBcsTask
|
|||||||
hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies
|
hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies
|
||||||
);
|
);
|
||||||
|
|
||||||
|
CommandListImp::storeReferenceTsToMappedEvents(true);
|
||||||
|
|
||||||
return csr->flushBcsTask(cmdStreamTask, taskStartOffset, dispatchBcsFlags, this->device->getHwInfo());
|
return csr->flushBcsTask(cmdStreamTask, taskStartOffset, dispatchBcsFlags, this->device->getHwInfo());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -198,6 +200,8 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CommandListImp::storeReferenceTsToMappedEvents(true);
|
||||||
|
|
||||||
return this->csr->flushTask(
|
return this->csr->flushTask(
|
||||||
cmdStreamTask,
|
cmdStreamTask,
|
||||||
taskStartOffset,
|
taskStartOffset,
|
||||||
@@ -875,6 +879,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (signalEvent) {
|
if (signalEvent) {
|
||||||
|
CommandListImp::addToMappedEventList(signalEvent);
|
||||||
|
CommandListImp::storeReferenceTsToMappedEvents(true);
|
||||||
signalEvent->setGpuStartTimestamp();
|
signalEvent->setGpuStartTimestamp();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
#include "shared/source/memory_manager/allocation_properties.h"
|
#include "shared/source/memory_manager/allocation_properties.h"
|
||||||
#include "shared/source/memory_manager/memory_manager.h"
|
#include "shared/source/memory_manager/memory_manager.h"
|
||||||
#include "shared/source/os_interface/os_context.h"
|
#include "shared/source/os_interface/os_context.h"
|
||||||
|
#include "shared/source/os_interface/os_time.h"
|
||||||
#include "shared/source/os_interface/sys_calls_common.h"
|
#include "shared/source/os_interface/sys_calls_common.h"
|
||||||
|
|
||||||
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
|
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
|
||||||
@@ -237,4 +238,32 @@ void CommandListImp::enableInOrderExecution() {
|
|||||||
inOrderExecutionEnabled = true;
|
inOrderExecutionEnabled = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) {
|
||||||
|
if (mappedTsEventList.size()) {
|
||||||
|
uint64_t currentCpuTimeStamp = 0;
|
||||||
|
device->getNEODevice()->getOSTime()->getCpuTime(¤tCpuTimeStamp);
|
||||||
|
const auto recalculate =
|
||||||
|
(currentCpuTimeStamp - previousSynchronizedTimestamp.cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
|
||||||
|
if (previousSynchronizedTimestamp.cpuTimeinNS == 0 || recalculate) {
|
||||||
|
device->getNEODevice()->getOSTime()->getCpuGpuTime(&previousSynchronizedTimestamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto &event : mappedTsEventList) {
|
||||||
|
event->setReferenceTs(previousSynchronizedTimestamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isClearEnabled) {
|
||||||
|
mappedTsEventList.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Registers `event` in mappedTsEventList so that a later call to
// storeReferenceTsToMappedEvents can assign it a reference timestamp.
// Null events, events without hasKerneMappedTsCapability set, and events
// already present in the list are ignored.
void CommandListImp::addToMappedEventList(Event *event) {
    if (event == nullptr || !event->hasKerneMappedTsCapability) {
        return;
    }

    const auto listEnd = mappedTsEventList.end();
    const bool alreadyTracked = std::find(mappedTsEventList.begin(), listEnd, event) != listEnd;
    if (!alreadyTracked) {
        mappedTsEventList.push_back(event);
    }
}
|
||||||
|
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "shared/source/helpers/timestamp_packet_container.h"
|
#include "shared/source/helpers/timestamp_packet_container.h"
|
||||||
|
#include "shared/source/os_interface/os_time.h"
|
||||||
|
|
||||||
#include "level_zero/core/source/cmdlist/cmdlist.h"
|
#include "level_zero/core/source/cmdlist/cmdlist.h"
|
||||||
|
|
||||||
@@ -36,6 +37,9 @@ struct CommandListImp : CommandList {
|
|||||||
void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties);
|
void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties);
|
||||||
void enableInOrderExecution();
|
void enableInOrderExecution();
|
||||||
bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; }
|
bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; }
|
||||||
|
void storeReferenceTsToMappedEvents(bool clear);
|
||||||
|
void addToMappedEventList(Event *event);
|
||||||
|
const std::vector<Event *> &peekMappedEventList() { return mappedTsEventList; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
|
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
|
||||||
@@ -51,6 +55,8 @@ struct CommandListImp : CommandList {
|
|||||||
static constexpr bool cmdListDefaultPipelineSelectModeSelected = true;
|
static constexpr bool cmdListDefaultPipelineSelectModeSelected = true;
|
||||||
static constexpr bool cmdListDefaultMediaSamplerClockGate = false;
|
static constexpr bool cmdListDefaultMediaSamplerClockGate = false;
|
||||||
static constexpr bool cmdListDefaultGlobalAtomics = false;
|
static constexpr bool cmdListDefaultGlobalAtomics = false;
|
||||||
|
std::vector<Event *> mappedTsEventList{};
|
||||||
|
NEO::TimeStampData previousSynchronizedTimestamp{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -222,6 +222,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
|||||||
this->programStateSipEndWA(ctx.stateSipRequired, child);
|
this->programStateSipEndWA(ctx.stateSipRequired, child);
|
||||||
this->assignCsrTaskCountToFenceIfAvailable(hFence);
|
this->assignCsrTaskCountToFenceIfAvailable(hFence);
|
||||||
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, child);
|
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, child);
|
||||||
|
|
||||||
|
for (auto i = 0u; i < numCommandLists; ++i) {
|
||||||
|
auto commandList = static_cast<CommandListImp *>(CommandList::fromHandle(commandListHandles[i]));
|
||||||
|
commandList->storeReferenceTsToMappedEvents(false);
|
||||||
|
}
|
||||||
|
|
||||||
auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, child);
|
auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, child);
|
||||||
|
|
||||||
this->csr->setPreemptionMode(ctx.statePreemption);
|
this->csr->setPreemptionMode(ctx.statePreemption);
|
||||||
@@ -288,6 +294,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(
|
|||||||
this->assignCsrTaskCountToFenceIfAvailable(hFence);
|
this->assignCsrTaskCountToFenceIfAvailable(hFence);
|
||||||
|
|
||||||
this->programLastCommandListReturnBbStart(child, ctx);
|
this->programLastCommandListReturnBbStart(child, ctx);
|
||||||
|
|
||||||
|
for (auto i = 0u; i < numCommandLists; ++i) {
|
||||||
|
auto commandList = static_cast<CommandListImp *>(CommandList::fromHandle(phCommandLists[i]));
|
||||||
|
commandList->storeReferenceTsToMappedEvents(false);
|
||||||
|
}
|
||||||
this->dispatchTaskCountPostSyncByMiFlushDw(ctx.isDispatchTaskCountPostSyncRequired, child);
|
this->dispatchTaskCountPostSyncByMiFlushDw(ctx.isDispatchTaskCountPostSyncRequired, child);
|
||||||
|
|
||||||
this->makeCsrTagAllocationResident();
|
this->makeCsrTagAllocationResident();
|
||||||
|
|||||||
@@ -876,6 +876,9 @@ ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties)
|
|||||||
NEO::Device *activeDevice = getActiveDevice();
|
NEO::Device *activeDevice = getActiveDevice();
|
||||||
auto &compilerProductHelper = activeDevice->getCompilerProductHelper();
|
auto &compilerProductHelper = activeDevice->getCompilerProductHelper();
|
||||||
zeDeviceIpVersion->ipVersion = compilerProductHelper.getHwIpVersion(hardwareInfo);
|
zeDeviceIpVersion->ipVersion = compilerProductHelper.getHwIpVersion(hardwareInfo);
|
||||||
|
} else if (extendedProperties->stype == ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PROPERTIES) {
|
||||||
|
ze_event_query_kernel_timestamps_ext_properties_t *kernelTimestampExtProperties = reinterpret_cast<ze_event_query_kernel_timestamps_ext_properties_t *>(extendedProperties);
|
||||||
|
kernelTimestampExtProperties->flags = ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_KERNEL | ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_SYNCHRONIZED;
|
||||||
}
|
}
|
||||||
extendedProperties = static_cast<ze_base_properties_t *>(extendedProperties->pNext);
|
extendedProperties = static_cast<ze_base_properties_t *>(extendedProperties->pNext);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -119,7 +119,8 @@ struct DriverHandleImp : public DriverHandle {
|
|||||||
{ZE_DEVICE_IP_VERSION_EXT_NAME, ZE_DEVICE_IP_VERSION_VERSION_CURRENT},
|
{ZE_DEVICE_IP_VERSION_EXT_NAME, ZE_DEVICE_IP_VERSION_VERSION_CURRENT},
|
||||||
{ZE_CACHE_RESERVATION_EXT_NAME, ZE_CACHE_RESERVATION_EXT_VERSION_CURRENT},
|
{ZE_CACHE_RESERVATION_EXT_NAME, ZE_CACHE_RESERVATION_EXT_VERSION_CURRENT},
|
||||||
{ZE_IMAGE_VIEW_EXT_NAME, ZE_IMAGE_VIEW_EXP_VERSION_CURRENT},
|
{ZE_IMAGE_VIEW_EXT_NAME, ZE_IMAGE_VIEW_EXP_VERSION_CURRENT},
|
||||||
{ZE_IMAGE_VIEW_PLANAR_EXT_NAME, ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_CURRENT}};
|
{ZE_IMAGE_VIEW_PLANAR_EXT_NAME, ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_CURRENT},
|
||||||
|
{ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_NAME, ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_VERSION_CURRENT}};
|
||||||
|
|
||||||
uint64_t uuidTimestamp = 0u;
|
uint64_t uuidTimestamp = 0u;
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
#include "shared/source/helpers/timestamp_packet_constants.h"
|
#include "shared/source/helpers/timestamp_packet_constants.h"
|
||||||
#include "shared/source/helpers/timestamp_packet_container.h"
|
#include "shared/source/helpers/timestamp_packet_container.h"
|
||||||
#include "shared/source/memory_manager/multi_graphics_allocation.h"
|
#include "shared/source/memory_manager/multi_graphics_allocation.h"
|
||||||
|
#include "shared/source/os_interface/os_time.h"
|
||||||
|
|
||||||
#include <level_zero/ze_api.h>
|
#include <level_zero/ze_api.h>
|
||||||
|
|
||||||
@@ -69,6 +70,8 @@ struct Event : _ze_event_handle_t {
|
|||||||
virtual ze_result_t reset() = 0;
|
virtual ze_result_t reset() = 0;
|
||||||
virtual ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) = 0;
|
virtual ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) = 0;
|
||||||
virtual ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) = 0;
|
virtual ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) = 0;
|
||||||
|
virtual ze_result_t queryKernelTimestampsExt(Device *device, uint32_t *pCount, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) = 0;
|
||||||
|
|
||||||
enum State : uint32_t {
|
enum State : uint32_t {
|
||||||
STATE_SIGNALED = 0u,
|
STATE_SIGNALED = 0u,
|
||||||
HOST_CACHING_DISABLED_PERMANENT = std::numeric_limits<uint32_t>::max() - 2,
|
HOST_CACHING_DISABLED_PERMANENT = std::numeric_limits<uint32_t>::max() - 2,
|
||||||
@@ -210,6 +213,10 @@ struct Event : _ze_event_handle_t {
|
|||||||
void enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes);
|
void enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes);
|
||||||
bool isInOrderExecEvent() const { return inOrderExecEvent; }
|
bool isInOrderExecEvent() const { return inOrderExecEvent; }
|
||||||
const NEO::TimestampPacketContainer *getInOrderTimestampPacket() const { return inOrderTimestampPacket.get(); }
|
const NEO::TimestampPacketContainer *getInOrderTimestampPacket() const { return inOrderTimestampPacket.get(); }
|
||||||
|
void setReferenceTs(NEO::TimeStampData ×tamp) {
|
||||||
|
referenceTs = timestamp;
|
||||||
|
}
|
||||||
|
bool hasKerneMappedTsCapability = false;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
|
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
|
||||||
@@ -218,6 +225,7 @@ struct Event : _ze_event_handle_t {
|
|||||||
uint64_t globalEndTS = 1;
|
uint64_t globalEndTS = 1;
|
||||||
uint64_t contextStartTS = 1;
|
uint64_t contextStartTS = 1;
|
||||||
uint64_t contextEndTS = 1;
|
uint64_t contextEndTS = 1;
|
||||||
|
NEO::TimeStampData referenceTs{};
|
||||||
|
|
||||||
std::chrono::microseconds gpuHangCheckPeriod{500'000};
|
std::chrono::microseconds gpuHangCheckPeriod{500'000};
|
||||||
std::bitset<EventPacketsCount::maxKernelSplit> l3FlushAppliedOnKernel;
|
std::bitset<EventPacketsCount::maxKernelSplit> l3FlushAppliedOnKernel;
|
||||||
@@ -268,6 +276,9 @@ struct EventPool : _ze_event_pool_handle_t {
|
|||||||
DriverHandleImp *driver, ContextImp *context, uint32_t numDevices, ze_device_handle_t *deviceHandles);
|
DriverHandleImp *driver, ContextImp *context, uint32_t numDevices, ze_device_handle_t *deviceHandles);
|
||||||
EventPool(const ze_event_pool_desc_t *desc) : EventPool(desc->count) {
|
EventPool(const ze_event_pool_desc_t *desc) : EventPool(desc->count) {
|
||||||
eventPoolFlags = desc->flags;
|
eventPoolFlags = desc->flags;
|
||||||
|
if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) {
|
||||||
|
eventPoolFlags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
virtual ~EventPool();
|
virtual ~EventPool();
|
||||||
MOCKABLE_VIRTUAL ze_result_t destroy();
|
MOCKABLE_VIRTUAL ze_result_t destroy();
|
||||||
@@ -299,6 +310,13 @@ struct EventPool : _ze_event_pool_handle_t {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool isEventPoolKerneMappedTsFlagSet() const {
|
||||||
|
if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t getMaxKernelCount() const {
|
uint32_t getMaxKernelCount() const {
|
||||||
return maxKernelCount;
|
return maxKernelCount;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ struct EventImp : public Event {
|
|||||||
|
|
||||||
ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) override;
|
ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) override;
|
||||||
ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) override;
|
ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) override;
|
||||||
|
ze_result_t queryKernelTimestampsExt(Device *device, uint32_t *pCount, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) override;
|
||||||
|
|
||||||
void resetDeviceCompletionData(bool resetAllPackets);
|
void resetDeviceCompletionData(bool resetAllPackets);
|
||||||
void resetKernelCountAndPacketUsedCount() override;
|
void resetKernelCountAndPacketUsedCount() override;
|
||||||
@@ -74,6 +75,8 @@ struct EventImp : public Event {
|
|||||||
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
|
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
|
||||||
MOCKABLE_VIRTUAL void assignKernelEventCompletionData(void *address);
|
MOCKABLE_VIRTUAL void assignKernelEventCompletionData(void *address);
|
||||||
void setRemainingPackets(TagSizeT eventVal, void *nextPacketAddress, uint32_t packetsAlreadySet);
|
void setRemainingPackets(TagSizeT eventVal, void *nextPacketAddress, uint32_t packetsAlreadySet);
|
||||||
|
void getSynchronizedKernelTimestamps(ze_synchronized_timestamp_result_ext_t *pSynchronizedTimestampsBuffer,
|
||||||
|
const uint32_t count, const ze_kernel_timestamp_result_t *pKernelTimestampsBuffer);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
@@ -9,6 +9,7 @@
|
|||||||
#include "shared/source/command_container/implicit_scaling.h"
|
#include "shared/source/command_container/implicit_scaling.h"
|
||||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||||
#include "shared/source/device/sub_device.h"
|
#include "shared/source/device/sub_device.h"
|
||||||
|
#include "shared/source/helpers/hw_info.h"
|
||||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||||
#include "shared/source/memory_manager/memory_operations_handler.h"
|
#include "shared/source/memory_manager/memory_operations_handler.h"
|
||||||
#include "shared/source/os_interface/os_time.h"
|
#include "shared/source/os_interface/os_time.h"
|
||||||
@@ -32,6 +33,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
|||||||
event->setEventTimestampFlag(true);
|
event->setEventTimestampFlag(true);
|
||||||
event->setSinglePacketSize(NEO::TimestampPackets<TagSizeT>::getSinglePacketSize());
|
event->setSinglePacketSize(NEO::TimestampPackets<TagSizeT>::getSinglePacketSize());
|
||||||
}
|
}
|
||||||
|
event->hasKerneMappedTsCapability = eventPool->isEventPoolKerneMappedTsFlagSet();
|
||||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||||
|
|
||||||
event->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo);
|
event->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo);
|
||||||
@@ -494,6 +496,85 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *cou
|
|||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename TagSizeT>
|
||||||
|
void EventImp<TagSizeT>::getSynchronizedKernelTimestamps(ze_synchronized_timestamp_result_ext_t *pSynchronizedTimestampsBuffer,
|
||||||
|
const uint32_t count, const ze_kernel_timestamp_result_t *pKernelTimestampsBuffer) {
|
||||||
|
|
||||||
|
auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper();
|
||||||
|
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
|
||||||
|
const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
|
||||||
|
auto deviceTsInNs = gfxCoreHelper.getGpuTimeStampInNS(referenceTs.gpuTimeStamp, frequency);
|
||||||
|
const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits);
|
||||||
|
|
||||||
|
auto getDuration = [&](uint64_t startTs, uint64_t endTs) {
|
||||||
|
const uint64_t maxValue = maxKernelTsValue;
|
||||||
|
startTs &= maxValue;
|
||||||
|
endTs &= maxValue;
|
||||||
|
|
||||||
|
if (startTs > endTs) {
|
||||||
|
// Resolve overflows
|
||||||
|
return endTs + (maxValue - startTs);
|
||||||
|
} else {
|
||||||
|
return endTs - startTs;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto &referenceHostTsInNs = referenceTs.cpuTimeinNS;
|
||||||
|
|
||||||
|
// High Level Approach:
|
||||||
|
// startTimeStamp = (referenceHostTsInNs - submitDeviceTs) + kernelDeviceTsStart
|
||||||
|
// deviceDuration = kernelDeviceTsEnd - kernelDeviceTsStart
|
||||||
|
// endTimeStamp = startTimeStamp + deviceDuration
|
||||||
|
|
||||||
|
// Get offset between Device and Host timestamps
|
||||||
|
const int64_t tsOffsetInNs = referenceHostTsInNs - deviceTsInNs;
|
||||||
|
|
||||||
|
auto calculateSynchronizedTs = [&](ze_synchronized_timestamp_data_ext_t *synchronizedTs, const ze_kernel_timestamp_data_t *deviceTs) {
|
||||||
|
// Add the offset to the kernel timestamp to find the start timestamp on the CPU timescale
|
||||||
|
int64_t offset = tsOffsetInNs;
|
||||||
|
uint64_t startTimeStampInNs = static_cast<uint64_t>(deviceTs->kernelStart * frequency) + offset;
|
||||||
|
if (startTimeStampInNs < referenceHostTsInNs) {
|
||||||
|
offset += static_cast<uint64_t>(maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits()) * frequency);
|
||||||
|
startTimeStampInNs = static_cast<uint64_t>(deviceTs->kernelStart * frequency) + offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the kernel timestamp duration
|
||||||
|
uint64_t deviceDuration = getDuration(deviceTs->kernelStart, deviceTs->kernelEnd);
|
||||||
|
uint64_t deviceDurationNs = static_cast<uint64_t>(deviceDuration * frequency);
|
||||||
|
// Add the duration to the startTimeStamp to get the endTimeStamp
|
||||||
|
uint64_t endTimeStampInNs = startTimeStampInNs + deviceDurationNs;
|
||||||
|
|
||||||
|
synchronizedTs->kernelStart = startTimeStampInNs;
|
||||||
|
synchronizedTs->kernelEnd = endTimeStampInNs;
|
||||||
|
};
|
||||||
|
|
||||||
|
for (uint32_t index = 0; index < count; index++) {
|
||||||
|
calculateSynchronizedTs(&pSynchronizedTimestampsBuffer[index].global, &pKernelTimestampsBuffer[index].global);
|
||||||
|
|
||||||
|
pSynchronizedTimestampsBuffer[index].context.kernelStart = pSynchronizedTimestampsBuffer[index].global.kernelStart;
|
||||||
|
uint64_t deviceDuration = getDuration(pKernelTimestampsBuffer[index].context.kernelStart,
|
||||||
|
pKernelTimestampsBuffer[index].context.kernelEnd);
|
||||||
|
uint64_t deviceDurationNs = static_cast<uint64_t>(deviceDuration * frequency);
|
||||||
|
pSynchronizedTimestampsBuffer[index].context.kernelEnd = pSynchronizedTimestampsBuffer[index].context.kernelStart +
|
||||||
|
deviceDurationNs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename TagSizeT>
|
||||||
|
ze_result_t EventImp<TagSizeT>::queryKernelTimestampsExt(Device *device, uint32_t *pCount, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) {
|
||||||
|
|
||||||
|
if (*pCount == 0) {
|
||||||
|
return queryTimestampsExp(device, pCount, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
ze_result_t status = queryTimestampsExp(device, pCount, pResults->pKernelTimestampsBuffer);
|
||||||
|
|
||||||
|
if (status == ZE_RESULT_SUCCESS && hasKerneMappedTsCapability) {
|
||||||
|
getSynchronizedKernelTimestamps(pResults->pSynchronizedTimestampsBuffer, *pCount, pResults->pKernelTimestampsBuffer);
|
||||||
|
}
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename TagSizeT>
|
template <typename TagSizeT>
|
||||||
uint32_t EventImp<TagSizeT>::getPacketsInUse() const {
|
uint32_t EventImp<TagSizeT>::getPacketsInUse() const {
|
||||||
uint32_t packetsInUse = 0;
|
uint32_t packetsInUse = 0;
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#include "zello_common.h"
|
#include "zello_common.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <map>
|
#include <map>
|
||||||
@@ -27,6 +28,20 @@ inline std::vector<uint8_t> loadBinaryFile(const std::string &filePath) {
|
|||||||
return binaryFile;
|
return binaryFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates an immediate command list on the queue group returned by getCommandQueueOrdinal(device).
// Queue index 0 and normal priority are used; the queue mode is chosen by selectQueueMode from
// syncMode. The new handle is stored in cmdList.
void createImmediateCommandList(ze_device_handle_t &device,
                                ze_context_handle_t &context,
                                bool syncMode,
                                ze_command_list_handle_t &cmdList) {
    ze_command_queue_desc_t cmdQueueDesc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    cmdQueueDesc.pNext = nullptr;

    // Target queue: group ordinal picked by the helper, first queue in that group.
    cmdQueueDesc.ordinal = getCommandQueueOrdinal(device);
    cmdQueueDesc.index = 0;

    cmdQueueDesc.flags = 0;
    cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    selectQueueMode(cmdQueueDesc, syncMode);

    SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &cmdQueueDesc, &cmdList));
}
|
||||||
|
|
||||||
void createCmdQueueAndCmdList(ze_context_handle_t &context,
|
void createCmdQueueAndCmdList(ze_context_handle_t &context,
|
||||||
ze_device_handle_t &device,
|
ze_device_handle_t &device,
|
||||||
ze_command_queue_handle_t &cmdQueue,
|
ze_command_queue_handle_t &cmdQueue,
|
||||||
@@ -59,6 +74,21 @@ void createCmdQueueAndCmdList(ze_context_handle_t &context,
|
|||||||
SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList));
|
SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates an immediate command list on the caller-supplied queue group ordinal.
// Queue index 0 and normal priority are used; the queue mode is chosen by selectQueueMode from
// syncMode. The new handle is stored in cmdList.
void createImmediateCommandList(ze_device_handle_t &device,
                                ze_context_handle_t &context,
                                uint32_t queueGroupOrdinal,
                                bool syncMode,
                                ze_command_list_handle_t &cmdList) {
    ze_command_queue_desc_t cmdQueueDesc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    cmdQueueDesc.pNext = nullptr;

    // Target queue: requested group, first queue in that group.
    cmdQueueDesc.ordinal = queueGroupOrdinal;
    cmdQueueDesc.index = 0;

    cmdQueueDesc.flags = 0;
    cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    selectQueueMode(cmdQueueDesc, syncMode);

    SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &cmdQueueDesc, &cmdList));
}
|
||||||
|
|
||||||
bool testWriteGlobalTimestamp(int argc, char *argv[],
|
bool testWriteGlobalTimestamp(int argc, char *argv[],
|
||||||
ze_context_handle_t &context,
|
ze_context_handle_t &context,
|
||||||
ze_driver_handle_t &driver,
|
ze_driver_handle_t &driver,
|
||||||
@@ -394,7 +424,8 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[],
|
|||||||
ze_event_pool_handle_t eventPool;
|
ze_event_pool_handle_t eventPool;
|
||||||
ze_event_handle_t kernelTsEvent;
|
ze_event_handle_t kernelTsEvent;
|
||||||
|
|
||||||
bool runTillDeviceTsOverflows = isParamEnabled(argc, argv, "-o", "--runTillOverflow");
|
bool runTillDeviceTsOverflows = isParamEnabled(argc, argv, "-d", "--runTillDeviceTsOverflow");
|
||||||
|
bool runTillKernelTsOverflows = isParamEnabled(argc, argv, "-k", "--runTillKernelTsOverflow");
|
||||||
|
|
||||||
// Create commandQueue and cmdList
|
// Create commandQueue and cmdList
|
||||||
createCmdQueueAndCmdList(context, device, cmdQueue, cmdList);
|
createCmdQueueAndCmdList(context, device, cmdQueue, cmdList);
|
||||||
@@ -489,7 +520,7 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[],
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
uint64_t unusedHostTs, referenceDeviceTs;
|
uint64_t unusedHostTs, referenceDeviceTs, referenceKernelTs = 0;
|
||||||
|
|
||||||
SUCCESS_OR_TERMINATE(zeDeviceGetGlobalTimestamps(device, &unusedHostTs, &referenceDeviceTs));
|
SUCCESS_OR_TERMINATE(zeDeviceGetGlobalTimestamps(device, &unusedHostTs, &referenceDeviceTs));
|
||||||
std::cout << "ReferenceDeviceTs: " << referenceDeviceTs << "\n";
|
std::cout << "ReferenceDeviceTs: " << referenceDeviceTs << "\n";
|
||||||
@@ -502,6 +533,12 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[],
|
|||||||
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
||||||
ze_kernel_timestamp_result_t *kernelTsResults = reinterpret_cast<ze_kernel_timestamp_result_t *>(timestampBuffer);
|
ze_kernel_timestamp_result_t *kernelTsResults = reinterpret_cast<ze_kernel_timestamp_result_t *>(timestampBuffer);
|
||||||
|
|
||||||
|
auto currMinKernelTs = std::min(kernelTsResults->global.kernelStart, kernelTsResults->global.kernelEnd);
|
||||||
|
if (referenceKernelTs == 0) {
|
||||||
|
referenceKernelTs = currMinKernelTs;
|
||||||
|
std::cout << "ReferencekernelTs: " << referenceKernelTs << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
// High Level Approach:
|
// High Level Approach:
|
||||||
// startTimeStamp = (submitHostTs - submitDeviceTs) + kernelDeviceTsStart
|
// startTimeStamp = (submitHostTs - submitDeviceTs) + kernelDeviceTsStart
|
||||||
// deviceDuration = kernelDeviceTsEnd - kernelDeviceTsStart
|
// deviceDuration = kernelDeviceTsEnd - kernelDeviceTsStart
|
||||||
@@ -528,10 +565,14 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[],
|
|||||||
std::cout << " | submit[host,device]: [" << submitHostTs << ", " << submitDeviceTs << "]";
|
std::cout << " | submit[host,device]: [" << submitHostTs << ", " << submitDeviceTs << "]";
|
||||||
std::cout << " | deviceTsOnHostTimescale[start, end] : [" << startTimeStamp << ", " << endTimeStamp << " ] \n";
|
std::cout << " | deviceTsOnHostTimescale[start, end] : [" << startTimeStamp << ", " << endTimeStamp << " ] \n";
|
||||||
++iter;
|
++iter;
|
||||||
if (runTillDeviceTsOverflows) {
|
if (runTillDeviceTsOverflows || runTillKernelTsOverflows) {
|
||||||
i = 0;
|
i = 0;
|
||||||
if (referenceDeviceTs > submitDeviceTs) {
|
if (runTillDeviceTsOverflows && referenceDeviceTs > submitDeviceTs) {
|
||||||
runTillDeviceTsOverflows = false;
|
runTillKernelTsOverflows = runTillDeviceTsOverflows = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (runTillKernelTsOverflows && referenceKernelTs > currMinKernelTs) {
|
||||||
|
runTillKernelTsOverflows = runTillDeviceTsOverflows = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -549,6 +590,216 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[],
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool testKernelMappedTimestampMap(int argc, char *argv[],
|
||||||
|
ze_context_handle_t &context,
|
||||||
|
ze_driver_handle_t &driver,
|
||||||
|
ze_device_handle_t &device) {
|
||||||
|
|
||||||
|
ze_command_queue_handle_t cmdQueue;
|
||||||
|
ze_command_list_handle_t cmdList;
|
||||||
|
ze_module_handle_t module;
|
||||||
|
ze_kernel_handle_t kernel;
|
||||||
|
void *srcBuffer = nullptr;
|
||||||
|
void *dstBuffer = nullptr;
|
||||||
|
void *timestampBuffer = nullptr;
|
||||||
|
ze_event_pool_handle_t eventPool;
|
||||||
|
constexpr uint32_t maxEventUsageCount = 3;
|
||||||
|
uint32_t eventUsageCount = maxEventUsageCount;
|
||||||
|
constexpr size_t allocSize = 4096;
|
||||||
|
ze_group_count_t dispatchTraits;
|
||||||
|
|
||||||
|
bool runTillDeviceTsOverflows = isParamEnabled(argc, argv, "-o", "--runTillOverflow");
|
||||||
|
bool useSingleCommand = isParamEnabled(argc, argv, "-s", "--useSingleCommand");
|
||||||
|
bool useImmediate = isParamEnabled(argc, argv, "-i", "--useImmediate");
|
||||||
|
int defaultVerboseLevel = 1;
|
||||||
|
int verboseLevel = getParamValue(argc, argv, "-l", "--verboseLevel", defaultVerboseLevel);
|
||||||
|
|
||||||
|
if (useSingleCommand) {
|
||||||
|
eventUsageCount = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ze_event_handle_t kernelTsEvent[maxEventUsageCount];
|
||||||
|
createEventPoolAndEvents(context, device, eventPool,
|
||||||
|
(ze_event_pool_flag_t)(ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP), maxEventUsageCount, kernelTsEvent,
|
||||||
|
ZE_EVENT_SCOPE_FLAG_DEVICE, ZE_EVENT_SCOPE_FLAG_HOST);
|
||||||
|
|
||||||
|
// Create commandQueue and cmdList
|
||||||
|
if (useImmediate) {
|
||||||
|
createImmediateCommandList(device, context, false, cmdList);
|
||||||
|
} else {
|
||||||
|
createCmdQueueAndCmdList(context, device, cmdQueue, cmdList);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto prepareKernel = [&]() {
|
||||||
|
// Create two shared buffers
|
||||||
|
ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
|
||||||
|
deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
|
||||||
|
deviceDesc.ordinal = 0;
|
||||||
|
|
||||||
|
ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
|
||||||
|
hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;
|
||||||
|
|
||||||
|
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
|
||||||
|
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
|
||||||
|
SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, sizeof(ze_kernel_timestamp_result_t), 1, ×tampBuffer));
|
||||||
|
|
||||||
|
// Initialize memory
|
||||||
|
constexpr uint8_t val = 55;
|
||||||
|
memset(srcBuffer, val, allocSize);
|
||||||
|
memset(dstBuffer, 0, allocSize);
|
||||||
|
memset(timestampBuffer, 0, sizeof(ze_kernel_timestamp_result_t));
|
||||||
|
|
||||||
|
// Create kernel
|
||||||
|
auto spirvModule = loadBinaryFile("copy_buffer_to_buffer.spv");
|
||||||
|
if (spirvModule.size() == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
|
||||||
|
moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
|
||||||
|
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(spirvModule.data());
|
||||||
|
moduleDesc.inputSize = spirvModule.size();
|
||||||
|
SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &moduleDesc, &module, nullptr));
|
||||||
|
|
||||||
|
ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
|
||||||
|
kernelDesc.pKernelName = "CopyBufferToBufferBytes";
|
||||||
|
SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel));
|
||||||
|
|
||||||
|
uint32_t groupSizeX = 32u;
|
||||||
|
uint32_t groupSizeY = 1u;
|
||||||
|
uint32_t groupSizeZ = 1u;
|
||||||
|
SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, static_cast<uint32_t>(allocSize), 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
|
||||||
|
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
|
||||||
|
|
||||||
|
uint32_t offset = 0;
|
||||||
|
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
|
||||||
|
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
|
||||||
|
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset));
|
||||||
|
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset));
|
||||||
|
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset));
|
||||||
|
|
||||||
|
dispatchTraits.groupCountX = static_cast<uint32_t>(allocSize) / groupSizeX;
|
||||||
|
dispatchTraits.groupCountY = 1u;
|
||||||
|
dispatchTraits.groupCountZ = 1u;
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
uint64_t previousMaximumSyncTs = std::numeric_limits<uint64_t>::min();
|
||||||
|
uint64_t referenceMinimumGlobalTs = 0;
|
||||||
|
|
||||||
|
prepareKernel();
|
||||||
|
if (!useImmediate) {
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, kernelTsEvent[0], 0, nullptr));
|
||||||
|
if (!useSingleCommand) {
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, kernelTsEvent[1], 0u, nullptr));
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, kernelTsEvent[2], 0, nullptr));
|
||||||
|
}
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < 10; i++) {
|
||||||
|
|
||||||
|
if (!useImmediate) {
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
||||||
|
} else {
|
||||||
|
// Immediate Commandlist case
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, kernelTsEvent[0], 0, nullptr));
|
||||||
|
if (!useSingleCommand) {
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, kernelTsEvent[1], 0u, nullptr));
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, kernelTsEvent[2], 0, nullptr));
|
||||||
|
}
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandListHostSynchronize(cmdList, std::numeric_limits<uint64_t>::max()));
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t currentMinimumSyncTs = std::numeric_limits<uint64_t>::max();
|
||||||
|
uint64_t currentMaximumSyncTs = std::numeric_limits<uint64_t>::min();
|
||||||
|
uint64_t currentMinimumGlobalTs = std::numeric_limits<uint64_t>::max();
|
||||||
|
|
||||||
|
for (uint32_t j = 0; j < eventUsageCount; j++) {
|
||||||
|
uint32_t count = 0;
|
||||||
|
if (verboseLevel == 1) {
|
||||||
|
std::cout << "[iter(" << i << ")][event(" << j << ")]====>\n";
|
||||||
|
}
|
||||||
|
SUCCESS_OR_TERMINATE(zeEventQueryStatus(kernelTsEvent[j]));
|
||||||
|
SUCCESS_OR_TERMINATE(zeEventQueryKernelTimestampsExt(kernelTsEvent[j], device, &count, nullptr));
|
||||||
|
if (count == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<ze_kernel_timestamp_result_t> timestampResult(count);
|
||||||
|
std::vector<ze_synchronized_timestamp_result_ext_t> syncTimestampResult(count);
|
||||||
|
|
||||||
|
ze_event_query_kernel_timestamps_results_ext_properties_t properties = {};
|
||||||
|
properties.pNext = nullptr;
|
||||||
|
properties.pKernelTimestampsBuffer = timestampResult.data();
|
||||||
|
properties.pSynchronizedTimestampsBuffer = syncTimestampResult.data();
|
||||||
|
SUCCESS_OR_TERMINATE(zeEventQueryKernelTimestampsExt(kernelTsEvent[j], device, &count, &properties));
|
||||||
|
|
||||||
|
for (uint32_t k = 0; k < count; k++) {
|
||||||
|
const auto &ts = properties.pKernelTimestampsBuffer[k];
|
||||||
|
const auto &syncTs = properties.pSynchronizedTimestampsBuffer[k];
|
||||||
|
|
||||||
|
currentMinimumSyncTs = std::min(currentMinimumSyncTs, syncTs.global.kernelStart);
|
||||||
|
currentMinimumSyncTs = std::min(currentMinimumSyncTs, syncTs.global.kernelEnd);
|
||||||
|
currentMaximumSyncTs = std::max(currentMaximumSyncTs, syncTs.global.kernelStart);
|
||||||
|
currentMaximumSyncTs = std::max(currentMaximumSyncTs, syncTs.global.kernelEnd);
|
||||||
|
|
||||||
|
currentMinimumGlobalTs = std::min(currentMinimumGlobalTs, ts.global.kernelStart);
|
||||||
|
currentMinimumGlobalTs = std::min(currentMinimumGlobalTs, ts.global.kernelEnd);
|
||||||
|
|
||||||
|
if (verboseLevel == 1) {
|
||||||
|
std::cout << "\t[packedId:" << k << " ]"
|
||||||
|
<< "[global-ts(" << ts.global.kernelStart << " , " << ts.global.kernelEnd << " ) "
|
||||||
|
<< "| syncTs( " << syncTs.global.kernelStart << " , " << syncTs.global.kernelEnd << " )] "
|
||||||
|
<< "# [context-ts( " << ts.context.kernelStart << " , " << ts.context.kernelEnd << " ) "
|
||||||
|
<< "| syncTs ( " << syncTs.context.kernelStart << " , " << syncTs.context.kernelEnd << " )]\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (verboseLevel == 2) {
|
||||||
|
std::cout << "KernelSyncTs: " << syncTs.global.kernelStart << " , " << syncTs.global.kernelEnd
|
||||||
|
<< " | ContextSyncTs: " << syncTs.context.kernelStart << " , " << syncTs.context.kernelEnd << "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
SUCCESS_OR_TERMINATE(zeEventHostReset(kernelTsEvent[j]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentMinimumSyncTs < previousMaximumSyncTs) {
|
||||||
|
std::cout << "\n\n!!FAILED: Current Minimum Ts : " << currentMinimumSyncTs << " less than Previous Maximum Ts : " << previousMaximumSyncTs << "\n\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
previousMaximumSyncTs = currentMaximumSyncTs;
|
||||||
|
|
||||||
|
if (!referenceMinimumGlobalTs) {
|
||||||
|
referenceMinimumGlobalTs = currentMinimumGlobalTs;
|
||||||
|
} else {
|
||||||
|
if (runTillDeviceTsOverflows) {
|
||||||
|
if (currentMinimumGlobalTs < referenceMinimumGlobalTs) {
|
||||||
|
runTillDeviceTsOverflows = false;
|
||||||
|
}
|
||||||
|
i = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup
|
||||||
|
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
|
||||||
|
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
|
||||||
|
SUCCESS_OR_TERMINATE(zeMemFree(context, timestampBuffer));
|
||||||
|
for (uint32_t j = 0; j < eventUsageCount; j++) {
|
||||||
|
SUCCESS_OR_TERMINATE(zeEventDestroy(kernelTsEvent[j]));
|
||||||
|
}
|
||||||
|
|
||||||
|
SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPool));
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
|
||||||
|
if (!useImmediate) {
|
||||||
|
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
|
||||||
|
}
|
||||||
|
SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel));
|
||||||
|
SUCCESS_OR_TERMINATE(zeModuleDestroy(module));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
const std::string blackBoxName("Zello Timestamp");
|
const std::string blackBoxName("Zello Timestamp");
|
||||||
verbose = isVerbose(argc, argv);
|
verbose = isVerbose(argc, argv);
|
||||||
@@ -563,6 +814,7 @@ int main(int argc, char *argv[]) {
|
|||||||
supportedTests["testKernelTimestampAppendQueryWithDeviceProperties"] = testKernelTimestampAppendQueryWithDeviceProperties;
|
supportedTests["testKernelTimestampAppendQueryWithDeviceProperties"] = testKernelTimestampAppendQueryWithDeviceProperties;
|
||||||
supportedTests["testWriteGlobalTimestamp"] = testWriteGlobalTimestamp;
|
supportedTests["testWriteGlobalTimestamp"] = testWriteGlobalTimestamp;
|
||||||
supportedTests["testKernelTimestampHostQuery"] = testKernelTimestampHostQuery;
|
supportedTests["testKernelTimestampHostQuery"] = testKernelTimestampHostQuery;
|
||||||
|
supportedTests["testKernelMappedTimestampMap"] = testKernelMappedTimestampMap;
|
||||||
|
|
||||||
const char *defaultString = "testKernelTimestampAppendQueryWithDeviceProperties";
|
const char *defaultString = "testKernelTimestampAppendQueryWithDeviceProperties";
|
||||||
const char *test = getParamValue(argc, argv, "-t", "--test", defaultString);
|
const char *test = getParamValue(argc, argv, "-t", "--test", defaultString);
|
||||||
|
|||||||
@@ -76,6 +76,7 @@ struct Mock<Event> : public Event {
|
|||||||
ADDMETHOD_NOBASE(reset, ze_result_t, ZE_RESULT_SUCCESS, ());
|
ADDMETHOD_NOBASE(reset, ze_result_t, ZE_RESULT_SUCCESS, ());
|
||||||
ADDMETHOD_NOBASE(queryKernelTimestamp, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_timestamp_result_t * dstptr));
|
ADDMETHOD_NOBASE(queryKernelTimestamp, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_timestamp_result_t * dstptr));
|
||||||
ADDMETHOD_NOBASE(queryTimestampsExp, ze_result_t, ZE_RESULT_SUCCESS, (::L0::Device * device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps));
|
ADDMETHOD_NOBASE(queryTimestampsExp, ze_result_t, ZE_RESULT_SUCCESS, (::L0::Device * device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps));
|
||||||
|
ADDMETHOD_NOBASE(queryKernelTimestampsExt, ze_result_t, ZE_RESULT_SUCCESS, (::L0::Device * device, uint32_t *count, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults));
|
||||||
|
|
||||||
// Fake an allocation for event memory
|
// Fake an allocation for event memory
|
||||||
alignas(16) uint32_t memory = -1;
|
alignas(16) uint32_t memory = -1;
|
||||||
@@ -154,6 +155,9 @@ class MockEvent : public ::L0::Event {
|
|||||||
ze_result_t queryTimestampsExp(L0::Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) override {
|
ze_result_t queryTimestampsExp(L0::Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) override {
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
// Mock stub: ignores all arguments, writes nothing to count or pResults and reports success.
ze_result_t queryKernelTimestampsExt(L0::Device *device, uint32_t *count, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) override {
|
||||||
|
return ZE_RESULT_SUCCESS;
|
||||||
|
}
|
||||||
uint32_t getPacketsUsedInLastKernel() override { return 1; }
|
uint32_t getPacketsUsedInLastKernel() override { return 1; }
|
||||||
uint32_t getPacketsInUse() const override { return 1; }
|
uint32_t getPacketsInUse() const override { return 1; }
|
||||||
void resetPackets(bool resetAllPackets) override {}
|
void resetPackets(bool resetAllPackets) override {}
|
||||||
|
|||||||
@@ -3058,5 +3058,147 @@ HWTEST2_F(CommandListHostSynchronize, whenHostSychronizeIsCalledReturnInvalidArg
|
|||||||
commandList->destroy();
|
commandList->destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Suite alias: the mapped-timestamp tests reuse the CommandListAppendLaunchKernel fixture under their own suite name.
using CommandListMappedTimestampTest = CommandListAppendLaunchKernel;
|
||||||
|
|
||||||
|
// Appending a cooperative kernel launch that signals an event created from a
// KERNEL_MAPPED_TIMESTAMP pool must record that event in the command list's mapped event list.
HWTEST2_F(CommandListMappedTimestampTest, givenMappedTimestampSignalEventWhenAppendApiIsCalledThenTheEventIsAddedToMappedSignalList, IsAtLeastSkl) {
    createKernel();

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
    eventPoolDesc.count = 2;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.wait = 0;
    eventDesc.signal = 0;

    ze_result_t returnValue;
    std::unique_ptr<L0::EventPool> eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    std::unique_ptr<L0::Event> event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    ze_group_count_t groupCount{1, 1, 1};

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // Abort the test if nothing was recorded: indexing an empty list below would be
    // undefined behaviour rather than a readable test failure.
    ASSERT_NE(0u, commandList->peekMappedEventList().size());
    EXPECT_EQ(event.get(), commandList->peekMappedEventList()[0]);
}
|
||||||
|
|
||||||
|
// An event from a pool created without the KERNEL_MAPPED_TIMESTAMP flag must not
// end up in the command list's mapped event list when a kernel launch signals it.
HWTEST2_F(CommandListMappedTimestampTest, givenSignalEventWithoutMappedTimstampWhenAppendApiIsCalledThenTheEventIsNotAddedToMappedSignalList, IsAtLeastSkl) {
    createKernel();

    // Host-visible pool only; the mapped-timestamp flag is deliberately left out.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 2;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;
    signalEventDesc.wait = 0;
    signalEventDesc.signal = 0;

    ze_result_t result;
    std::unique_ptr<L0::EventPool> pool(EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, result));
    std::unique_ptr<L0::Event> signalEvent(Event::create<typename FamilyType::TimestampPacketType>(pool.get(), &signalEventDesc, device));

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    cmdList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    ze_group_count_t dispatchSize{1, 1, 1};
    result = cmdList->appendLaunchCooperativeKernel(kernel->toHandle(), &dispatchSize, signalEvent->toHandle(), 0, nullptr, false);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(0u, cmdList->peekMappedEventList().size());
}
|
||||||
|
|
||||||
|
// Signalling the same mapped-timestamp event from two append calls (kernel launch, then barrier)
// must leave exactly one entry for that event in the command list's mapped event list.
HWTEST2_F(CommandListMappedTimestampTest, givenMappedTimestampSignalEventWhenAppendApiIsCalledMultipleTimesThenTheEventIsAddedOnceToMappedSignalList, IsAtLeastSkl) {
    createKernel();

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
    eventPoolDesc.count = 2;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.wait = 0;
    eventDesc.signal = 0;

    ze_result_t returnValue;
    std::unique_ptr<L0::EventPool> eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    std::unique_ptr<L0::Event> event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    ze_group_count_t groupCount{1, 1, 1};

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    returnValue = commandList->appendBarrier(event->toHandle(), 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // Check the size first and abort on mismatch, so the element access below
    // never touches an empty list.
    ASSERT_EQ(1u, commandList->peekMappedEventList().size());
    EXPECT_EQ(event.get(), commandList->peekMappedEventList()[0]);
}
|
||||||
|
|
||||||
|
// After an event is put on the mapped event list by hand, calling
// storeReferenceTsToMappedEvents(true) must leave the list empty.
HWTEST2_F(CommandListMappedTimestampTest, givenEventIsAddedToMappedEventListWhenStoringReferenceTimestampWithClearThenEventsAreCleared, IsAtLeastSkl) {
    createKernel();

    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 2;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;

    ze_event_desc_t mappedEventDesc = {};
    mappedEventDesc.index = 0;
    mappedEventDesc.wait = 0;
    mappedEventDesc.signal = 0;

    ze_result_t result;
    std::unique_ptr<L0::EventPool> pool(EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, result));
    std::unique_ptr<L0::Event> mappedEvent(Event::create<typename FamilyType::TimestampPacketType>(pool.get(), &mappedEventDesc, device));

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    cmdList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    // Install a mock OS time on the device before the reference timestamp is stored.
    neoDevice->setOSTime(new MockOSTime());

    cmdList->addToMappedEventList(mappedEvent.get());
    cmdList->storeReferenceTsToMappedEvents(true);

    EXPECT_EQ(0u, cmdList->peekMappedEventList().size());
}
|
||||||
|
|
||||||
|
// Runs storeReferenceTsToMappedEvents with the CommandListTimestampRefreshIntervalInMilliSec
// debug flag forced to 0 and a mock CPU clock the test advances by hand.
HWTEST2_F(CommandListMappedTimestampTest, givenCommandListTimestampRefreshIntervalInMilliSecIsSetWhenStoringReferenceTimestampThenUpdatedRefreshIntervalIsUsed, IsPVC) {
    DebugManagerStateRestore stateRestore;
    createKernel();

    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 2;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;

    ze_event_desc_t mappedEventDesc = {};
    mappedEventDesc.index = 0;
    mappedEventDesc.wait = 0;
    mappedEventDesc.signal = 0;

    ze_result_t result;
    std::unique_ptr<L0::EventPool> pool(EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, result));
    std::unique_ptr<L0::Event> mappedEvent(Event::create<typename FamilyType::TimestampPacketType>(pool.get(), &mappedEventDesc, device));

    // The flag is set before the command list exists; keep this ordering.
    NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.set(0);
    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();

    neoDevice->setOSTime(new MockOSTimeWithConfigurableCpuTimestamp());
    auto configurableOsTime = static_cast<MockOSTimeWithConfigurableCpuTimestamp *>(neoDevice->getOSTime());
    cmdList->initialize(device, NEO::EngineGroupType::Compute, 0u);

    // First store at CPU time 1 with the flag argument false: the event stays on the list.
    cmdList->addToMappedEventList(mappedEvent.get());
    configurableOsTime->mockCpuTime = 1;
    cmdList->storeReferenceTsToMappedEvents(false);
    EXPECT_EQ(1u, cmdList->peekMappedEventList().size());

    // Second store at the same CPU time; no expectation is attached to this call.
    cmdList->addToMappedEventList(mappedEvent.get());
    cmdList->storeReferenceTsToMappedEvents(false);

    // Final store at CPU time 2 with the flag argument true: the list ends up empty.
    configurableOsTime->mockCpuTime = 2;
    cmdList->addToMappedEventList(mappedEvent.get());
    cmdList->storeReferenceTsToMappedEvents(true);
    EXPECT_EQ(0u, cmdList->peekMappedEventList().size());
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -4513,5 +4513,20 @@ TEST(DeviceReturnSubDevicesAsApiDevicesTest, GivenReturnSubDevicesAsApiDevicesIs
|
|||||||
multiDeviceFixture.tearDown();
|
multiDeviceFixture.tearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Chains a ze_event_query_kernel_timestamps_ext_properties_t behind the device properties
// via pNext and checks that zeDeviceGetProperties reports both the KERNEL and SYNCHRONIZED flags.
TEST_F(DeviceTest, GivenValidDeviceWhenQueryingKernelTimestampsProptertiesThenCorrectPropertiesIsReturned) {
    // Value-initialise both structs: if the driver never wrote tsProps.flags, the flag
    // checks below must fail deterministically instead of reading leftover stack contents.
    ze_device_properties_t devProps = {};
    ze_event_query_kernel_timestamps_ext_properties_t tsProps = {};

    devProps.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
    devProps.pNext = &tsProps;

    tsProps.stype = ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PROPERTIES;
    tsProps.pNext = nullptr;

    EXPECT_EQ(ZE_RESULT_SUCCESS, zeDeviceGetProperties(device, &devProps));
    EXPECT_NE(0u, tsProps.flags & ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_KERNEL);
    EXPECT_NE(0u, tsProps.flags & ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_SYNCHRONIZED);
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -2078,6 +2078,142 @@ TEST_F(EventQueryTimestampExpWithRootDeviceAndSubDevices, givenEventWhenQuerytim
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Suite alias for the queryKernelTimestampsExt tests, built on EventUsedPacketSignalFixture.
// NOTE(review): the meaning of the <1, 1, 0, -1> template arguments is defined by the fixture, not visible here - confirm there.
using EventqueryKernelTimestampsExt = Test<EventUsedPacketSignalFixture<1, 1, 0, -1>>;
|
||||||
|
|
||||||
|
// Asking for more results (10) than the event has packets in use (2) must succeed
// and write the actual number of results back through the count pointer.
TEST_F(EventqueryKernelTimestampsExt, givenpCountLargerThanSupportedWhenCallingQueryKernelTimestampsExtThenpCountSetProperly) {
    event->setPacketsInUse(2u);

    // Only the kernel timestamp buffer is supplied; the synchronized buffer stays null.
    std::vector<ze_kernel_timestamp_result_t> kernelTimestamps(2);
    ze_event_query_kernel_timestamps_results_ext_properties_t queryResults{};
    queryResults.pSynchronizedTimestampsBuffer = nullptr;
    queryResults.pKernelTimestampsBuffer = kernelTimestamps.data();

    uint32_t requestedCount = 10;
    const auto status = event->queryKernelTimestampsExt(device, &requestedCount, &queryResults);

    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(2u, requestedCount);
}
|
||||||
|
|
||||||
|
// With the EnableStaticPartitioning debug flag set to 0 and the event marked as
// mapped-timestamp capable, queryKernelTimestampsExt is expected to report UNSUPPORTED_FEATURE.
TEST_F(EventqueryKernelTimestampsExt, givenEventWithStaticPartitionOffThenQueryKernelTimestampsExtReturnsUnsupported) {
    DebugManagerStateRestore stateRestore;
    NEO::DebugManager.flags.EnableStaticPartitioning.set(0);

    event->hasKerneMappedTsCapability = true;

    // Only the kernel timestamp buffer is supplied; the synchronized buffer stays null.
    std::vector<ze_kernel_timestamp_result_t> kernelTimestamps(2);
    ze_event_query_kernel_timestamps_results_ext_properties_t queryResults{};
    queryResults.pSynchronizedTimestampsBuffer = nullptr;
    queryResults.pKernelTimestampsBuffer = kernelTimestamps.data();

    uint32_t requestedCount = 10;
    const auto status = event->queryKernelTimestampsExt(device, &requestedCount, &queryResults);

    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);
}
|
||||||
|
|
||||||
|
// Feeds three hand-built 32-bit timestamp packets to an event that has a reference
// (GPU, CPU) timestamp pair and checks the synchronized timestamps returned by
// queryKernelTimestampsExt, each within +/- errorOffset of the expected value.
TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhenQueryKernelTimestampsExtIsCalledCorrectValuesAreReturned) {

    // Zero-initialised so no packet field is ever read indeterminate.
    typename MockTimestampPackets32::Packet packetData[3] = {};
    device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.kernelTimestampValidBits = 32;
    auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper();
    event->setPacketsInUse(3u);
    event->hasKerneMappedTsCapability = true;
    // Taken from profilingTimerResolution; used below as the ns <-> timestamp-tick conversion factor.
    const auto deviceTsFrequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution;
    const int64_t gpuReferenceTimeInNs = 2000;
    const int64_t cpuReferenceTimeInNs = 3000;
    const auto maxKernelTsValue = maxNBitValue(32);

    NEO::TimeStampData referenceTs{static_cast<uint64_t>(gpuReferenceTimeInNs / deviceTsFrequency), cpuReferenceTimeInNs};
    event->setReferenceTs(referenceTs);

    // Converts a time in nanoseconds to a raw device timestamp value.
    auto timeToTimeStamp = [&](uint32_t timeInNs) {
        return static_cast<uint32_t>(timeInNs / deviceTsFrequency);
    };

    // Packet index 0: global start and end both lie after the GPU reference time.
    packetData[0].contextStart = 50u;
    packetData[0].contextEnd = 100u;
    packetData[0].globalStart = timeToTimeStamp(4000u);
    packetData[0].globalEnd = timeToTimeStamp(5000u);

    // Device Ts overflow case
    // Packet index 1: global start lies before the GPU reference time.
    packetData[1].contextStart = 20u;
    packetData[1].contextEnd = 30u;
    packetData[1].globalStart = timeToTimeStamp(500u);
    packetData[1].globalEnd = timeToTimeStamp(1500u);

    // Packet index 2: global end is smaller than global start.
    packetData[2].contextStart = 20u;
    packetData[2].contextEnd = 30u;
    packetData[2].globalStart = timeToTimeStamp(5000u);
    packetData[2].globalEnd = timeToTimeStamp(500u);

    event->hostAddress = packetData;
    uint32_t count = 0;

    // First call with a null results pointer only retrieves the number of results.
    EXPECT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestampsExt(device, &count, nullptr));
    EXPECT_EQ(count, 3u);

    std::vector<ze_kernel_timestamp_result_t> kernelTsBuffer(count);
    std::vector<ze_synchronized_timestamp_result_ext_t> synchronizedTsBuffer(count);

    ze_event_query_kernel_timestamps_results_ext_properties_t results{};
    results.pKernelTimestampsBuffer = kernelTsBuffer.data();
    results.pSynchronizedTimestampsBuffer = synchronizedTsBuffer.data();

    // Hand each packet to the event's completion data, stepping hostAddress by one packet per iteration.
    for (uint32_t packetId = 0; packetId < count; packetId++) {
        event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
        event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets<uint32_t>::getSinglePacketSize());
    }

    EXPECT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestampsExt(device, &count, &results));
    // Tolerance applied to every comparison below.
    uint64_t errorOffset = 5;
    // Packet 1
    auto expectedGlobalStart = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 4000u;
    auto expectedGlobalEnd = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 5000u;
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].global.kernelStart, expectedGlobalStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].global.kernelStart, expectedGlobalStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].global.kernelEnd, expectedGlobalEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].global.kernelEnd, expectedGlobalEnd + errorOffset);

    // Context start is expected to coincide with global start; context end adds the context duration.
    auto expectedContextStart = expectedGlobalStart;
    auto expectedContextEnd = expectedContextStart + (packetData[0].contextEnd - packetData[0].contextStart) * deviceTsFrequency;
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].context.kernelStart, expectedContextStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].context.kernelStart, expectedContextStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].context.kernelEnd, expectedContextEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].context.kernelEnd, expectedContextEnd + errorOffset);

    // Packet 2
    // The expected start includes one full wrap of the global timestamp counter.
    expectedGlobalStart = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 500u +
                          static_cast<uint64_t>(maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits()) * deviceTsFrequency);
    expectedGlobalEnd = expectedGlobalStart + (1500 - 500);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].global.kernelStart, expectedGlobalStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].global.kernelStart, expectedGlobalStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].global.kernelEnd, expectedGlobalEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].global.kernelEnd, expectedGlobalEnd + errorOffset);

    expectedContextStart = expectedGlobalStart;
    expectedContextEnd = expectedContextStart + (packetData[1].contextEnd - packetData[1].contextStart) * deviceTsFrequency;
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].context.kernelStart, expectedContextStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].context.kernelStart, expectedContextStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].context.kernelEnd, expectedContextEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].context.kernelEnd, expectedContextEnd + errorOffset);

    // Packet 3
    // The expected end accounts for the 32-bit kernel timestamp wrapping between start and end.
    expectedGlobalStart = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 5000u;
    expectedGlobalEnd = expectedGlobalStart + (static_cast<uint64_t>(maxKernelTsValue * deviceTsFrequency) - 5000u + 500u);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].global.kernelStart, expectedGlobalStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].global.kernelStart, expectedGlobalStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].global.kernelEnd, expectedGlobalEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].global.kernelEnd, expectedGlobalEnd + errorOffset);

    expectedContextStart = expectedGlobalStart;
    // Uses this packet's own contextStart (index 2), not the previous packet's.
    expectedContextEnd = expectedContextStart + (packetData[2].contextEnd - packetData[2].contextStart) * deviceTsFrequency;
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].context.kernelStart, expectedContextStart - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].context.kernelStart, expectedContextStart + errorOffset);
    EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].context.kernelEnd, expectedContextEnd - errorOffset);
    EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].context.kernelEnd, expectedContextEnd + errorOffset);
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) {
|
HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) {
|
||||||
typename MockTimestampPackets32::Packet data = {};
|
typename MockTimestampPackets32::Packet data = {};
|
||||||
data.contextStart = 1u;
|
data.contextStart = 1u;
|
||||||
|
|||||||
@@ -550,6 +550,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableStateComputeModeTracking, -1, "-1: default
|
|||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableStateBaseAddressTracking, -1, "-1: default: enabled, 0: disabled, 1: enabled. This flag enables tracking state base address changes in command lists")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableStateBaseAddressTracking, -1, "-1: default: enabled, 0: disabled, 1: enabled. This flag enables tracking state base address changes in command lists")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, SelectCmdListHeapAddressModel, -1, "-1: default, 0: private heaps, 1: stateless, 2: bindless, 3: bindful. This flag selects default command list heap address model. Values should match HeapAddressModel enum")
|
DECLARE_DEBUG_VARIABLE(int32_t, SelectCmdListHeapAddressModel, -1, "-1: default, 0: private heaps, 1: stateless, 2: bindless, 3: bindful. This flag selects default command list heap address model. Values should match HeapAddressModel enum")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableSetPair, -1, "Use SET_PAIR to pair two buffer objects behind the same file descriptor, -1: default, 0: disabled, 1: enabled")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableSetPair, -1, "Use SET_PAIR to pair two buffer objects behind the same file descriptor, -1: default, 0: disabled, 1: enabled")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, CommandListTimestampRefreshIntervalInMilliSec, -1, "-1: use driver default, This value sets the refresh interval for getting synchronized GPU and CPU timestamp")
|
||||||
/* Binary Cache */
|
/* Binary Cache */
|
||||||
DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation")
|
DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation")
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2021 Intel Corporation
|
* Copyright (C) 2018-2023 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -10,6 +10,7 @@
|
|||||||
#include "shared/source/os_interface/linux/device_time_drm.h"
|
#include "shared/source/os_interface/linux/device_time_drm.h"
|
||||||
#include "shared/source/os_interface/os_interface.h"
|
#include "shared/source/os_interface/os_interface.h"
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
|
|||||||
@@ -88,4 +88,14 @@ class MockOSTimeWithConstTimestamp : public OSTime {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class MockOSTimeWithConfigurableCpuTimestamp : public MockOSTimeWithConstTimestamp {
|
||||||
|
public:
|
||||||
|
uint64_t mockCpuTime = 0;
|
||||||
|
bool getCpuTime(uint64_t *timeStamp) override {
|
||||||
|
*timeStamp = mockCpuTime;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -536,4 +536,5 @@ VfBarResourceAllocationWa = 1
|
|||||||
EnableDynamicPostSyncAllocLayout = -1
|
EnableDynamicPostSyncAllocLayout = -1
|
||||||
ForceNumberOfThreadsInGpgpuThreadGroup = -1
|
ForceNumberOfThreadsInGpgpuThreadGroup = -1
|
||||||
PrintTimestampPacketUsage = -1
|
PrintTimestampPacketUsage = -1
|
||||||
|
CommandListTimestampRefreshIntervalInMilliSec = -1
|
||||||
# Please don't edit below this line
|
# Please don't edit below this line
|
||||||
|
|||||||
Reference in New Issue
Block a user