diff --git a/level_zero/api/core/ze_core_loader.cpp b/level_zero/api/core/ze_core_loader.cpp index c9d34fae04..f36f281c7a 100644 --- a/level_zero/api/core/ze_core_loader.cpp +++ b/level_zero/api/core/ze_core_loader.cpp @@ -447,6 +447,7 @@ zeGetEventProcAddrTable( pDdiTable->pfnQueryStatus = L0::zeEventQueryStatus; pDdiTable->pfnHostReset = L0::zeEventHostReset; pDdiTable->pfnQueryKernelTimestamp = L0::zeEventQueryKernelTimestamp; + pDdiTable->pfnQueryKernelTimestampsExt = L0::zeEventQueryKernelTimestampsExt; driverDdiTable.coreDdiTable.Event = *pDdiTable; if (driverDdiTable.enableTracing) { pDdiTable->pfnCreate = zeEventCreateTracing; diff --git a/level_zero/api/core/ze_event_api_entrypoints.h b/level_zero/api/core/ze_event_api_entrypoints.h index 5dcd67f3bb..64598eb9a8 100644 --- a/level_zero/api/core/ze_event_api_entrypoints.h +++ b/level_zero/api/core/ze_event_api_entrypoints.h @@ -100,6 +100,14 @@ ze_result_t zeEventQueryKernelTimestamp( ze_kernel_timestamp_result_t *timestampType) { return L0::Event::fromHandle(hEvent)->queryKernelTimestamp(timestampType); } + +ze_result_t zeEventQueryKernelTimestampsExt( + ze_event_handle_t hEvent, + ze_device_handle_t hDevice, + uint32_t *pCount, + ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) { + return L0::Event::fromHandle(hEvent)->queryKernelTimestampsExt(L0::Device::fromHandle(hDevice), pCount, pResults); +} } // namespace L0 extern "C" { @@ -223,6 +231,18 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestamp( dstptr); } +ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestampsExt( + ze_event_handle_t hEvent, + ze_device_handle_t hDevice, + uint32_t *pCount, + ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) { + return L0::zeEventQueryKernelTimestampsExt( + hEvent, + hDevice, + pCount, + pResults); +} + ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendQueryKernelTimestamps( ze_command_list_handle_t hCommandList, uint32_t numEvents, diff --git 
a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index e15feaedd8..3755070653 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -396,6 +396,7 @@ struct CommandList : _ze_command_list_handle_t { uint32_t commandListPerThreadPrivateScratchSize = 0u; uint32_t partitionCount = 1; uint32_t defaultMocsIndex = 0; + uint64_t timestampRefreshIntervalInNanoSec = 0; bool isFlushTaskSubmissionEnabled = false; bool isSyncModeQueue = false; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 1cdb2d65f3..5a6a4e93af 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -18,6 +18,7 @@ #include "shared/source/helpers/blit_properties.h" #include "shared/source/helpers/definitions/command_encoder_args.h" #include "shared/source/helpers/gfx_core_helper.h" +#include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/logical_state_helper.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/preamble.h" @@ -133,6 +134,9 @@ ze_result_t CommandListCoreFamily::reset() { this->ownedPrivateAllocations.clear(); cmdListCurrentStartOffset = 0; + mappedTsEventList.clear(); + previousSynchronizedTimestamp = {}; + return ZE_RESULT_SUCCESS; } @@ -232,6 +236,19 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO createLogicalStateHelper(); + const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution; + const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits); + if (hwInfo.capabilityTable.kernelTimestampValidBits < 64u) { + this->timestampRefreshIntervalInNanoSec = static_cast(maxKernelTsValue * frequency); + } else { + this->timestampRefreshIntervalInNanoSec = maxKernelTsValue; + } + if (NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get() != 
-1) { + constexpr uint64_t milliSecondsToNanoSeconds = 1000000u; /* 64-bit factor: the product below is then computed in 64 bits instead of wrapping in uint32_t for intervals above ~4294 ms */ + const uint32_t refreshTime = NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.get(); + this->timestampRefreshIntervalInNanoSec = refreshTime * milliSecondsToNanoSeconds; + } + return returnType; } @@ -320,7 +337,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernel(ze_kernel_h auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions, event, launchParams); - + addToMappedEventList(event); if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), @@ -352,8 +369,11 @@ ze_result_t CommandListCoreFamily::appendLaunchCooperativeKernel( CmdListKernelLaunchParams launchParams = {}; launchParams.isCooperative = true; - return appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs, - event, launchParams); + + ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs, + event, launchParams); + addToMappedEventList(event); + return ret; } template @@ -382,6 +402,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ launchParams.isIndirect = true; ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer, nullptr, launchParams); + addToMappedEventList(event); appendSignalEventPostWalker(event); return ret; @@ -427,7 +448,7 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd return ret; } } - + addToMappedEventList(event); appendSignalEventPostWalker(event); return ret; @@ -496,6 +517,7 @@ ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint appendEventForProfiling(signalEvent, true); applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges); appendSignalEventPostWalker(signalEvent); + addToMappedEventList(signalEvent); return ZE_RESULT_SUCCESS; } @@ -562,9 +584,11 @@ ze_result_t 
CommandListCoreFamily::appendImageCopyFromMemory(ze_i } if (isCopyOnly()) { - return appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(), - {0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch, - rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, event); + auto status = appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(), + {0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch, + rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, event); + addToMappedEventList(Event::fromHandle(hEvent)); + return status; } auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); @@ -639,9 +663,12 @@ ze_result_t CommandListCoreFamily::appendImageCopyFromMemory(ze_i CmdListKernelLaunchParams launchParams = {}; launchParams.isBuiltInKernel = true; - return CommandListCoreFamily::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs, - event, numWaitEvents, phWaitEvents, - launchParams, relaxedOrderingDispatch); + + auto status = CommandListCoreFamily::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs, + event, numWaitEvents, phWaitEvents, + launchParams, relaxedOrderingDispatch); + addToMappedEventList(Event::fromHandle(hEvent)); + return status; } template @@ -706,9 +733,11 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void * } if (isCopyOnly()) { - return appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc, - {pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch, - rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, event); + auto 
status = appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc, + {pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch, + rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, event); + addToMappedEventList(event); + return status; } auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); @@ -791,6 +820,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void * (dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR); ret = CommandListCoreFamily::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs, event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch); + addToMappedEventList(event); addFlushRequiredCommand(allocationStruct.needsFlush, event); @@ -890,9 +920,11 @@ ze_result_t CommandListCoreFamily::appendImageCopyRegion(ze_image auto dstSlicePitch = (dstImage->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 
1 : dstRegion.height) * dstRowPitch; - return appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(), - {srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch, - dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, event); + auto status = appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(), + {srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, event); + addToMappedEventList(event); + return status; } auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); @@ -929,9 +961,11 @@ ze_result_t CommandListCoreFamily::appendImageCopyRegion(ze_image CmdListKernelLaunchParams launchParams = {}; launchParams.isBuiltInKernel = true; - return CommandListCoreFamily::appendLaunchKernel(kernel->toHandle(), &kernelArgs, - event, numWaitEvents, phWaitEvents, - launchParams, relaxedOrderingDispatch); + auto status = CommandListCoreFamily::appendLaunchKernel(kernel->toHandle(), &kernelArgs, + event, numWaitEvents, phWaitEvents, + launchParams, relaxedOrderingDispatch); + addToMappedEventList(event); + return status; } template @@ -1154,7 +1188,6 @@ ze_result_t CommandListCoreFamily::appendCopyImageBlit(NEO::Graph commandContainer.addToResidencyContainer(clearColorAllocation); appendEventForProfiling(signalEvent, true); - NEO::BlitCommandsHelper::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), dummyBlitWa); makeResidentDummyAllocation(); @@ -1366,6 +1399,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket); 
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); + addToMappedEventList(signalEvent); if (this->inOrderExecutionEnabled && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) { obtainNewTimestampPacketNode(); @@ -1459,6 +1493,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d return result; } + addToMappedEventList(signalEvent); addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); if (this->inOrderExecutionEnabled && isCopyOnly() && inOrderCopyOnlySignalingAllowed) { @@ -1680,7 +1715,9 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } if (isCopyOnly()) { - return appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + auto status = appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + addToMappedEventList(signalEvent); + return status; } ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false); @@ -1873,6 +1910,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } } + addToMappedEventList(signalEvent); appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket); addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent); @@ -2407,6 +2445,8 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( } commandContainer.addToResidencyContainer(allocationStruct.alloc); + addToMappedEventList(signalEvent); + return ZE_RESULT_SUCCESS; } @@ -2518,6 +2558,8 @@ ze_result_t CommandListCoreFamily::appendQueryKernelTimestamps( return appendResult; } + addToMappedEventList(Event::fromHandle(hSignalEvent)); + return ZE_RESULT_SUCCESS; } @@ -2886,6 +2928,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ appendComputeBarrierCommand(); } + addToMappedEventList(signalEvent); appendSignalEventPostWalker(signalEvent); return ZE_RESULT_SUCCESS; } diff --git 
a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 56ff385d4d..9564ed11c4 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -104,6 +104,8 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate::flushBcsTask hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies ); + CommandListImp::storeReferenceTsToMappedEvents(true); + return csr->flushBcsTask(cmdStreamTask, taskStartOffset, dispatchBcsFlags, this->device->getHwInfo()); } @@ -198,6 +200,8 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate::flushRegular } } + CommandListImp::storeReferenceTsToMappedEvents(true); + return this->csr->flushTask( cmdStreamTask, taskStartOffset, @@ -875,6 +879,8 @@ ze_result_t CommandListCoreFamilyImmediate::performCpuMemcpy(cons } if (signalEvent) { + CommandListImp::addToMappedEventList(signalEvent); + CommandListImp::storeReferenceTsToMappedEvents(true); signalEvent->setGpuStartTimestamp(); } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 1a7a4b268d..21c814d44a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -19,6 +19,7 @@ #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" +#include "shared/source/os_interface/os_time.h" #include "shared/source/os_interface/sys_calls_common.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" @@ -237,4 +238,32 @@ void CommandListImp::enableInOrderExecution() { inOrderExecutionEnabled = true; } +void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) { + if (!mappedTsEventList.empty()) { /* clocks are only read when at least one mapped-timestamp event is pending */ + uint64_t currentCpuTimeStamp = 0; + device->getNEODevice()->getOSTime()->getCpuTime(&currentCpuTimeStamp); + const auto recalculate 
= + (currentCpuTimeStamp - previousSynchronizedTimestamp.cpuTimeinNS) > timestampRefreshIntervalInNanoSec; + if (previousSynchronizedTimestamp.cpuTimeinNS == 0 || recalculate) { /* refresh the cached CPU/GPU pair on first use or once it is older than the refresh interval */ + device->getNEODevice()->getOSTime()->getCpuGpuTime(&previousSynchronizedTimestamp); + } + + for (auto &event : mappedTsEventList) { + event->setReferenceTs(previousSynchronizedTimestamp); + } + + if (isClearEnabled) { + mappedTsEventList.clear(); + } + } +} + +void CommandListImp::addToMappedEventList(Event *event) { + if (event && event->hasKerneMappedTsCapability) { + if (std::find(mappedTsEventList.begin(), mappedTsEventList.end(), event) == mappedTsEventList.end()) { + mappedTsEventList.push_back(event); + } + } +} + } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index f678ba9afb..2043005aea 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/helpers/timestamp_packet_container.h" +#include "shared/source/os_interface/os_time.h" #include "level_zero/core/source/cmdlist/cmdlist.h" @@ -36,6 +37,9 @@ struct CommandListImp : CommandList { void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties); void enableInOrderExecution(); bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; } + void storeReferenceTsToMappedEvents(bool isClearEnabled); + void addToMappedEventList(Event *event); + const std::vector<Event *> &peekMappedEventList() { return mappedTsEventList; } protected: std::unique_ptr nonImmediateLogicalStateHelper; @@ -51,6 +55,8 @@ struct CommandListImp : CommandList { static constexpr bool cmdListDefaultPipelineSelectModeSelected = true; static constexpr bool cmdListDefaultMediaSamplerClockGate = false; static constexpr bool cmdListDefaultGlobalAtomics = false; + std::vector<Event *> mappedTsEventList{}; + NEO::TimeStampData previousSynchronizedTimestamp{}; }; } // namespace L0 diff 
--git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 56ea245410..4fa15d8980 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -222,6 +222,12 @@ ze_result_t CommandQueueHw::executeCommandListsRegular( this->programStateSipEndWA(ctx.stateSipRequired, child); this->assignCsrTaskCountToFenceIfAvailable(hFence); this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, child); + + for (auto i = 0u; i < numCommandLists; ++i) { + auto commandList = static_cast(CommandList::fromHandle(commandListHandles[i])); + commandList->storeReferenceTsToMappedEvents(false); + } + auto submitResult = this->prepareAndSubmitBatchBuffer(ctx, child); this->csr->setPreemptionMode(ctx.statePreemption); @@ -288,6 +294,11 @@ ze_result_t CommandQueueHw::executeCommandListsCopyOnly( this->assignCsrTaskCountToFenceIfAvailable(hFence); this->programLastCommandListReturnBbStart(child, ctx); + + for (auto i = 0u; i < numCommandLists; ++i) { + auto commandList = static_cast(CommandList::fromHandle(phCommandLists[i])); + commandList->storeReferenceTsToMappedEvents(false); + } this->dispatchTaskCountPostSyncByMiFlushDw(ctx.isDispatchTaskCountPostSyncRequired, child); this->makeCsrTagAllocationResident(); diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index d9fd8983cf..90953fea20 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -876,6 +876,9 @@ ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties) NEO::Device *activeDevice = getActiveDevice(); auto &compilerProductHelper = activeDevice->getCompilerProductHelper(); zeDeviceIpVersion->ipVersion = compilerProductHelper.getHwIpVersion(hardwareInfo); + } else if (extendedProperties->stype == ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PROPERTIES) { + 
ze_event_query_kernel_timestamps_ext_properties_t *kernelTimestampExtProperties = reinterpret_cast(extendedProperties); + kernelTimestampExtProperties->flags = ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_KERNEL | ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_SYNCHRONIZED; } extendedProperties = static_cast(extendedProperties->pNext); } diff --git a/level_zero/core/source/driver/driver_handle_imp.h b/level_zero/core/source/driver/driver_handle_imp.h index 82b9347474..07f2fda7f7 100644 --- a/level_zero/core/source/driver/driver_handle_imp.h +++ b/level_zero/core/source/driver/driver_handle_imp.h @@ -119,7 +119,8 @@ struct DriverHandleImp : public DriverHandle { {ZE_DEVICE_IP_VERSION_EXT_NAME, ZE_DEVICE_IP_VERSION_VERSION_CURRENT}, {ZE_CACHE_RESERVATION_EXT_NAME, ZE_CACHE_RESERVATION_EXT_VERSION_CURRENT}, {ZE_IMAGE_VIEW_EXT_NAME, ZE_IMAGE_VIEW_EXP_VERSION_CURRENT}, - {ZE_IMAGE_VIEW_PLANAR_EXT_NAME, ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_CURRENT}}; + {ZE_IMAGE_VIEW_PLANAR_EXT_NAME, ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_CURRENT}, + {ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_NAME, ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_VERSION_CURRENT}}; uint64_t uuidTimestamp = 0u; diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 4c93cd73bd..aa1cdb4dc1 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -9,6 +9,7 @@ #include "shared/source/helpers/timestamp_packet_constants.h" #include "shared/source/helpers/timestamp_packet_container.h" #include "shared/source/memory_manager/multi_graphics_allocation.h" +#include "shared/source/os_interface/os_time.h" #include @@ -69,6 +70,8 @@ struct Event : _ze_event_handle_t { virtual ze_result_t reset() = 0; virtual ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) = 0; virtual ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) = 0; + virtual ze_result_t queryKernelTimestampsExt(Device *device, uint32_t 
*pCount, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) = 0; + enum State : uint32_t { STATE_SIGNALED = 0u, HOST_CACHING_DISABLED_PERMANENT = std::numeric_limits::max() - 2, @@ -210,6 +213,10 @@ struct Event : _ze_event_handle_t { void enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes); bool isInOrderExecEvent() const { return inOrderExecEvent; } const NEO::TimestampPacketContainer *getInOrderTimestampPacket() const { return inOrderTimestampPacket.get(); } + void setReferenceTs(const NEO::TimeStampData &timestamp) { /* stores a copy of the given timestamp pair; the argument is not modified */ + referenceTs = timestamp; + } + bool hasKerneMappedTsCapability = false; protected: Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {} @@ -218,6 +225,7 @@ struct Event : _ze_event_handle_t { uint64_t globalEndTS = 1; uint64_t contextStartTS = 1; uint64_t contextEndTS = 1; + NEO::TimeStampData referenceTs{}; std::chrono::microseconds gpuHangCheckPeriod{500'000}; std::bitset l3FlushAppliedOnKernel; @@ -268,6 +276,9 @@ struct EventPool : _ze_event_pool_handle_t { DriverHandleImp *driver, ContextImp *context, uint32_t numDevices, ze_device_handle_t *deviceHandles); EventPool(const ze_event_pool_desc_t *desc) : EventPool(desc->count) { eventPoolFlags = desc->flags; + if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) { + eventPoolFlags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + } } virtual ~EventPool(); MOCKABLE_VIRTUAL ze_result_t destroy(); @@ -299,6 +310,13 @@ struct EventPool : _ze_event_pool_handle_t { return false; } + bool isEventPoolKerneMappedTsFlagSet() const { + if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) { + return true; + } + return false; + } + uint32_t getMaxKernelCount() const { return maxKernelCount; } diff --git a/level_zero/core/source/event/event_imp.h b/level_zero/core/source/event/event_imp.h index 72e49d4825..72b238a1c5 100644 --- a/level_zero/core/source/event/event_imp.h +++ 
b/level_zero/core/source/event/event_imp.h @@ -52,6 +52,7 @@ struct EventImp : public Event { ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) override; ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) override; + ze_result_t queryKernelTimestampsExt(Device *device, uint32_t *pCount, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) override; void resetDeviceCompletionData(bool resetAllPackets); void resetKernelCountAndPacketUsedCount() override; @@ -74,6 +75,8 @@ struct EventImp : public Event { ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal); MOCKABLE_VIRTUAL void assignKernelEventCompletionData(void *address); void setRemainingPackets(TagSizeT eventVal, void *nextPacketAddress, uint32_t packetsAlreadySet); + void getSynchronizedKernelTimestamps(ze_synchronized_timestamp_result_ext_t *pSynchronizedTimestampsBuffer, + const uint32_t count, const ze_kernel_timestamp_result_t *pKernelTimestampsBuffer); }; } // namespace L0 \ No newline at end of file diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 55a752cc3a..76249df11b 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -9,6 +9,7 @@ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/sub_device.h" +#include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/os_interface/os_time.h" @@ -32,6 +33,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * event->setEventTimestampFlag(true); event->setSinglePacketSize(NEO::TimestampPackets::getSinglePacketSize()); } + event->hasKerneMappedTsCapability = 
eventPool->isEventPoolKerneMappedTsFlagSet(); auto &hwInfo = neoDevice->getHardwareInfo(); event->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo); @@ -494,6 +496,85 @@ ze_result_t EventImp::queryTimestampsExp(Device *device, uint32_t *cou return ZE_RESULT_SUCCESS; } +template +void EventImp::getSynchronizedKernelTimestamps(ze_synchronized_timestamp_result_ext_t *pSynchronizedTimestampsBuffer, + const uint32_t count, const ze_kernel_timestamp_result_t *pKernelTimestampsBuffer) { + + auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper(); + auto &hwInfo = device->getNEODevice()->getHardwareInfo(); + const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution; + auto deviceTsInNs = gfxCoreHelper.getGpuTimeStampInNS(referenceTs.gpuTimeStamp, frequency); + const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits); + + auto getDuration = [&](uint64_t startTs, uint64_t endTs) { + const uint64_t maxValue = maxKernelTsValue; + startTs &= maxValue; + endTs &= maxValue; + + if (startTs > endTs) { + // Resolve overflows + return endTs + (maxValue - startTs); + } else { + return endTs - startTs; + } + }; + + const auto &referenceHostTsInNs = referenceTs.cpuTimeinNS; + + // High Level Approach: + // startTimeStamp = (referenceHostTsInNs - submitDeviceTs) + kernelDeviceTsStart + // deviceDuration = kernelDeviceTsEnd - kernelDeviceTsStart + // endTimeStamp = startTimeStamp + deviceDuration + + // Get offset between Device and Host timestamps + const int64_t tsOffsetInNs = referenceHostTsInNs - deviceTsInNs; + + auto calculateSynchronizedTs = [&](ze_synchronized_timestamp_data_ext_t *synchronizedTs, const ze_kernel_timestamp_data_t *deviceTs) { + // Add the offset to the kernel timestamp to find the start timestamp on the CPU timescale + int64_t offset = tsOffsetInNs; + uint64_t startTimeStampInNs = static_cast(deviceTs->kernelStart * frequency) + offset; + if (startTimeStampInNs < 
referenceHostTsInNs) { + offset += static_cast(maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits()) * frequency); + startTimeStampInNs = static_cast(deviceTs->kernelStart * frequency) + offset; + } + + // Get the kernel timestamp duration + uint64_t deviceDuration = getDuration(deviceTs->kernelStart, deviceTs->kernelEnd); + uint64_t deviceDurationNs = static_cast(deviceDuration * frequency); + // Add the duration to the startTimeStamp to get the endTimeStamp + uint64_t endTimeStampInNs = startTimeStampInNs + deviceDurationNs; + + synchronizedTs->kernelStart = startTimeStampInNs; + synchronizedTs->kernelEnd = endTimeStampInNs; + }; + + for (uint32_t index = 0; index < count; index++) { + calculateSynchronizedTs(&pSynchronizedTimestampsBuffer[index].global, &pKernelTimestampsBuffer[index].global); + + pSynchronizedTimestampsBuffer[index].context.kernelStart = pSynchronizedTimestampsBuffer[index].global.kernelStart; + uint64_t deviceDuration = getDuration(pKernelTimestampsBuffer[index].context.kernelStart, + pKernelTimestampsBuffer[index].context.kernelEnd); + uint64_t deviceDurationNs = static_cast(deviceDuration * frequency); + pSynchronizedTimestampsBuffer[index].context.kernelEnd = pSynchronizedTimestampsBuffer[index].context.kernelStart + + deviceDurationNs; + } +} + +template +ze_result_t EventImp::queryKernelTimestampsExt(Device *device, uint32_t *pCount, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) { + + if (*pCount == 0) { + return queryTimestampsExp(device, pCount, nullptr); + } + + ze_result_t status = queryTimestampsExp(device, pCount, pResults->pKernelTimestampsBuffer); + + if (status == ZE_RESULT_SUCCESS && hasKerneMappedTsCapability && pResults->pKernelTimestampsBuffer != nullptr && pResults->pSynchronizedTimestampsBuffer != nullptr) { /* the mapping reads one buffer and writes the other, so skip it when either is null; NOTE(review): the extension struct appears to mark both buffers optional - confirm against the spec */ + getSynchronizedKernelTimestamps(pResults->pSynchronizedTimestampsBuffer, *pCount, pResults->pKernelTimestampsBuffer); + } + return status; +} + template uint32_t EventImp::getPacketsInUse() const { uint32_t packetsInUse = 0; diff --git a/level_zero/core/test/black_box_tests/zello_timestamp.cpp 
b/level_zero/core/test/black_box_tests/zello_timestamp.cpp index a3336a6cb6..f3d3a513b5 100644 --- a/level_zero/core/test/black_box_tests/zello_timestamp.cpp +++ b/level_zero/core/test/black_box_tests/zello_timestamp.cpp @@ -7,6 +7,7 @@ #include "zello_common.h" +#include #include #include #include @@ -27,6 +28,20 @@ inline std::vector loadBinaryFile(const std::string &filePath) { return binaryFile; } +void createImmediateCommandList(ze_device_handle_t &device, + ze_context_handle_t &context, + bool syncMode, + ze_command_list_handle_t &cmdList) { + ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; + cmdQueueDesc.pNext = nullptr; + cmdQueueDesc.flags = 0; + cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + cmdQueueDesc.ordinal = getCommandQueueOrdinal(device); + cmdQueueDesc.index = 0; + selectQueueMode(cmdQueueDesc, syncMode); + SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &cmdQueueDesc, &cmdList)); +} + void createCmdQueueAndCmdList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue, @@ -59,6 +74,21 @@ void createCmdQueueAndCmdList(ze_context_handle_t &context, SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); } +void createImmediateCommandList(ze_device_handle_t &device, + ze_context_handle_t &context, + uint32_t queueGroupOrdinal, + bool syncMode, + ze_command_list_handle_t &cmdList) { + ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; + cmdQueueDesc.pNext = nullptr; + cmdQueueDesc.flags = 0; + cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + cmdQueueDesc.ordinal = queueGroupOrdinal; + cmdQueueDesc.index = 0; + selectQueueMode(cmdQueueDesc, syncMode); + SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &cmdQueueDesc, &cmdList)); +} + bool testWriteGlobalTimestamp(int argc, char *argv[], ze_context_handle_t &context, ze_driver_handle_t &driver, @@ -394,7 +424,8 @@ bool 
testKernelTimestampMapToHostTimescale(int argc, char *argv[], ze_event_pool_handle_t eventPool; ze_event_handle_t kernelTsEvent; - bool runTillDeviceTsOverflows = isParamEnabled(argc, argv, "-o", "--runTillOverflow"); + bool runTillDeviceTsOverflows = isParamEnabled(argc, argv, "-d", "--runTillDeviceTsOverflow"); + bool runTillKernelTsOverflows = isParamEnabled(argc, argv, "-k", "--runTillKernelTsOverflow"); // Create commandQueue and cmdList createCmdQueueAndCmdList(context, device, cmdQueue, cmdList); @@ -489,7 +520,7 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[], } }; - uint64_t unusedHostTs, referenceDeviceTs; + uint64_t unusedHostTs, referenceDeviceTs, referenceKernelTs = 0; SUCCESS_OR_TERMINATE(zeDeviceGetGlobalTimestamps(device, &unusedHostTs, &referenceDeviceTs)); std::cout << "ReferenceDeviceTs: " << referenceDeviceTs << "\n"; @@ -502,6 +533,12 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[], SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); ze_kernel_timestamp_result_t *kernelTsResults = reinterpret_cast(timestampBuffer); + auto currMinKernelTs = std::min(kernelTsResults->global.kernelStart, kernelTsResults->global.kernelEnd); + if (referenceKernelTs == 0) { + referenceKernelTs = currMinKernelTs; + std::cout << "ReferencekernelTs: " << referenceKernelTs << "\n"; + } + // High Level Approach: // startTimeStamp = (submitHostTs - submitDeviceTs) + kernelDeviceTsStart // deviceDuration = kernelDeviceTsEnd - kernelDeviceTsStart @@ -528,10 +565,14 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[], std::cout << " | submit[host,device]: [" << submitHostTs << ", " << submitDeviceTs << "]"; std::cout << " | deviceTsOnHostTimescale[start, end] : [" << startTimeStamp << ", " << endTimeStamp << " ] \n"; ++iter; - if (runTillDeviceTsOverflows) { + if (runTillDeviceTsOverflows || runTillKernelTsOverflows) { i = 0; - if (referenceDeviceTs > submitDeviceTs) { - 
runTillDeviceTsOverflows = false; + if (runTillDeviceTsOverflows && referenceDeviceTs > submitDeviceTs) { + runTillKernelTsOverflows = runTillDeviceTsOverflows = false; + } + + if (runTillKernelTsOverflows && referenceKernelTs > currMinKernelTs) { + runTillKernelTsOverflows = runTillDeviceTsOverflows = false; } } } @@ -549,6 +590,216 @@ bool testKernelTimestampMapToHostTimescale(int argc, char *argv[], return true; } +bool testKernelMappedTimestampMap(int argc, char *argv[], + ze_context_handle_t &context, + ze_driver_handle_t &driver, + ze_device_handle_t &device) { + + ze_command_queue_handle_t cmdQueue; + ze_command_list_handle_t cmdList; + ze_module_handle_t module; + ze_kernel_handle_t kernel; + void *srcBuffer = nullptr; + void *dstBuffer = nullptr; + void *timestampBuffer = nullptr; + ze_event_pool_handle_t eventPool; + constexpr uint32_t maxEventUsageCount = 3; + uint32_t eventUsageCount = maxEventUsageCount; + constexpr size_t allocSize = 4096; + ze_group_count_t dispatchTraits; + + bool runTillDeviceTsOverflows = isParamEnabled(argc, argv, "-o", "--runTillOverflow"); + bool useSingleCommand = isParamEnabled(argc, argv, "-s", "--useSingleCommand"); + bool useImmediate = isParamEnabled(argc, argv, "-i", "--useImmediate"); + int defaultVerboseLevel = 1; + int verboseLevel = getParamValue(argc, argv, "-l", "--verboseLevel", defaultVerboseLevel); + + if (useSingleCommand) { + eventUsageCount = 1; + } + + ze_event_handle_t kernelTsEvent[maxEventUsageCount]; + createEventPoolAndEvents(context, device, eventPool, + (ze_event_pool_flag_t)(ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP), maxEventUsageCount, kernelTsEvent, + ZE_EVENT_SCOPE_FLAG_DEVICE, ZE_EVENT_SCOPE_FLAG_HOST); + + // Create commandQueue and cmdList + if (useImmediate) { + createImmediateCommandList(device, context, false, cmdList); + } else { + createCmdQueueAndCmdList(context, device, cmdQueue, cmdList); + } + + auto prepareKernel = [&]() { + // Create two shared 
buffers + ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; + deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; + deviceDesc.ordinal = 0; + + ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; + hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; + + SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); + SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); + SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, sizeof(ze_kernel_timestamp_result_t), 1, &timestampBuffer)); + + // Initialize memory + constexpr uint8_t val = 55; + memset(srcBuffer, val, allocSize); + memset(dstBuffer, 0, allocSize); + memset(timestampBuffer, 0, sizeof(ze_kernel_timestamp_result_t)); + + // Create kernel + auto spirvModule = loadBinaryFile("copy_buffer_to_buffer.spv"); + if (spirvModule.size() == 0) { + return false; + } + + ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; + moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; + moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(spirvModule.data()); + moduleDesc.inputSize = spirvModule.size(); + SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &moduleDesc, &module, nullptr)); + + ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; + kernelDesc.pKernelName = "CopyBufferToBufferBytes"; + SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); + + uint32_t groupSizeX = 32u; + uint32_t groupSizeY = 1u; + uint32_t groupSizeZ = 1u; + SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, static_cast<uint32_t>(allocSize), 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); + SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); + + uint32_t offset = 0; + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer),
&srcBuffer)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset)); + + dispatchTraits.groupCountX = static_cast<uint32_t>(allocSize) / groupSizeX; + dispatchTraits.groupCountY = 1u; + dispatchTraits.groupCountZ = 1u; + return true; + }; + + uint64_t previousMaximumSyncTs = std::numeric_limits<uint64_t>::min(); + uint64_t referenceMinimumGlobalTs = 0; + + prepareKernel(); + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, kernelTsEvent[0], 0, nullptr)); + if (!useSingleCommand) { + SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, kernelTsEvent[1], 0u, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, kernelTsEvent[2], 0, nullptr)); + } + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + } + + for (uint32_t i = 0; i < 10; i++) { + + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max())); + } else { + // Immediate Commandlist case + SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, kernelTsEvent[0], 0, nullptr)); + if (!useSingleCommand) { + SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, kernelTsEvent[1], 0u, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, kernelTsEvent[2], 0, nullptr)); + } + SUCCESS_OR_TERMINATE(zeCommandListHostSynchronize(cmdList, std::numeric_limits<uint64_t>::max())); + } + + uint64_t currentMinimumSyncTs = std::numeric_limits<uint64_t>::max(); + uint64_t currentMaximumSyncTs = std::numeric_limits<uint64_t>::min(); + uint64_t currentMinimumGlobalTs = std::numeric_limits<uint64_t>::max(); + + for (uint32_t j = 0; j <
eventUsageCount; j++) { + uint32_t count = 0; + if (verboseLevel == 1) { + std::cout << "[iter(" << i << ")][event(" << j << ")]====>\n"; + } + SUCCESS_OR_TERMINATE(zeEventQueryStatus(kernelTsEvent[j])); + SUCCESS_OR_TERMINATE(zeEventQueryKernelTimestampsExt(kernelTsEvent[j], device, &count, nullptr)); + if (count == 0) { + return false; + } + + std::vector<ze_kernel_timestamp_result_t> timestampResult(count); + std::vector<ze_synchronized_timestamp_result_ext_t> syncTimestampResult(count); + + ze_event_query_kernel_timestamps_results_ext_properties_t properties = {}; + properties.pNext = nullptr; + properties.pKernelTimestampsBuffer = timestampResult.data(); + properties.pSynchronizedTimestampsBuffer = syncTimestampResult.data(); + SUCCESS_OR_TERMINATE(zeEventQueryKernelTimestampsExt(kernelTsEvent[j], device, &count, &properties)); + + for (uint32_t k = 0; k < count; k++) { + const auto &ts = properties.pKernelTimestampsBuffer[k]; + const auto &syncTs = properties.pSynchronizedTimestampsBuffer[k]; + + currentMinimumSyncTs = std::min(currentMinimumSyncTs, syncTs.global.kernelStart); + currentMinimumSyncTs = std::min(currentMinimumSyncTs, syncTs.global.kernelEnd); + currentMaximumSyncTs = std::max(currentMaximumSyncTs, syncTs.global.kernelStart); + currentMaximumSyncTs = std::max(currentMaximumSyncTs, syncTs.global.kernelEnd); + + currentMinimumGlobalTs = std::min(currentMinimumGlobalTs, ts.global.kernelStart); + currentMinimumGlobalTs = std::min(currentMinimumGlobalTs, ts.global.kernelEnd); + + if (verboseLevel == 1) { + std::cout << "\t[packedId:" << k << " ]" + << "[global-ts(" << ts.global.kernelStart << " , " << ts.global.kernelEnd << " ) " + << "| syncTs( " << syncTs.global.kernelStart << " , " << syncTs.global.kernelEnd << " )] " + << "# [context-ts( " << ts.context.kernelStart << " , " << ts.context.kernelEnd << " ) " + << "| syncTs ( " << syncTs.context.kernelStart << " , " << syncTs.context.kernelEnd << " )]\n"; + } + + if (verboseLevel == 2) { + std::cout << "KernelSyncTs: " << syncTs.global.kernelStart << " , " <<
syncTs.global.kernelEnd + << " | ContextSyncTs: " << syncTs.context.kernelStart << " , " << syncTs.context.kernelEnd << "\n"; + } + } + SUCCESS_OR_TERMINATE(zeEventHostReset(kernelTsEvent[j])); + } + + if (currentMinimumSyncTs < previousMaximumSyncTs) { + std::cout << "\n\n!!FAILED: Current Minimum Ts : " << currentMinimumSyncTs << " less than Previous Maximum Ts : " << previousMaximumSyncTs << "\n\n"; + return false; + } + previousMaximumSyncTs = currentMaximumSyncTs; + + if (!referenceMinimumGlobalTs) { + referenceMinimumGlobalTs = currentMinimumGlobalTs; + } else { + if (runTillDeviceTsOverflows) { + if (currentMinimumGlobalTs < referenceMinimumGlobalTs) { + runTillDeviceTsOverflows = false; + } + i = 0; + } + } + } + + // Cleanup + SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); + SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); + SUCCESS_OR_TERMINATE(zeMemFree(context, timestampBuffer)); + for (uint32_t j = 0; j < eventUsageCount; j++) { + SUCCESS_OR_TERMINATE(zeEventDestroy(kernelTsEvent[j])); + } + + SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPool)); + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); + if (!useImmediate) { + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); + } + SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel)); + SUCCESS_OR_TERMINATE(zeModuleDestroy(module)); + return true; +} + int main(int argc, char *argv[]) { const std::string blackBoxName("Zello Timestamp"); verbose = isVerbose(argc, argv); @@ -563,6 +814,7 @@ int main(int argc, char *argv[]) { supportedTests["testKernelTimestampAppendQueryWithDeviceProperties"] = testKernelTimestampAppendQueryWithDeviceProperties; supportedTests["testWriteGlobalTimestamp"] = testWriteGlobalTimestamp; supportedTests["testKernelTimestampHostQuery"] = testKernelTimestampHostQuery; + supportedTests["testKernelMappedTimestampMap"] = testKernelMappedTimestampMap; const char *defaultString = "testKernelTimestampAppendQueryWithDeviceProperties"; const char *test = getParamValue(argc, argv, 
"-t", "--test", defaultString); diff --git a/level_zero/core/test/unit_tests/mocks/mock_event.h b/level_zero/core/test/unit_tests/mocks/mock_event.h index b4860c507c..ed13640eb6 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_event.h +++ b/level_zero/core/test/unit_tests/mocks/mock_event.h @@ -76,6 +76,7 @@ struct Mock : public Event { ADDMETHOD_NOBASE(reset, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(queryKernelTimestamp, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_timestamp_result_t * dstptr)); ADDMETHOD_NOBASE(queryTimestampsExp, ze_result_t, ZE_RESULT_SUCCESS, (::L0::Device * device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps)); + ADDMETHOD_NOBASE(queryKernelTimestampsExt, ze_result_t, ZE_RESULT_SUCCESS, (::L0::Device * device, uint32_t *count, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults)); // Fake an allocation for event memory alignas(16) uint32_t memory = -1; @@ -154,6 +155,9 @@ class MockEvent : public ::L0::Event { ze_result_t queryTimestampsExp(L0::Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) override { return ZE_RESULT_SUCCESS; } + ze_result_t queryKernelTimestampsExt(L0::Device *device, uint32_t *count, ze_event_query_kernel_timestamps_results_ext_properties_t *pResults) override { + return ZE_RESULT_SUCCESS; + } uint32_t getPacketsUsedInLastKernel() override { return 1; } uint32_t getPacketsInUse() const override { return 1; } void resetPackets(bool resetAllPackets) override {} diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index bfe708faf7..8244261b13 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -3058,5 +3058,147 @@ HWTEST2_F(CommandListHostSynchronize, whenHostSychronizeIsCalledReturnInvalidArg commandList->destroy(); } +using 
CommandListMappedTimestampTest = CommandListAppendLaunchKernel; + +HWTEST2_F(CommandListMappedTimestampTest, givenMappedTimestampSignalEventWhenAppendApiIsCalledThenTheEventIsAddedToMappedSignalList, IsAtLeastSkl) { + createKernel(); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP; + eventPoolDesc.count = 2; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.wait = 0; + eventDesc.signal = 0; + + ze_result_t returnValue; + std::unique_ptr eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + std::unique_ptr event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + ze_group_count_t groupCount{1, 1, 1}; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + EXPECT_EQ(event.get(), commandList->peekMappedEventList()[0]); +} + +HWTEST2_F(CommandListMappedTimestampTest, givenSignalEventWithoutMappedTimstampWhenAppendApiIsCalledThenTheEventIsNotAddedToMappedSignalList, IsAtLeastSkl) { + createKernel(); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 2; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.wait = 0; + eventDesc.signal = 0; + + ze_result_t returnValue; + std::unique_ptr eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + std::unique_ptr event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + ze_group_count_t groupCount{1, 1, 1}; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, 
NEO::EngineGroupType::RenderCompute, 0u); + + returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + EXPECT_EQ(0u, commandList->peekMappedEventList().size()); +} + +HWTEST2_F(CommandListMappedTimestampTest, givenMappedTimestampSignalEventWhenAppendApiIsCalledMultipleTimesThenTheEventIsAddedOnceToMappedSignalList, IsAtLeastSkl) { + createKernel(); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP; + eventPoolDesc.count = 2; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.wait = 0; + eventDesc.signal = 0; + + ze_result_t returnValue; + std::unique_ptr eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + std::unique_ptr event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + ze_group_count_t groupCount{1, 1, 1}; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + returnValue = commandList->appendBarrier(event->toHandle(), 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + EXPECT_EQ(event.get(), commandList->peekMappedEventList()[0]); + EXPECT_EQ(1u, commandList->peekMappedEventList().size()); +} + +HWTEST2_F(CommandListMappedTimestampTest, givenEventIsAddedToMappedEventListWhenStoringReferenceTimestampWithClearThenEventsAreCleared, IsAtLeastSkl) { + createKernel(); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP; + eventPoolDesc.count = 2; + + ze_event_desc_t eventDesc = {}; + 
eventDesc.index = 0; + eventDesc.wait = 0; + eventDesc.signal = 0; + + ze_result_t returnValue; + std::unique_ptr eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + std::unique_ptr event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + neoDevice->setOSTime(new MockOSTime()); + commandList->addToMappedEventList(event.get()); + commandList->storeReferenceTsToMappedEvents(true); + EXPECT_EQ(0u, commandList->peekMappedEventList().size()); +} + +HWTEST2_F(CommandListMappedTimestampTest, givenCommandListTimestampRefreshIntervalInMilliSecIsSetWhenStoringReferenceTimestampThenUpdatedRefreshIntervalIsUsed, IsPVC) { + + DebugManagerStateRestore restorer; + createKernel(); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP; + eventPoolDesc.count = 2; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.wait = 0; + eventDesc.signal = 0; + + ze_result_t returnValue; + std::unique_ptr eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + std::unique_ptr event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + NEO::DebugManager.flags.CommandListTimestampRefreshIntervalInMilliSec.set(0); + auto commandList = std::make_unique>>(); + neoDevice->setOSTime(new MockOSTimeWithConfigurableCpuTimestamp()); + auto osTime = static_cast(neoDevice->getOSTime()); + commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + commandList->addToMappedEventList(event.get()); + osTime->mockCpuTime = 1; + commandList->storeReferenceTsToMappedEvents(false); + EXPECT_EQ(1u, commandList->peekMappedEventList().size()); + commandList->addToMappedEventList(event.get()); + 
commandList->storeReferenceTsToMappedEvents(false); + osTime->mockCpuTime = 2; + commandList->addToMappedEventList(event.get()); + commandList->storeReferenceTsToMappedEvents(true); + EXPECT_EQ(0u, commandList->peekMappedEventList().size()); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp index f48d8115c7..35a34c891e 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp @@ -4513,5 +4513,20 @@ TEST(DeviceReturnSubDevicesAsApiDevicesTest, GivenReturnSubDevicesAsApiDevicesIs multiDeviceFixture.tearDown(); } +TEST_F(DeviceTest, GivenValidDeviceWhenQueryingKernelTimestampsProptertiesThenCorrectPropertiesIsReturned) { + ze_device_properties_t devProps; + ze_event_query_kernel_timestamps_ext_properties_t tsProps; + + devProps.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + devProps.pNext = &tsProps; + + tsProps.stype = ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PROPERTIES; + tsProps.pNext = nullptr; + + EXPECT_EQ(ZE_RESULT_SUCCESS, zeDeviceGetProperties(device, &devProps)); + EXPECT_NE(0u, tsProps.flags & ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_KERNEL); + EXPECT_NE(0u, tsProps.flags & ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_SYNCHRONIZED); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 4e48d9f799..3908c6e6ac 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -2078,6 +2078,142 @@ TEST_F(EventQueryTimestampExpWithRootDeviceAndSubDevices, givenEventWhenQuerytim } } +using EventqueryKernelTimestampsExt = Test>; + +TEST_F(EventqueryKernelTimestampsExt, 
givenpCountLargerThanSupportedWhenCallingQueryKernelTimestampsExtThenpCountSetProperly) { + uint32_t pCount = 10; + event->setPacketsInUse(2u); + + std::vector kernelTsBuffer(2); + ze_event_query_kernel_timestamps_results_ext_properties_t results{}; + results.pKernelTimestampsBuffer = kernelTsBuffer.data(); + results.pSynchronizedTimestampsBuffer = nullptr; + + auto result = event->queryKernelTimestampsExt(device, &pCount, &results); + + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(2u, pCount); +} + +TEST_F(EventqueryKernelTimestampsExt, givenEventWithStaticPartitionOffThenQueryKernelTimestampsExtReturnsUnsupported) { + DebugManagerStateRestore restore; + NEO::DebugManager.flags.EnableStaticPartitioning.set(0); + + event->hasKerneMappedTsCapability = true; + + std::vector kernelTsBuffer(2); + ze_event_query_kernel_timestamps_results_ext_properties_t results{}; + results.pKernelTimestampsBuffer = kernelTsBuffer.data(); + results.pSynchronizedTimestampsBuffer = nullptr; + + uint32_t pCount = 10; + auto result = event->queryKernelTimestampsExt(device, &pCount, &results); + + EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); +} + +TEST_F(EventqueryKernelTimestampsExt, givenEventWithMappedTimestampCapabilityWhenQueryKernelTimestampsExtIsCalledCorrectValuesAreReturned) { + + typename MockTimestampPackets32::Packet packetData[3]; + device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.kernelTimestampValidBits = 32; + auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper(); + event->setPacketsInUse(3u); + event->hasKerneMappedTsCapability = true; + const auto deviceTsFrequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution; + const int64_t gpuReferenceTimeInNs = 2000; + const int64_t cpuReferenceTimeInNs = 3000; + const auto maxKernelTsValue = maxNBitValue(32); + + NEO::TimeStampData referenceTs{static_cast(gpuReferenceTimeInNs / deviceTsFrequency), cpuReferenceTimeInNs}; + 
event->setReferenceTs(referenceTs); + + auto timeToTimeStamp = [&](uint32_t timeInNs) { + return static_cast(timeInNs / deviceTsFrequency); + }; + + packetData[0].contextStart = 50u; + packetData[0].contextEnd = 100u; + packetData[0].globalStart = timeToTimeStamp(4000u); + packetData[0].globalEnd = timeToTimeStamp(5000u); + + // Device Ts overflow case + packetData[1].contextStart = 20u; + packetData[1].contextEnd = 30u; + packetData[1].globalStart = timeToTimeStamp(500u); + packetData[1].globalEnd = timeToTimeStamp(1500u); + + packetData[2].contextStart = 20u; + packetData[2].contextEnd = 30u; + packetData[2].globalStart = timeToTimeStamp(5000u); + packetData[2].globalEnd = timeToTimeStamp(500u); + + event->hostAddress = packetData; + uint32_t count = 0; + + EXPECT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestampsExt(device, &count, nullptr)); + EXPECT_EQ(count, 3u); + + std::vector kernelTsBuffer(count); + std::vector synchronizedTsBuffer(count); + + ze_event_query_kernel_timestamps_results_ext_properties_t results{}; + results.pKernelTimestampsBuffer = kernelTsBuffer.data(); + results.pSynchronizedTimestampsBuffer = synchronizedTsBuffer.data(); + + for (uint32_t packetId = 0; packetId < count; packetId++) { + event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress); + event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets::getSinglePacketSize()); + } + + EXPECT_EQ(ZE_RESULT_SUCCESS, event->queryKernelTimestampsExt(device, &count, &results)); + uint64_t errorOffset = 5; + // Packet 1 + auto expectedGlobalStart = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 4000u; + auto expectedGlobalEnd = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 5000u; + EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].global.kernelStart, expectedGlobalStart - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].global.kernelStart, expectedGlobalStart + errorOffset); + 
EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].global.kernelEnd, expectedGlobalEnd - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].global.kernelEnd, expectedGlobalEnd + errorOffset); + + auto expectedContextStart = expectedGlobalStart; + auto expectedContextEnd = expectedContextStart + (packetData[0].contextEnd - packetData[0].contextStart) * deviceTsFrequency; + EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].context.kernelStart, expectedContextStart - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].context.kernelStart, expectedContextStart + errorOffset); + EXPECT_GE(results.pSynchronizedTimestampsBuffer[0].context.kernelEnd, expectedContextEnd - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[0].context.kernelEnd, expectedContextEnd + errorOffset); + + // Packet 2 + expectedGlobalStart = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 500u + + static_cast(maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits()) * deviceTsFrequency); + expectedGlobalEnd = expectedGlobalStart + (1500 - 500); + EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].global.kernelStart, expectedGlobalStart - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].global.kernelStart, expectedGlobalStart + errorOffset); + EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].global.kernelEnd, expectedGlobalEnd - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].global.kernelEnd, expectedGlobalEnd + errorOffset); + + expectedContextStart = expectedGlobalStart; + expectedContextEnd = expectedContextStart + (packetData[1].contextEnd - packetData[1].contextStart) * deviceTsFrequency; + EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].context.kernelStart, expectedContextStart - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].context.kernelStart, expectedContextStart + errorOffset); + EXPECT_GE(results.pSynchronizedTimestampsBuffer[1].context.kernelEnd, expectedContextEnd - errorOffset); + 
EXPECT_LE(results.pSynchronizedTimestampsBuffer[1].context.kernelEnd, expectedContextEnd + errorOffset); + + // Packet 3 + expectedGlobalStart = (cpuReferenceTimeInNs - gpuReferenceTimeInNs) + 5000u; + expectedGlobalEnd = expectedGlobalStart + (static_cast(maxKernelTsValue * deviceTsFrequency) - 5000u + 500u); + EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].global.kernelStart, expectedGlobalStart - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].global.kernelStart, expectedGlobalStart + errorOffset); + EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].global.kernelEnd, expectedGlobalEnd - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].global.kernelEnd, expectedGlobalEnd + errorOffset); + + expectedContextStart = expectedGlobalStart; + expectedContextEnd = expectedContextStart + (packetData[2].contextEnd - packetData[1].contextStart) * deviceTsFrequency; + EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].context.kernelStart, expectedContextStart - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].context.kernelStart, expectedContextStart + errorOffset); + EXPECT_GE(results.pSynchronizedTimestampsBuffer[2].context.kernelEnd, expectedContextEnd - errorOffset); + EXPECT_LE(results.pSynchronizedTimestampsBuffer[2].context.kernelEnd, expectedContextEnd + errorOffset); +} + HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) { typename MockTimestampPackets32::Packet data = {}; data.contextStart = 1u; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 05b9f49ac8..962d31cda0 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -550,6 +550,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableStateComputeModeTracking, -1, "-1: default DECLARE_DEBUG_VARIABLE(int32_t, EnableStateBaseAddressTracking, -1, "-1: 
default: enabled, 0: disabled, 1: enabled. This flag enables tracking state base address changes in command lists") DECLARE_DEBUG_VARIABLE(int32_t, SelectCmdListHeapAddressModel, -1, "-1: default, 0: private heaps, 1: stateless, 2: bindless, 3: bindful. This flag selects default command list heap address model. Values should match HeapAddressModel enum") DECLARE_DEBUG_VARIABLE(int32_t, EnableSetPair, -1, "Use SET_PAIR to pair two buffer objects behind the same file descriptor, -1: default, 0: disabled, 1: enabled") +DECLARE_DEBUG_VARIABLE(int32_t, CommandListTimestampRefreshIntervalInMilliSec, -1, "-1: use driver default, This value sets the refresh interval for getting synchronized GPU and CPU timestamp") /* Binary Cache */ DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation") diff --git a/shared/source/os_interface/linux/os_time_linux.cpp b/shared/source/os_interface/linux/os_time_linux.cpp index 5eb38e7732..0dcbca39d9 100644 --- a/shared/source/os_interface/linux/os_time_linux.cpp +++ b/shared/source/os_interface/linux/os_time_linux.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -10,6 +10,7 @@ #include "shared/source/os_interface/linux/device_time_drm.h" #include "shared/source/os_interface/os_interface.h" +#include #include namespace NEO { diff --git a/shared/test/common/mocks/mock_ostime.h b/shared/test/common/mocks/mock_ostime.h index be78ad4f60..2a01714eec 100644 --- a/shared/test/common/mocks/mock_ostime.h +++ b/shared/test/common/mocks/mock_ostime.h @@ -88,4 +88,14 @@ class MockOSTimeWithConstTimestamp : public OSTime { return 0; } }; + +class MockOSTimeWithConfigurableCpuTimestamp : public MockOSTimeWithConstTimestamp { + public: + uint64_t mockCpuTime = 0; + bool getCpuTime(uint64_t *timeStamp) override { + *timeStamp = mockCpuTime; + return true; + } +}; + } // 
namespace NEO diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index e4d4c8c259..d6e04f48d7 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -536,4 +536,5 @@ VfBarResourceAllocationWa = 1 EnableDynamicPostSyncAllocLayout = -1 ForceNumberOfThreadsInGpgpuThreadGroup = -1 PrintTimestampPacketUsage = -1 +CommandListTimestampRefreshIntervalInMilliSec = -1 # Please don't edit below this line