/*
 * Copyright (C) 2020-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "level_zero/core/source/event/event.h"

#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/utilities/cpuintrinsics.h"
#include "shared/source/utilities/wait_util.h"

#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/cmdlist/cmdlist_imp.h"
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
#include "level_zero/core/source/context/context_imp.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/event/event_impl.inl"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"

#include

namespace L0 {

// Explicit instantiations of the templated Event factory defined in event_impl.inl.
template Event *Event::create<uint32_t>(EventPool *, const ze_event_desc_t *, Device *);
template Event *Event::create<uint64_t>(EventPool *, const ze_event_desc_t *, Device *);
template Event *Event::create<uint32_t>(const EventDescriptor &, Device *, ze_result_t &);
template Event *Event::create<uint64_t>(const EventDescriptor &, Device *, ze_result_t &);

bool Event::standaloneInOrderTimestampAllocationEnabled() {
    return (NEO::debugManager.flags.StandaloneInOrderTimestampAllocationEnabled.get() != 0);
}
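
// EventPool::initialize validates the pool descriptor flags, collects the target devices,
// sizes the pool, and backs it with either a device-local allocation or a host-visible
// multi-root-device allocation. Counter-based pools (requested through the
// ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC extension) cannot be combined with
// the IPC pool flag.
//
// Illustrative, application-side sketch only (not part of this file); the handle
// variables hContext, hDevice and hEventPool are placeholders:
//   ze_event_pool_counter_based_exp_desc_t counterBasedDesc = {ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC};
//   counterBasedDesc.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE;
//   ze_event_pool_desc_t poolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC};
//   poolDesc.pNext = &counterBasedDesc;
//   poolDesc.count = 1;
//   zeEventPoolCreate(hContext, &poolDesc, 1, &hDevice, &hEventPool);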
ze_result_t EventPool::initialize(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *deviceHandles) {
    this->context = static_cast<ContextImp *>(context);

    const bool counterBased = (counterBasedFlags != 0);
    if (isIpcPoolFlagSet() && counterBased) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; }

    constexpr uint32_t supportedCounterBasedFlags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE | ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE;
    if (counterBased && ((counterBasedFlags & supportedCounterBasedFlags) == 0)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; }

    RootDeviceIndicesContainer rootDeviceIndices;
    uint32_t maxRootDeviceIndex = 0u;
    uint32_t currentNumDevices = numDevices;

    DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(driver);
    bool useDevicesFromApi = true;
    this->isDeviceEventPoolAllocation = isEventPoolDeviceAllocationFlagSet();

    if (numDevices == 0) {
        currentNumDevices = static_cast<uint32_t>(driverHandleImp->devices.size());
        useDevicesFromApi = false;
    }

    for (uint32_t i = 0u; i < currentNumDevices; i++) {
        Device *eventDevice = nullptr;

        if (useDevicesFromApi) {
            eventDevice = Device::fromHandle(deviceHandles[i]);
        } else {
            eventDevice = driverHandleImp->devices[i];
        }

        if (!eventDevice) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; }

        devices.push_back(eventDevice);
        rootDeviceIndices.pushUnique(eventDevice->getNEODevice()->getRootDeviceIndex());
        if (maxRootDeviceIndex < eventDevice->getNEODevice()->getRootDeviceIndex()) {
            maxRootDeviceIndex = eventDevice->getNEODevice()->getRootDeviceIndex();
        }

        isImplicitScalingCapable |= eventDevice->isImplicitScalingCapable();
    }

    auto &rootDeviceEnvironment = getDevice()->getNEODevice()->getRootDeviceEnvironment();
    auto &l0GfxCoreHelper = rootDeviceEnvironment.getHelper<L0GfxCoreHelper>();

    this->isDeviceEventPoolAllocation |= l0GfxCoreHelper.alwaysAllocateEventInLocalMem();

    initializeSizeParameters(numDevices, deviceHandles, *driverHandleImp, rootDeviceEnvironment);

    NEO::AllocationType allocationType = isEventPoolTimestampFlagSet() ? NEO::AllocationType::timestampPacketTagBuffer
                                                                       : NEO::AllocationType::bufferHostMemory;
    if (this->devices.size() > 1) {
        this->isDeviceEventPoolAllocation = false;
    }

    if (this->isDeviceEventPoolAllocation) {
        allocationType = NEO::AllocationType::gpuTimestampDeviceBuffer;
    }

    eventPoolAllocations = std::make_unique<NEO::MultiGraphicsAllocation>(maxRootDeviceIndex);

    bool allocatedMemory = false;

    auto neoDevice = devices[0]->getNEODevice();

    if (this->isDeviceEventPoolAllocation) {
        this->isHostVisibleEventPoolAllocation = !(isEventPoolDeviceAllocationFlagSet());

        NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), this->eventPoolSize, allocationType, neoDevice->getDeviceBitfield()};
        allocationProperties.alignment = eventAlignment;

        auto memoryManager = driver->getMemoryManager();
        auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties);
        if (graphicsAllocation) {
            eventPoolAllocations->addAllocation(graphicsAllocation);
            allocatedMemory = true;
            if (isIpcPoolFlagSet()) {
                uint64_t handle = 0;
                this->isShareableEventMemory = (graphicsAllocation->peekInternalHandle(memoryManager, handle) == 0);
            }
        }
    } else {
        this->isHostVisibleEventPoolAllocation = true;

        NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), this->eventPoolSize, allocationType, systemMemoryBitfield};
        allocationProperties.alignment = eventAlignment;

        eventPoolPtr = driver->getMemoryManager()->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, allocationProperties, *eventPoolAllocations);
        if (isIpcPoolFlagSet()) {
            this->isShareableEventMemory = eventPoolAllocations->getDefaultGraphicsAllocation()->isShareableHostMemory();
        }
        allocatedMemory = (nullptr != eventPoolPtr);
    }

    if (!allocatedMemory) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; }

    if (neoDevice->getDefaultEngine().commandStreamReceiver->isTbxMode()) {
        eventPoolAllocations->getDefaultGraphicsAllocation()->setWriteMemoryOnly(true);
    }

    return ZE_RESULT_SUCCESS;
}

EventPool::~EventPool() {
    if (eventPoolAllocations) {
        auto graphicsAllocations = eventPoolAllocations->getGraphicsAllocations();
        auto memoryManager = devices[0]->getDriverHandle()->getMemoryManager();
        for (auto gpuAllocation : graphicsAllocations) {
            memoryManager->freeGraphicsMemory(gpuAllocation);
        }
    }
}

ze_result_t EventPool::destroy() {
    delete this;
    return ZE_RESULT_SUCCESS;
}

ze_result_t EventPool::createEvent(const ze_event_desc_t *desc, ze_event_handle_t *eventHandle) {
    if (desc->index > (getNumEvents() - 1)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; }

    auto &l0GfxCoreHelper = getDevice()->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
    *eventHandle = l0GfxCoreHelper.createEvent(this, desc, getDevice());

    return ZE_RESULT_SUCCESS;
}

ze_result_t EventPool::getContextHandle(ze_context_handle_t *phContext) {
    *phContext = context->toHandle();
    return ZE_RESULT_SUCCESS;
}

ze_result_t EventPool::getFlags(ze_event_pool_flags_t *pFlags) {
    *pFlags = eventPoolFlags;
    if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) {
        *pFlags &= ~ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    }
    return ZE_RESULT_SUCCESS;
}
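
// Per-event storage is sized from the maximum number of timestamp packets an event may
// need (optionally replaced by a dynamic, device-dependent packet count), plus an extra
// NEO::TimeStampData slot when kernel-mapped timestamps are requested; the whole pool is
// then rounded up to a 64KB page.
//
// Worked example under assumed values (packet size and alignment are HW-specific): with
// eventPackets = 3, a 64-byte packet and kernel-mapped timestamps enabled,
// eventSize = 3 * 64 + sizeof(NEO::TimeStampData), rounded up to eventAlignment, and a
// pool of 10 such events occupies alignUp(10 * eventSize, 64KB) = one 64KB page.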
void EventPool::initializeSizeParameters(uint32_t numDevices, ze_device_handle_t *deviceHandles, DriverHandleImp &driver, const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {
    auto &l0GfxCoreHelper = rootDeviceEnvironment.getHelper<L0GfxCoreHelper>();
    auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();

    setEventAlignment(static_cast<uint32_t>(gfxCoreHelper.getTimestampPacketAllocatorAlignment()));

    auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
    bool useDynamicEventPackets = l0GfxCoreHelper.useDynamicEventPacketsCount(hwInfo);
    eventPackets = EventPacketsCount::eventPackets;
    maxKernelCount = EventPacketsCount::maxKernelSplit;
    if (useDynamicEventPackets) {
        eventPackets = driver.getEventMaxPacketCount(numDevices, deviceHandles);
        maxKernelCount = driver.getEventMaxKernelCount(numDevices, deviceHandles);
    }

    auto eventSize = eventPackets * gfxCoreHelper.getSingleTimestampPacketSize();
    if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) {
        eventSize += sizeof(NEO::TimeStampData);
    }

    setEventSize(static_cast<uint32_t>(alignUp(eventSize, eventAlignment)));
    eventPoolSize = alignUp(this->numEvents * eventSize, MemoryConstants::pageSize64k);
}

EventPool *EventPool::create(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *deviceHandles, const ze_event_pool_desc_t *desc, ze_result_t &result) {
    auto eventPool = std::make_unique<EventPool>(desc);
    result = eventPool->initialize(driver, context, numDevices, deviceHandles);
    if (result) { return nullptr; }
    return eventPool.release();
}

void EventPool::setupDescriptorFlags(const ze_event_pool_desc_t *desc) {
    eventPoolFlags = desc->flags;
    if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP) {
        eventPoolFlags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    }
    this->isIpcPoolFlag = !!(eventPoolFlags & ZE_EVENT_POOL_FLAG_IPC);

    auto pNext = reinterpret_cast<const ze_base_desc_t *>(desc->pNext);
    if (pNext && pNext->stype == ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC) {
        auto counterBasedDesc = reinterpret_cast<const ze_event_pool_counter_based_exp_desc_t *>(pNext);
        counterBasedFlags = counterBasedDesc->flags;
        if (counterBasedFlags == 0) {
            counterBasedFlags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE;
        }
    }
}

bool EventPool::isEventPoolTimestampFlagSet() const {
    if (NEO::debugManager.flags.OverrideTimestampEvents.get() != -1) {
        auto timestampOverride = !!NEO::debugManager.flags.OverrideTimestampEvents.get();
        return timestampOverride;
    }
    if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP) { return true; }
    return false;
}

ze_result_t EventPool::closeIpcHandle() { return this->destroy(); }
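
// Counter-based event IPC import: the exported device (and optional host) counter
// allocations are re-opened from their shared handles, wrapped in an InOrderExecInfo,
// and attached to a newly created event, so the importing process waits on the same
// in-order counter as the exporting process.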
ze_result_t Event::openCounterBasedIpcHandle(const IpcCounterBasedEventData &ipcData, ze_event_handle_t *eventHandle, DriverHandleImp *driver, ContextImp *context, uint32_t numDevices, ze_device_handle_t *deviceHandles) {
    auto device = Device::fromHandle(*deviceHandles);
    auto neoDevice = device->getNEODevice();
    auto memoryManager = driver->getMemoryManager();

    NEO::MemoryManager::OsHandleData deviceOsHandleData{ipcData.deviceHandle};
    NEO::MemoryManager::OsHandleData hostOsHandleData{ipcData.hostHandle};

    NEO::AllocationProperties unifiedMemoryProperties{ipcData.rootDeviceIndex, MemoryConstants::pageSize64k, NEO::DeviceAllocNodeType<true>::getAllocationType(), systemMemoryBitfield};
    unifiedMemoryProperties.subDevicesBitfield = neoDevice->getDeviceBitfield();

    auto *deviceAlloc = memoryManager->createGraphicsAllocationFromSharedHandle(deviceOsHandleData, unifiedMemoryProperties, false, (ipcData.hostHandle == 0), false, nullptr);
    if (!deviceAlloc) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; }

    if (neoDevice->getDefaultEngine().commandStreamReceiver->isTbxMode()) {
        deviceAlloc->setWriteMemoryOnly(true);
    }

    NEO::GraphicsAllocation *hostAlloc = nullptr;
    if (ipcData.hostHandle != 0) {
        unifiedMemoryProperties.allocationType = NEO::DeviceAllocNodeType<false>::getAllocationType();

        hostAlloc = memoryManager->createGraphicsAllocationFromSharedHandle(hostOsHandleData, unifiedMemoryProperties, false, true, false, nullptr);
        if (!hostAlloc) {
            memoryManager->freeGraphicsMemory(deviceAlloc);
            return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
        }
        if (neoDevice->getDefaultEngine().commandStreamReceiver->isTbxMode()) {
            hostAlloc->setWriteMemoryOnly(true);
        }
    } else {
        hostAlloc = deviceAlloc;
    }

    auto inOrderExecInfo = NEO::InOrderExecInfo::createFromExternalAllocation(*neoDevice, deviceAlloc, deviceAlloc->getGpuAddress(), hostAlloc, static_cast<uint64_t *>(hostAlloc->getUnderlyingBuffer()),
                                                                              ipcData.counterValue, ipcData.devicePartitions, ipcData.hostPartitions);

    const EventDescriptor eventDescriptor = {
        nullptr,                           // eventPoolAllocation
        nullptr,                           // extensions
        0,                                 // totalEventSize
        EventPacketsCount::maxKernelSplit, // maxKernelCount
        1,                                 // maxPacketsCount
        ipcData.counterBasedFlags,         // counterBasedFlags
        0,                                 // index
        ipcData.signalScopeFlags,          // signalScope
        ipcData.waitScopeFlags,            // waitScope
        false,                             // timestampPool
        false,                             // kernelMappedTsPoolFlag
        true,                              // importedIpcPool
        false,                             // ipcPool
    };

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto event = Event::create<uint64_t>(eventDescriptor, device, result);
    event->updateInOrderExecState(inOrderExecInfo, ipcData.counterValue, ipcData.counterOffset);

    *eventHandle = event;

    return result;
}

ze_result_t Event::getCounterBasedIpcHandle(IpcCounterBasedEventData &ipcData) {
    if (!this->isSharableCounterBased) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; }
    if (!isCounterBasedExplicitlyEnabled() || !this->inOrderExecInfo.get() || isEventTimestampFlagSet()) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; }

    ipcData = {};
    ipcData.rootDeviceIndex = device->getRootDeviceIndex();
    ipcData.counterValue = this->getInOrderExecSignalValueWithSubmissionCounter();
    ipcData.counterBasedFlags = this->counterBasedFlags;
    ipcData.signalScopeFlags = this->signalScope;
    ipcData.waitScopeFlags = this->waitScope;

    auto memoryManager = device->getNEODevice()->getMemoryManager();
    auto deviceAlloc = inOrderExecInfo->getDeviceCounterAllocation();

    uint64_t handle = 0;
    if (int retCode = deviceAlloc->peekInternalHandle(memoryManager, handle); retCode != 0) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; }
    memoryManager->registerIpcExportedAllocation(deviceAlloc);

    ipcData.deviceHandle = handle;
    ipcData.devicePartitions = inOrderExecInfo->getNumDevicePartitionsToWait();
    ipcData.hostPartitions = ipcData.devicePartitions;
    ipcData.counterOffset = static_cast<uint32_t>(inOrderExecInfo->getBaseDeviceAddress() - deviceAlloc->getGpuAddress()) + inOrderAllocationOffset;

    if (inOrderExecInfo->isHostStorageDuplicated()) {
        auto hostAlloc = inOrderExecInfo->getHostCounterAllocation();
        if (int retCode = hostAlloc->peekInternalHandle(memoryManager, handle); retCode != 0) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; }
        memoryManager->registerIpcExportedAllocation(hostAlloc);

        ipcData.hostHandle = handle;
        ipcData.hostPartitions = inOrderExecInfo->getNumHostPartitionsToWait();
    }

    return ZE_RESULT_SUCCESS;
}
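
// Regular (non counter-based) event pool IPC export: the pool's backing allocation is
// exposed through an internal handle, and the sizing/layout metadata needed to rebuild
// the pool on the importing side is packed into the opaque ze_ipc_event_pool_handle_t
// payload as IpcEventPoolData.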
ze_result_t EventPool::getIpcHandle(ze_ipc_event_pool_handle_t *ipcHandle) {
    if (!this->isShareableEventMemory) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; }

    IpcEventPoolData &poolData = *reinterpret_cast<IpcEventPoolData *>(ipcHandle->data);
    poolData = {};
    poolData.numEvents = this->numEvents;
    poolData.rootDeviceIndex = this->getDevice()->getRootDeviceIndex();
    poolData.isDeviceEventPoolAllocation = this->isDeviceEventPoolAllocation;
    poolData.isHostVisibleEventPoolAllocation = this->isHostVisibleEventPoolAllocation;
    poolData.isImplicitScalingCapable = this->isImplicitScalingCapable;
    poolData.maxEventPackets = this->getEventMaxPackets();
    poolData.numDevices = static_cast<uint32_t>(this->devices.size());
    poolData.isEventPoolKernelMappedTsFlagSet = this->isEventPoolKernelMappedTsFlagSet();

    auto memoryManager = this->context->getDriverHandle()->getMemoryManager();
    auto allocation = this->eventPoolAllocations->getDefaultGraphicsAllocation();

    uint64_t handle{};
    if (int retCode = allocation->peekInternalHandle(memoryManager, handle); retCode != 0) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; }
    poolData.handle = handle;

    memoryManager->registerIpcExportedAllocation(allocation);
    return ZE_RESULT_SUCCESS;
}

ze_result_t EventPool::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t &ipcEventPoolHandle, ze_event_pool_handle_t *eventPoolHandle, DriverHandleImp *driver, ContextImp *context, uint32_t numDevices, ze_device_handle_t *deviceHandles) {
    const IpcEventPoolData &poolData = *reinterpret_cast<const IpcEventPoolData *>(ipcEventPoolHandle.data);

    ze_event_pool_desc_t desc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC};
    if (poolData.isEventPoolKernelMappedTsFlagSet) {
        desc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP;
    }
    desc.count = static_cast<uint32_t>(poolData.numEvents);

    auto eventPool = std::make_unique<EventPool>(&desc);
    eventPool->isDeviceEventPoolAllocation = poolData.isDeviceEventPoolAllocation;
    eventPool->isHostVisibleEventPoolAllocation = poolData.isHostVisibleEventPoolAllocation;
    eventPool->isImplicitScalingCapable = poolData.isImplicitScalingCapable;

    ze_device_handle_t *deviceHandlesUsed = deviceHandles;

    UNRECOVERABLE_IF(numDevices == 0);
    auto device = Device::fromHandle(*deviceHandles);
    auto neoDevice = device->getNEODevice();
    NEO::MemoryManager::OsHandleData osHandleData{poolData.handle};

    if (poolData.numDevices == 1) {
        for (uint32_t i = 0; i < numDevices; i++) {
            auto deviceStruct = Device::fromHandle(deviceHandles[i]);
            auto neoDeviceIteration = deviceStruct->getNEODevice();
            if (neoDeviceIteration->getRootDeviceIndex() == poolData.rootDeviceIndex) {
                *deviceHandlesUsed = deviceHandles[i];
                neoDevice = neoDeviceIteration;
                break;
            }
        }
        numDevices = 1;
    }

    eventPool->initializeSizeParameters(numDevices, deviceHandlesUsed, *driver, neoDevice->getRootDeviceEnvironment());
    if (eventPool->getEventMaxPackets() != poolData.maxEventPackets) {
        PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr,
                           "IPC handle max event packets %u does not match context devices max event packet %u\n",
                           poolData.maxEventPackets, eventPool->getEventMaxPackets());
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    NEO::AllocationType allocationType = NEO::AllocationType::bufferHostMemory;
    if (eventPool->isDeviceEventPoolAllocation) {
        allocationType = NEO::AllocationType::gpuTimestampDeviceBuffer;
    }

    NEO::AllocationProperties unifiedMemoryProperties{poolData.rootDeviceIndex, eventPool->getEventPoolSize(), allocationType, systemMemoryBitfield};
    unifiedMemoryProperties.subDevicesBitfield = neoDevice->getDeviceBitfield();

    auto memoryManager = driver->getMemoryManager();
    NEO::GraphicsAllocation *alloc = memoryManager->createGraphicsAllocationFromSharedHandle(osHandleData, unifiedMemoryProperties, false, eventPool->isHostVisibleEventPoolAllocation, false, nullptr);
    if (alloc == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; }

    if (neoDevice->getDefaultEngine().commandStreamReceiver->isTbxMode()) {
        alloc->setWriteMemoryOnly(true);
    }

    eventPool->context = context;
    eventPool->eventPoolAllocations = std::make_unique<NEO::MultiGraphicsAllocation>(static_cast<uint32_t>(context->rootDeviceIndices.size()));
    eventPool->eventPoolAllocations->addAllocation(alloc);
    eventPool->eventPoolPtr = reinterpret_cast<void *>(alloc->getUnderlyingBuffer());
    for (uint32_t i = 0; i < numDevices; i++) {
        eventPool->devices.push_back(Device::fromHandle(deviceHandlesUsed[i]));
    }
    eventPool->isImportedIpcPool = true;

    if (numDevices > 1) {
        for (auto currDeviceIndex : context->rootDeviceIndices) {
            if (currDeviceIndex == poolData.rootDeviceIndex) { continue; }

            unifiedMemoryProperties.rootDeviceIndex = currDeviceIndex;
            unifiedMemoryProperties.flags.isUSMHostAllocation = true;
            unifiedMemoryProperties.flags.forceSystemMemory = true;
            unifiedMemoryProperties.flags.allocateMemory = false;
            auto graphicsAllocation = memoryManager->createGraphicsAllocationFromExistingStorage(unifiedMemoryProperties, eventPool->eventPoolPtr, eventPool->getAllocation());
            if (!graphicsAllocation) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; }
            eventPool->eventPoolAllocations->addAllocation(graphicsAllocation);
        }
    }

    *eventPoolHandle = eventPool.release();
    return ZE_RESULT_SUCCESS;
}
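
// Event teardown: any in-order timestamp node still held by the event is handed back to
// its InOrderExecInfo, unused temporary nodes are released, and counter allocations that
// were opened by the counter-based IPC import path above are freed here, since the
// importing process owns them.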
void Event::releaseTempInOrderTimestampNodes() {
    if (inOrderExecInfo) {
        inOrderExecInfo->releaseNotUsedTempTimestampNodes(false);
    }
}

ze_result_t Event::destroy() {
    resetInOrderTimestampNode(nullptr);
    releaseTempInOrderTimestampNodes();

    if (isCounterBasedExplicitlyEnabled() && isFromIpcPool) {
        auto memoryManager = device->getNEODevice()->getMemoryManager();
        memoryManager->freeGraphicsMemory(inOrderExecInfo->getExternalDeviceAllocation());
        if (inOrderExecInfo->isHostStorageDuplicated()) {
            memoryManager->freeGraphicsMemory(inOrderExecInfo->getExternalHostAllocation());
        }
    }

    delete this;
    return ZE_RESULT_SUCCESS;
}
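
// Counter-based mode transitions: an event starts as initiallyDisabled and can be
// promoted once, either explicitly (apiRequest == true) or implicitly by the driver.
// Only the implicit promotion is reversible: an explicitly enabled event keeps its
// counter-based state, while an implicitly enabled one can be demoted to
// implicitlyDisabled, losing its counter-based flags and in-order execution info.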
void Event::enableCounterBasedMode(bool apiRequest, uint32_t flags) {
    if (counterBasedMode == CounterBasedMode::initiallyDisabled) {
        counterBasedMode = apiRequest ? CounterBasedMode::explicitlyEnabled : CounterBasedMode::implicitlyEnabled;
        counterBasedFlags = flags;
    }
}

void Event::disableImplicitCounterBasedMode() {
    if (isCounterBasedExplicitlyEnabled()) { return; }

    if (counterBasedMode == CounterBasedMode::implicitlyEnabled || counterBasedMode == CounterBasedMode::initiallyDisabled) {
        counterBasedMode = CounterBasedMode::implicitlyDisabled;
        counterBasedFlags = 0;
        unsetInOrderExecInfo();
    }
}

uint64_t Event::getGpuAddress(Device *device) const {
    if (inOrderTimestampNode) {
        return inOrderTimestampNode->getGpuAddress();
    }
    return getAllocation(device)->getGpuAddress() + this->eventPoolOffset;
}

void *Event::getHostAddress() const {
    if (inOrderTimestampNode) {
        return inOrderTimestampNode->getCpuBase();
    }
    return this->hostAddressFromPool;
}

NEO::GraphicsAllocation *Event::getAllocation(Device *device) const {
    auto rootDeviceIndex = device->getNEODevice()->getRootDeviceIndex();
    if (inOrderTimestampNode) {
        return inOrderTimestampNode->getBaseGraphicsAllocation()->getGraphicsAllocation(rootDeviceIndex);
    } else if (eventPoolAllocation) {
        return eventPoolAllocation->getGraphicsAllocation(rootDeviceIndex);
    }
    return nullptr;
}

void Event::setGpuStartTimestamp() {
    if (isEventTimestampFlagSet()) {
        this->device->getGlobalTimestamps(&cpuStartTimestamp, &gpuStartTimestamp);
        cpuStartTimestamp = cpuStartTimestamp / this->device->getNEODevice()->getDeviceInfo().outProfilingTimerResolution;
    }
}

void Event::setGpuEndTimestamp() {
    if (isEventTimestampFlagSet()) {
        auto resolution = this->device->getNEODevice()->getDeviceInfo().outProfilingTimerResolution;
        uint64_t cpuEndTimestamp = 0;
        this->device->getNEODevice()->getOSTime()->getCpuTime(&cpuEndTimestamp);
        cpuEndTimestamp = cpuEndTimestamp / resolution;
        this->gpuEndTimestamp = gpuStartTimestamp + std::max<uint64_t>(1u, (cpuEndTimestamp - cpuStartTimestamp));
    }
}

void *Event::getCompletionFieldHostAddress() const {
    return ptrOffset(getHostAddress(), getCompletionFieldOffset());
}

void Event::increaseKernelCount() {
    kernelCount++;
    UNRECOVERABLE_IF(kernelCount > maxKernelCount);
}

void Event::resetPackets(bool resetAllPackets) {
    if (resetAllPackets) {
        resetKernelCountAndPacketUsedCount();
    }
    cpuStartTimestamp = 0;
    gpuStartTimestamp = 0;
    gpuEndTimestamp = 0;
    this->csrs.clear();
    this->csrs.push_back(this->device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
}

void Event::setIsCompleted() {
    if (this->isCompleted.load() == STATE_CLEARED) {
        this->isCompleted = STATE_SIGNALED;
    }
    unsetCmdQueue();
}

void Event::updateInOrderExecState(std::shared_ptr<NEO::InOrderExecInfo> &newInOrderExecInfo, uint64_t signalValue, uint32_t allocationOffset) {
    resetCompletionStatus();

    if (this->inOrderExecInfo.get() != newInOrderExecInfo.get()) {
        inOrderExecInfo = newInOrderExecInfo;
    }

    inOrderExecSignalValue = signalValue;
    inOrderAllocationOffset = allocationOffset;
}

uint64_t Event::getInOrderExecSignalValueWithSubmissionCounter() const {
    uint64_t appendCounter = inOrderExecInfo.get() ? NEO::InOrderPatchCommandHelpers::getAppendCounterValue(*inOrderExecInfo) : 0;
    return (inOrderExecSignalValue + appendCounter);
}
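
// An event remembers the command queue it was most recently used with; once the event is
// marked completed (see setIsCompleted above), that queue is unregistered as a client
// from every CSR associated with the event.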
void Event::setLatestUsedCmdQueue(CommandQueue *newCmdQ) {
    this->latestUsedCmdQueue = newCmdQ;
}

void Event::unsetCmdQueue() {
    for (auto &csr : csrs) {
        csr->unregisterClient(latestUsedCmdQueue);
    }
    latestUsedCmdQueue = nullptr;
}

void Event::setReferenceTs(uint64_t currentCpuTimeStamp) {
    NEO::TimeStampData *referenceTs = static_cast<NEO::TimeStampData *>(ptrOffset(getHostAddress(), maxPacketCount * singlePacketSize));
    const auto recalculate = (currentCpuTimeStamp - referenceTs->cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
    if (referenceTs->cpuTimeinNS == 0 || recalculate) {
        device->getNEODevice()->getOSTime()->getGpuCpuTime(referenceTs, true);
    }
}

void Event::unsetInOrderExecInfo() {
    resetInOrderTimestampNode(nullptr);
    inOrderExecInfo.reset();
    inOrderAllocationOffset = 0;
    inOrderExecSignalValue = 0;
}

void Event::resetInOrderTimestampNode(NEO::TagNodeBase *newNode) {
    if (inOrderTimestampNode) {
        inOrderExecInfo->pushTempTimestampNode(inOrderTimestampNode, inOrderExecSignalValue);
    }
    inOrderTimestampNode = newNode;
}

ze_result_t Event::enableExtensions(const EventDescriptor &eventDescriptor) {
    bool interruptMode = false;
    bool kmdWaitMode = false;
    bool externalInterruptWait = false;

    auto extendedDesc = reinterpret_cast<const ze_base_desc_t *>(eventDescriptor.extensions);

    while (extendedDesc) {
        if (extendedDesc->stype == ZEX_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC) {
            auto eventSyncModeDesc = reinterpret_cast<const zex_intel_event_sync_mode_exp_desc_t *>(extendedDesc);

            interruptMode = (eventSyncModeDesc->syncModeFlags & ZEX_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT);
            kmdWaitMode = (eventSyncModeDesc->syncModeFlags & ZEX_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT);
            externalInterruptWait = (eventSyncModeDesc->syncModeFlags & ZEX_INTEL_EVENT_SYNC_MODE_EXP_FLAG_EXTERNAL_INTERRUPT_WAIT);

            if (externalInterruptWait) {
                setExternalInterruptId(eventSyncModeDesc->externalInterruptId);
                UNRECOVERABLE_IF(eventSyncModeDesc->externalInterruptId > 0 && eventDescriptor.eventPoolAllocation);
            }
        } else if (extendedDesc->stype == ZEX_STRUCTURE_COUNTER_BASED_EVENT_EXTERNAL_SYNC_ALLOC_PROPERTIES) {
            auto externalSyncAllocProperties = reinterpret_cast<const zex_counter_based_event_external_sync_alloc_properties_t *>(extendedDesc);
            if (!externalSyncAllocProperties->deviceAddress || !externalSyncAllocProperties->hostAddress) {
                return ZE_RESULT_ERROR_INVALID_ARGUMENT;
            }

            NEO::SvmAllocationData *externalHostAllocData = nullptr;
            if (!device->getDriverHandle()->findAllocationDataForRange(externalSyncAllocProperties->hostAddress, sizeof(uint64_t), externalHostAllocData)) {
                return ZE_RESULT_ERROR_INVALID_ARGUMENT;
            }

            auto allocation = externalHostAllocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());

            auto inOrderExecInfo = NEO::InOrderExecInfo::createFromExternalAllocation(*device->getNEODevice(), nullptr, castToUint64(externalSyncAllocProperties->deviceAddress), allocation,
                                                                                      externalSyncAllocProperties->hostAddress, externalSyncAllocProperties->completionValue, 1, 1);
            updateInOrderExecState(inOrderExecInfo, externalSyncAllocProperties->completionValue, 0);
        }

        extendedDesc = reinterpret_cast<const ze_base_desc_t *>(extendedDesc->pNext);
    }

    interruptMode |= (NEO::debugManager.flags.WaitForUserFenceOnEventHostSynchronize.get() == 1);
    kmdWaitMode |= (NEO::debugManager.flags.WaitForUserFenceOnEventHostSynchronize.get() == 1);

    if (interruptMode) {
        enableInterruptMode();
    }

    if (externalInterruptWait || (interruptMode && kmdWaitMode)) {
        enableKmdWaitMode();
    }

    return ZE_RESULT_SUCCESS;
}

} // namespace L0