/*
 * Copyright (C) 2019-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "level_zero/core/source/event/event.h"

#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/utilities/cpuintrinsics.h"
#include "shared/source/utilities/wait_util.h"

#include "level_zero/core/source/context/context_imp.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/tools/source/metrics/metric.h"

// NOTE(review): the names of the standard headers were lost from this file
// (only bare `#include` directives remained). The headers below are the ones
// this translation unit uses directly - confirm against version control.
#include <chrono>
#include <limits>
#include <memory>
#include <vector>

// NOTE(review): explicit template arguments in this file (casts, make_unique,
// NEO::TimestampPackets, numeric_limits, duration_cast) were reconstructed
// from how the values are used here - confirm against the class declarations.

namespace L0 {

// Collects the devices this pool is associated with and creates the pool's
// backing allocation in system memory for the root devices involved.
//
// driver     - supplies the device list when numDevices == 0, and the memory manager.
// context    - not referenced by this function.
// numDevices - number of entries in phDevices; 0 selects every device of the driver.
// phDevices  - device handles; only read when numDevices != 0.
// numEvents  - number of event slots the allocation must hold.
//
// Returns ZE_RESULT_SUCCESS on success, ZE_RESULT_ERROR_INVALID_ARGUMENT when
// no usable device was found, and ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY when the
// memory manager could not create the allocation.
ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *phDevices, uint32_t numEvents) {
    // Widen before multiplying so the byte count is computed in size_t.
    const size_t alignedSize = alignUp(static_cast<size_t>(numEvents) * eventSize, MemoryConstants::pageSize64k);

    const NEO::GraphicsAllocation::AllocationType allocationType = isEventPoolUsedForTimestamp
                                                                       ? NEO::GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER
                                                                       : NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;

    auto *driverHandleImp = static_cast<DriverHandleImp *>(driver);

    const bool useDevicesFromApi = (numDevices != 0);
    if (!useDevicesFromApi) {
        numDevices = static_cast<uint32_t>(driverHandleImp->devices.size());
    }

    std::vector<uint32_t> rootDeviceIndices;
    uint32_t maxRootDeviceIndex = 0u;

    for (uint32_t i = 0u; i < numDevices; i++) {
        Device *eventDevice = useDevicesFromApi ? Device::fromHandle(phDevices[i])
                                                : driverHandleImp->devices[i];
        if (!eventDevice) {
            continue;
        }

        devices.push_back(eventDevice);

        const uint32_t rootDeviceIndex = eventDevice->getNEODevice()->getRootDeviceIndex();
        rootDeviceIndices.push_back(rootDeviceIndex);
        if (maxRootDeviceIndex < rootDeviceIndex) {
            maxRootDeviceIndex = rootDeviceIndex;
        }
    }

    // Without at least one device there is no root device index to allocate
    // for; report an error instead of letting a container access throw.
    if (rootDeviceIndices.empty()) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    eventPoolAllocations = std::make_unique<NEO::MultiGraphicsAllocation>(maxRootDeviceIndex);

    NEO::AllocationProperties allocationProperties{rootDeviceIndices.front(), alignedSize, allocationType, systemMemoryBitfield};
    allocationProperties.alignment = eventAlignment;

    void *eventPoolPtr = driver->getMemoryManager()->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices,
                                                                                                     allocationProperties,
                                                                                                     *eventPoolAllocations);
    if (!eventPoolPtr) {
        return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
    }
    return ZE_RESULT_SUCCESS;
}

// Releases every graphics allocation held by the pool through the memory
// manager of the first associated device's driver handle.
EventPoolImp::~EventPoolImp() {
    // initialize() can fail before any device or allocation container exists
    // (EventPool::create deletes the pool in that case); nothing to free then.
    if (!eventPoolAllocations || devices.empty()) {
        return;
    }

    auto graphicsAllocations = eventPoolAllocations->getGraphicsAllocations();
    auto memoryManager = devices[0]->getDriverHandle()->getMemoryManager();
    for (auto gpuAllocation : graphicsAllocations) {
        memoryManager->freeGraphicsMemory(gpuAllocation);
    }
}

// IPC handles are not implemented for event pools here.
ze_result_t EventPoolImp::getIpcHandle(ze_ipc_event_pool_handle_t *pIpcHandle) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// IPC handles are not implemented for event pools here.
ze_result_t EventPoolImp::closeIpcHandle() {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Deletes this pool object. The object must not be used afterwards.
ze_result_t EventPoolImp::destroy() {
    delete this;

    return ZE_RESULT_SUCCESS;
}

// Creates an event at slot desc->index of this pool and stores it in *phEvent.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when the index is not below the
// number of events in the pool.
ze_result_t EventPoolImp::createEvent(const ze_event_desc_t *desc, ze_event_handle_t *phEvent) {
    // `>=` rather than `> count - 1`: the subtraction wraps around for an
    // empty pool and would accept every index.
    if (desc->index >= getNumEvents()) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    *phEvent = Event::create(this, desc, this->getDevice());

    return ZE_RESULT_SUCCESS;
}

// Returns the GPU address of this event's slot inside the pool allocation
// that belongs to the root device of `device`.
uint64_t EventImp::getGpuAddress(Device *device) {
    auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
    return (alloc->getGpuAddress() + (index * eventPool->getEventSize()));
}

// Builds an EventImp for slot desc->index of `eventPool`, points it at its
// host-visible slot, copies the signal/wait scopes from `desc`, binds it to
// the default engine's command stream receiver of `device` and resets it.
// The caller receives ownership of the returned object (see Event::destroy).
Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) {
    auto event = new EventImp(eventPool, desc->index, device);
    UNRECOVERABLE_IF(event == nullptr);

    if (eventPool->isEventPoolUsedForTimestamp) {
        event->isTimestampEvent = true;
        event->kernelTimestampsData = std::make_unique<NEO::TimestampPackets<uint32_t>[]>(EventPacketsCount::maxKernelSplit);
    }

    auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());

    // Host address of the slot = allocation base + index * slot size.
    uint64_t baseHostAddr = reinterpret_cast<uint64_t>(alloc->getUnderlyingBuffer());
    event->hostAddress = reinterpret_cast<void *>(baseHostAddr + (desc->index * eventPool->getEventSize()));
    event->signalScope = desc->signal;
    event->waitScope = desc->wait;
    event->csr = static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver;

    event->reset();

    return event;
}

// Returns the pool's graphics allocation for the root device of `device`.
NEO::GraphicsAllocation &EventImp::getAllocation(Device *device) {
    return *this->eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
}

// Sets the used-packet count of every currently tracked kernel to 1 and then
// collapses the kernel count back to 1.
void Event::resetPackets() {
    for (uint32_t i = 0; i < kernelCount; i++) {
        kernelTimestampsData[i].setPacketsUsed(1);
    }
    kernelCount = 1;
}

// Returns the total number of packets used across all tracked kernels for a
// timestamp event, and 1 for any other event.
uint32_t Event::getPacketsInUse() {
    if (!isTimestampEvent) {
        return 1;
    }

    uint32_t packetsInUse = 0;
    for (uint32_t i = 0; i < kernelCount; i++) {
        packetsInUse += kernelTimestampsData[i].getPacketsUsed();
    }
    return packetsInUse;
}

// Sets the used-packet count of the kernel entry selected by
// getCurrKernelDataIndex().
void Event::setPacketsInUse(uint32_t value) {
    kernelTimestampsData[getCurrKernelDataIndex()].setPacketsUsed(value);
}

// Returns the GPU address of the event slot, advanced past the packets used
// by every tracked kernel except the last one (timestamp events with more
// than one kernel only; otherwise the slot address itself).
uint64_t Event::getPacketAddress(Device *device) {
    uint64_t address = getGpuAddress(device);
    if (isTimestampEvent && kernelCount > 1) {
        for (uint32_t i = 0; i < kernelCount - 1; i++) {
            address += kernelTimestampsData[i].getPacketsUsed() *
                       NEO::TimestampPackets<uint32_t>::getSinglePacketSize();
        }
    }
    return address;
}

// Reduces the per-packet timestamps of all tracked kernels to the smallest
// start value and the largest end value, separately for the global and the
// context timestamps, and stores them in the *StartTS / *EndTS members.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t EventImp::calculateProfilingData() {
    // Seed with packet 0 of kernel 0, then widen the interval below.
    globalStartTS = kernelTimestampsData[0].getGlobalStartValue(0);
    globalEndTS = kernelTimestampsData[0].getGlobalEndValue(0);
    contextStartTS = kernelTimestampsData[0].getContextStartValue(0);
    contextEndTS = kernelTimestampsData[0].getContextEndValue(0);

    for (uint32_t i = 0; i < kernelCount; i++) {
        for (auto packetId = 0u; packetId < kernelTimestampsData[i].getPacketsUsed(); packetId++) {
            if (globalStartTS > kernelTimestampsData[i].getGlobalStartValue(packetId)) {
                globalStartTS = kernelTimestampsData[i].getGlobalStartValue(packetId);
            }
            if (contextStartTS > kernelTimestampsData[i].getContextStartValue(packetId)) {
                contextStartTS = kernelTimestampsData[i].getContextStartValue(packetId);
            }
            if (contextEndTS < kernelTimestampsData[i].getContextEndValue(packetId)) {
                contextEndTS = kernelTimestampsData[i].getContextEndValue(packetId);
            }
            if (globalEndTS < kernelTimestampsData[i].getGlobalEndValue(packetId)) {
                globalEndTS = kernelTimestampsData[i].getGlobalEndValue(packetId);
            }
        }
    }

    return ZE_RESULT_SUCCESS;
}

// Loads the timestamp packets found at `address` into kernelTimestampsData,
// walking the memory one packet at a time in kernel order.
void EventImp::assignTimestampData(void *address) {
    for (uint32_t i = 0; i < kernelCount; i++) {
        uint32_t packetsToCopy = kernelTimestampsData[i].getPacketsUsed();
        for (uint32_t packetId = 0; packetId < packetsToCopy; packetId++) {
            kernelTimestampsData[i].assignDataToAllTimestamps(packetId, address);
            address = ptrOffset(address, NEO::TimestampPackets<uint32_t>::getSinglePacketSize());
        }
    }
}

// Deletes this event object. The object must not be used afterwards.
ze_result_t Event::destroy() {
    delete this;
    return ZE_RESULT_SUCCESS;
}

// Status query for timestamp events: refreshes the packet data from host
// memory and returns ZE_RESULT_NOT_READY as soon as any used packet still has
// a context-end value equal to Event::STATE_CLEARED.
ze_result_t EventImp::queryStatusKernelTimestamp() {
    assignTimestampData(hostAddress);
    for (uint32_t i = 0; i < kernelCount; i++) {
        uint32_t packetsToCheck = kernelTimestampsData[i].getPacketsUsed();
        for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
            if (kernelTimestampsData[i].getContextEndValue(packetId) == Event::STATE_CLEARED) {
                return ZE_RESULT_NOT_READY;
            }
        }
    }
    return ZE_RESULT_SUCCESS;
}

// Returns ZE_RESULT_SUCCESS when the event is signaled and
// ZE_RESULT_NOT_READY otherwise.
ze_result_t EventImp::queryStatus() {
    uint64_t *hostAddr = static_cast<uint64_t *>(hostAddress);

    // An attached metric streamer overrides the slot with its notification state.
    if (metricStreamer != nullptr) {
        *hostAddr = metricStreamer->getNotificationState();
    }

    this->csr->downloadAllocations();

    if (isTimestampEvent) {
        return queryStatusKernelTimestamp();
    }

    // Only the first 32 bits of the slot carry the state.
    uint32_t queryVal = Event::STATE_CLEARED;
    memcpy_s(static_cast<void *>(&queryVal), sizeof(uint32_t), static_cast<void *>(hostAddr), sizeof(uint32_t));
    if (queryVal == Event::STATE_CLEARED) {
        return ZE_RESULT_NOT_READY;
    }

    // One-shot tag update once the event is observed as signaled.
    if (updateTaskCountEnabled) {
        this->csr->flushTagUpdate();
        updateTaskCountEnabled = false;
    }
    return ZE_RESULT_SUCCESS;
}

// Writes `eventVal` into the context-start, global-start, context-end and
// global-end fields of every used packet of every tracked kernel, then
// reloads kernelTimestampsData from host memory.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) {
    auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
    auto signalScopeFlag = this->signalScope;

    // Stores the value at one field address; the cache line is flushed
    // explicitly only when the event has no signal scope set.
    auto eventTsSetFunc = [&eventVal, &signalScopeFlag](auto tsAddr) {
        auto tsptr = reinterpret_cast<void *>(tsAddr);
        memcpy_s(tsptr, sizeof(uint32_t), static_cast<void *>(&eventVal), sizeof(uint32_t));
        if (!signalScopeFlag) {
            NEO::CpuIntrinsics::clFlush(tsptr);
        }
    };

    for (uint32_t kernelId = 0; kernelId < kernelCount; kernelId++) {
        uint32_t packetsToSet = kernelTimestampsData[kernelId].getPacketsUsed();
        for (uint32_t packetId = 0; packetId < packetsToSet; packetId++) {
            eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getContextStartOffset());
            eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getGlobalStartOffset());
            eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getContextEndOffset());
            eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getGlobalEndOffset());
            baseAddr += NEO::TimestampPackets<uint32_t>::getSinglePacketSize();
        }
    }
    assignTimestampData(hostAddress);

    return ZE_RESULT_SUCCESS;
}

// Writes `eventVal` into the event's host-visible slot. Timestamp events are
// forwarded to hostEventSetValueTimestamps(). Always returns ZE_RESULT_SUCCESS.
ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) {
    if (isTimestampEvent) {
        return hostEventSetValueTimestamps(eventVal);
    }

    auto hostAddr = static_cast<uint64_t *>(hostAddress);
    UNRECOVERABLE_IF(hostAddr == nullptr);
    memcpy_s(static_cast<void *>(hostAddr), sizeof(uint32_t), static_cast<void *>(&eventVal), sizeof(uint32_t));

    if (updateTaskCountEnabled) {
        this->csr->flushTagUpdate();
        updateTaskCountEnabled = false;
    }

    NEO::CpuIntrinsics::clFlush(hostAddr);

    return ZE_RESULT_SUCCESS;
}

// Signals the event from the host.
ze_result_t EventImp::hostSignal() {
    return hostEventSetValue(Event::STATE_SIGNALED);
}

// Waits on the host until the event is signaled or `timeout` has elapsed.
//
// timeout - 0 performs a single status query; the maximum uint64_t value
//           waits without a time limit; any other value is compared with the
//           elapsed time.
//           NOTE(review): the elapsed-time unit (nanoseconds) and the type of
//           the "no limit" sentinel were reconstructed - confirm against the
//           zeEventHostSynchronize contract.
//
// Returns ZE_RESULT_SUCCESS when the event is signaled (and unconditionally
// for an AUB command stream receiver), otherwise the last query result.
ze_result_t EventImp::hostSynchronize(uint64_t timeout) {
    if (this->csr->getType() == NEO::CommandStreamReceiverType::CSR_AUB) {
        return ZE_RESULT_SUCCESS;
    }

    if (timeout == 0) {
        return queryStatus();
    }

    const bool waitWithoutLimit = (timeout == std::numeric_limits<uint64_t>::max());
    ze_result_t ret = ZE_RESULT_NOT_READY;

    const std::chrono::high_resolution_clock::time_point startTime = std::chrono::high_resolution_clock::now();
    while (true) {
        ret = queryStatus();
        if (ret == ZE_RESULT_SUCCESS) {
            return ret;
        }

        NEO::WaitUtils::waitFunction(nullptr, 0u);

        if (waitWithoutLimit) {
            continue;
        }

        const std::chrono::high_resolution_clock::time_point currentTime = std::chrono::high_resolution_clock::now();
        const uint64_t timeDiff = std::chrono::duration_cast<std::chrono::nanoseconds>(currentTime - startTime).count();
        if (timeDiff >= timeout) {
            break;
        }
    }

    return ret;
}

// Puts the event back into Event::STATE_INITIAL.
ze_result_t EventImp::reset() {
    if (!isTimestampEvent) {
        return hostEventSetValue(Event::STATE_INITIAL);
    }

    // Temporarily mark every kernel entry and the preferred number of packets
    // as used so that hostEventSetValue() rewrites all of them, then collapse
    // the bookkeeping back to one kernel with one packet.
    kernelCount = EventPacketsCount::maxKernelSplit;
    for (uint32_t i = 0; i < kernelCount; i++) {
        kernelTimestampsData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
    }
    hostEventSetValue(Event::STATE_INITIAL);
    resetPackets();
    return ZE_RESULT_SUCCESS;
}

// Fills *dstptr with the kernel start/end timestamps of a signaled event.
// Returns ZE_RESULT_NOT_READY while queryStatus() does not report success.
ze_result_t EventImp::queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) {
    ze_kernel_timestamp_result_t &result = *dstptr;

    if (queryStatus() != ZE_RESULT_SUCCESS) {
        return ZE_RESULT_NOT_READY;
    }

    assignTimestampData(hostAddress);
    calculateProfilingData();

    auto eventTsSetFunc = [&](uint64_t &timestampFieldToCopy, uint64_t &timestampFieldForWriting) {
        memcpy_s(&(timestampFieldForWriting), sizeof(uint64_t), static_cast<void *>(&timestampFieldToCopy), sizeof(uint64_t));
    };

    if (!NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) {
        eventTsSetFunc(contextStartTS, result.context.kernelStart);
        eventTsSetFunc(globalStartTS, result.global.kernelStart);
        eventTsSetFunc(contextEndTS, result.context.kernelEnd);
        eventTsSetFunc(globalEndTS, result.global.kernelEnd);
    } else {
        // Hardware that only uses global timestamps: report the global values
        // in the context fields as well.
        eventTsSetFunc(globalStartTS, result.context.kernelStart);
        eventTsSetFunc(globalStartTS, result.global.kernelStart);
        eventTsSetFunc(globalEndTS, result.context.kernelEnd);
        eventTsSetFunc(globalEndTS, result.global.kernelEnd);
    }

    return ZE_RESULT_SUCCESS;
}

// Allocates and initializes an event pool with desc->count events and
// desc->flags. Returns nullptr when the object cannot be allocated or when
// initialization fails (the partially built pool is deleted in that case).
EventPool *EventPool::create(DriverHandle *driver, Context *context, uint32_t numDevices,
                             ze_device_handle_t *phDevices,
                             const ze_event_pool_desc_t *desc) {
    auto eventPool = new (std::nothrow) EventPoolImp(driver, numDevices, phDevices, desc->count, desc->flags);
    if (!eventPool) {
        DEBUG_BREAK_IF(true);
        return nullptr;
    }

    ze_result_t result = eventPool->initialize(driver, context, numDevices, phDevices, desc->count);
    if (result) {
        delete eventPool;
        return nullptr;
    }
    return eventPool;
}

} // namespace L0