/* * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/assert_handler/assert_handler.h" #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/queue_throttle.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/debugger/debugger_l0.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/product_helper.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" #include "level_zero/core/source/helpers/properties_parser.h" #include "level_zero/core/source/kernel/kernel.h" #include "igfxfmid.h" namespace L0 { CommandQueueAllocatorFn commandQueueFactory[IGFX_MAX_PRODUCT] = {}; bool CommandQueue::frontEndTrackingEnabled() const { return NEO::debugManager.flags.AllowPatchingVfeStateInCommandLists.get() || this->frontEndStateTracking; } CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : desc(*desc), device(device), csr(csr) { int overrideCmdQueueSyncMode = NEO::debugManager.flags.OverrideCmdQueueSynchronousMode.get(); if (overrideCmdQueueSyncMode != -1) { this->desc.mode = static_cast(overrideCmdQueueSyncMode); } int overrideUseKmdWaitFunction = NEO::debugManager.flags.OverrideUseKmdWaitFunction.get(); if (overrideUseKmdWaitFunction != -1) { useKmdWaitFunction = !!(overrideUseKmdWaitFunction); } } ze_result_t CommandQueueImp::destroy() { unregisterCsrClient(); if (commandStream.getCpuBase() != nullptr) { commandStream.replaceGraphicsAllocation(nullptr); commandStream.replaceBuffer(nullptr, 0); } buffers.destroy(this->getDevice()); if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) { device->getL0Debugger()->notifyCommandQueueDestroyed(device->getNEODevice()); } delete this; return ZE_RESULT_SUCCESS; } ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal, bool immediateCmdListQueue) { ze_result_t returnValue; internalUsage = isInternal; returnValue = buffers.initialize(device, totalCmdBufferSize); if (returnValue == ZE_RESULT_SUCCESS) { NEO::GraphicsAllocation *bufferAllocation = buffers.getCurrentBufferAllocation(); UNRECOVERABLE_IF(bufferAllocation == nullptr); commandStream.replaceBuffer(bufferAllocation->getUnderlyingBuffer(), defaultQueueCmdBufferSize); commandStream.replaceGraphicsAllocation(bufferAllocation); isCopyOnlyCommandQueue = copyOnly; preemptionCmdSyncProgramming = getPreemptionCmdProgramming(); activeSubDevices = static_cast(csr->getOsContext().getDeviceBitfield().count()); if (!isInternal) { partitionCount = csr->getActivePartitions(); } if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) { device->getL0Debugger()->notifyCommandQueueCreated(device->getNEODevice()); } auto &hwInfo = device->getHwInfo(); auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment(); this->stateComputeModeTracking = L0GfxCoreHelper::enableStateComputeModeTracking(rootDeviceEnvironment); this->frontEndStateTracking = L0GfxCoreHelper::enableFrontEndStateTracking(rootDeviceEnvironment); this->pipelineSelectStateTracking = L0GfxCoreHelper::enablePipelineSelectStateTracking(rootDeviceEnvironment); this->stateBaseAddressTracking = L0GfxCoreHelper::enableStateBaseAddressTracking(rootDeviceEnvironment); auto &productHelper = rootDeviceEnvironment.getHelper(); this->doubleSbaWa = productHelper.isAdditionalStateBaseAddressWARequired(hwInfo); this->cmdListHeapAddressModel = L0GfxCoreHelper::getHeapAddressModel(rootDeviceEnvironment); this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, !(internalUsage && immediateCmdListQueue)); auto &compilerProductHelper = rootDeviceEnvironment.getHelper(); this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled(); this->heaplessStateInitEnabled = compilerProductHelper.isHeaplessStateInitEnabled(this->heaplessModeEnabled); } return returnValue; } NEO::WaitStatus CommandQueueImp::reserveLinearStreamSize(size_t size) { auto waitStatus{NEO::WaitStatus::ready}; if (commandStream.getAvailableSpace() < size) { waitStatus = buffers.switchBuffers(csr); NEO::GraphicsAllocation *nextBufferAllocation = buffers.getCurrentBufferAllocation(); commandStream.replaceBuffer(nextBufferAllocation->getUnderlyingBuffer(), defaultQueueCmdBufferSize); commandStream.replaceGraphicsAllocation(nextBufferAllocation); } return waitStatus; } NEO::SubmissionStatus CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr, bool isCooperative) { UNRECOVERABLE_IF(csr == nullptr); NEO::BatchBuffer batchBuffer(this->startingCmdBuffer->getGraphicsAllocation(), offset, 0, 0, nullptr, false, NEO::getThrottleFromPowerSavingUint(csr->getUmdPowerHintValue()), NEO::QueueSliceCount::defaultSliceCount, this->startingCmdBuffer->getUsed(), this->startingCmdBuffer, endingCmdPtr, csr->getNumClients(), true, false, true, false); batchBuffer.disableFlatRingBuffer = true; if (this->startingCmdBuffer != &this->commandStream) { this->csr->makeResident(*this->commandStream.getGraphicsAllocation()); } commandStream.getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId()); commandStream.getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId()); csr->setActivePartitions(partitionCount); auto ret = csr->submitBatchBuffer(batchBuffer, csr->getResidencyAllocations()); if (ret != NEO::SubmissionStatus::success) { commandStream.getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount(), csr->getOsContext().getContextId()); commandStream.getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount(), csr->getOsContext().getContextId()); return ret; } buffers.setCurrentFlushStamp(csr->peekTaskCount(), csr->obtainCurrentFlushStamp()); return ret; } ze_result_t CommandQueueImp::synchronize(uint64_t timeout) { if ((timeout == std::numeric_limits::max()) && useKmdWaitFunction) { auto &waitPair = buffers.getCurrentFlushStamp(); const auto waitStatus = csr->waitForTaskCountWithKmdNotifyFallback(waitPair.first, waitPair.second, false, NEO::QueueThrottle::MEDIUM); if (waitStatus == NEO::WaitStatus::gpuHang) { postSyncOperations(true); return ZE_RESULT_ERROR_DEVICE_LOST; } postSyncOperations(false); return ZE_RESULT_SUCCESS; } else { return synchronizeByPollingForTaskCount(timeout); } } ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeoutNanoseconds) { UNRECOVERABLE_IF(csr == nullptr); auto taskCountToWait = getTaskCount(); bool enableTimeout = true; auto microsecondResolution = device->getNEODevice()->getMicrosecondResolution(); int64_t timeoutMicroseconds = static_cast(timeoutNanoseconds / microsecondResolution); if (timeoutNanoseconds == std::numeric_limits::max()) { enableTimeout = false; timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; } const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, enableTimeout, false, timeoutMicroseconds}, taskCountToWait); if (waitStatus == NEO::WaitStatus::notReady) { return ZE_RESULT_NOT_READY; } if (waitStatus == NEO::WaitStatus::gpuHang) { postSyncOperations(true); return ZE_RESULT_ERROR_DEVICE_LOST; } postSyncOperations(false); return ZE_RESULT_SUCCESS; } void CommandQueueImp::printKernelsPrintfOutput(bool hangDetected) { for (auto &kernelWeakPtr : this->printfKernelContainer) { std::lock_guard lock(static_cast(this->getDevice())->printfKernelMutex); if (!kernelWeakPtr.expired()) { kernelWeakPtr.lock()->printPrintfOutput(hangDetected); } } this->printfKernelContainer.clear(); } void CommandQueueImp::checkAssert() { bool valueExpected = true; bool hadAssert = cmdListWithAssertExecuted.compare_exchange_strong(valueExpected, false); if (hadAssert) { UNRECOVERABLE_IF(device->getNEODevice()->getRootDeviceEnvironment().assertHandler.get() == nullptr); device->getNEODevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort(); } } void CommandQueueImp::postSyncOperations(bool hangDetected) { printKernelsPrintfOutput(hangDetected); checkAssert(); if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::debugManager.flags.DebuggerLogBitmask.get()) { device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId()); } unregisterCsrClient(); } CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc, bool isCopyOnly, bool isInternal, bool immediateCmdListQueue, ze_result_t &returnValue) { CommandQueueAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = commandQueueFactory[productFamily]; } CommandQueueImp *commandQueue = nullptr; returnValue = ZE_RESULT_ERROR_UNINITIALIZED; if (!allocator) { return nullptr; } commandQueue = static_cast((*allocator)(device, csr, desc)); returnValue = commandQueue->initialize(isCopyOnly, isInternal, immediateCmdListQueue); if (returnValue != ZE_RESULT_SUCCESS) { commandQueue->destroy(); commandQueue = nullptr; return nullptr; } auto &osContext = csr->getOsContext(); DriverHandleImp *driverHandleImp = static_cast(device->getDriverHandle()); if (driverHandleImp->powerHint && driverHandleImp->powerHint != osContext.getUmdPowerHintValue()) { osContext.setUmdPowerHintValue(driverHandleImp->powerHint); osContext.reInitializeContext(); } csr->initializeResources(false, device->getDevicePreemptionMode()); csr->initDirectSubmission(); if (commandQueue->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) { csr->createGlobalStatelessHeap(); } return commandQueue; } void CommandQueueImp::unregisterCsrClient() { this->csr->unregisterClient(this); } void CommandQueueImp::registerCsrClient() { this->csr->registerClient(this); } ze_result_t CommandQueueImp::CommandBufferManager::initialize(Device *device, size_t sizeRequested) { size_t alignedSize = alignUp(sizeRequested, MemoryConstants::pageSize64k); NEO::AllocationProperties properties{device->getRootDeviceIndex(), true, alignedSize, NEO::AllocationType::commandBuffer, (device->getNEODevice()->getNumGenericSubDevices() > 1u) /* multiOsContextCapable */, false, device->getNEODevice()->getDeviceBitfield()}; auto firstBuffer = device->obtainReusableAllocation(alignedSize, NEO::AllocationType::commandBuffer); if (!firstBuffer) { firstBuffer = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } auto secondBuffer = device->obtainReusableAllocation(alignedSize, NEO::AllocationType::commandBuffer); if (!secondBuffer) { secondBuffer = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } buffers[BufferAllocation::first] = firstBuffer; buffers[BufferAllocation::second] = secondBuffer; if (!buffers[BufferAllocation::first] || !buffers[BufferAllocation::second]) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } flushId[BufferAllocation::first] = std::make_pair(0u, 0u); flushId[BufferAllocation::second] = std::make_pair(0u, 0u); return ZE_RESULT_SUCCESS; } void CommandQueueImp::CommandBufferManager::destroy(Device *device) { if (buffers[BufferAllocation::first]) { device->storeReusableAllocation(*buffers[BufferAllocation::first]); buffers[BufferAllocation::first] = nullptr; } if (buffers[BufferAllocation::second]) { device->storeReusableAllocation(*buffers[BufferAllocation::second]); buffers[BufferAllocation::second] = nullptr; } } NEO::WaitStatus CommandQueueImp::CommandBufferManager::switchBuffers(NEO::CommandStreamReceiver *csr) { if (bufferUse == BufferAllocation::first) { bufferUse = BufferAllocation::second; } else { bufferUse = BufferAllocation::first; } auto waitStatus{NEO::WaitStatus::ready}; auto completionId = flushId[bufferUse]; if (completionId.second != 0u) { UNRECOVERABLE_IF(csr == nullptr); waitStatus = csr->waitForTaskCountWithKmdNotifyFallback(completionId.first, completionId.second, false, NEO::QueueThrottle::MEDIUM); } return waitStatus; } void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock &lockForIndirect, bool performMigration) { NEO::Device *neoDevice = this->device->getNEODevice(); auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager(); auto submittedAsPack = svmAllocsManager->submitIndirectAllocationsAsPack(*(this->csr)); if (!submittedAsPack) { lockForIndirect = this->device->getDriverHandle()->getSvmAllocsManager()->obtainOwnership(); NEO::ResidencyContainer residencyAllocations; svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(), residencyAllocations, unifiedMemoryControls.generateMask()); makeResidentAndMigrate(performMigration, residencyAllocations); } } void CommandQueueImp::makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) { for (auto alloc : residencyContainer) { alloc->prepareHostPtrForResidency(csr); csr->makeResident(*alloc); if (performMigration && (alloc->getAllocationType() == NEO::AllocationType::svmGpu || alloc->getAllocationType() == NEO::AllocationType::svmCpu)) { auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager(); pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(alloc->getGpuAddress())); } } } ze_result_t CommandQueueImp::getOrdinal(uint32_t *pOrdinal) { *pOrdinal = desc.ordinal; return ZE_RESULT_SUCCESS; } ze_result_t CommandQueueImp::getIndex(uint32_t *pIndex) { *pIndex = desc.index; return ZE_RESULT_SUCCESS; } QueueProperties CommandQueue::extractQueueProperties(const ze_command_queue_desc_t &desc) { QueueProperties queueProperties = {}; auto baseProperties = static_cast(desc.pNext); while (baseProperties) { if (baseProperties->stype == ZEX_INTEL_STRUCTURE_TYPE_QUEUE_ALLOCATE_MSIX_HINT_EXP_PROPERTIES) { // NOLINT(clang-analyzer-optin.core.EnumCastOutOfRange), NEO-12901 queueProperties.interruptHint = static_cast(desc.pNext)->uniqueMsix; } else if (auto syncDispatchMode = getSyncDispatchMode(baseProperties)) { if (syncDispatchMode.has_value()) { queueProperties.synchronizedDispatchMode = syncDispatchMode.value(); } } else if (baseProperties->stype == ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES) { // NOLINT(clang-analyzer-optin.core.EnumCastOutOfRange), NEO-12901 queueProperties.copyOffloadHint = static_cast(desc.pNext)->copyOffloadEnabled; } baseProperties = static_cast(baseProperties->pNext); } return queueProperties; } } // namespace L0