// File: compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp
// (358 lines, 16 KiB, C++)
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/assert_handler/assert_handler.h"
#include "shared/source/command_container/cmdcontainer.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/queue_throttle.h"
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/debugger/debugger_l0.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/product_helper.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/source/kernel/kernel.h"
#include "igfxfmid.h"
namespace L0 {
// Per-product allocator table, indexed by PRODUCT_FAMILY; entries are populated
// elsewhere (per-product registration) and consumed by CommandQueue::create().
CommandQueueAllocatorFn commandQueueFactory[IGFX_MAX_PRODUCT] = {};
bool CommandQueue::frontEndTrackingEnabled() const {
    // Front-end (VFE) state tracking is considered active either when forced via
    // the debug flag or when the queue itself was created with tracking enabled.
    const bool forcedByDebugFlag = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
    return forcedByDebugFlag || this->frontEndStateTracking;
}
CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc)
    : desc(*desc), device(device), csr(csr) {
    // Debug override: force the queue's synchronous/asynchronous mode
    // regardless of what the caller requested in the descriptor.
    const int syncModeOverride = NEO::DebugManager.flags.OverrideCmdQueueSynchronousMode.get();
    if (syncModeOverride != -1) {
        this->desc.mode = static_cast<ze_command_queue_mode_t>(syncModeOverride);
    }

    // Debug override: force whether host synchronization uses the KMD wait path.
    const int kmdWaitOverride = NEO::DebugManager.flags.OverrideUseKmdWaitFunction.get();
    if (kmdWaitOverride != -1) {
        useKmdWaitFunction = (kmdWaitOverride != 0);
    }
}
// Tears down the queue: detaches the command stream from its backing
// allocation, returns the ring buffers to the device reuse pool, notifies the
// debugger, and finally self-deletes. Callers must not use the object afterwards.
ze_result_t CommandQueueImp::destroy() {
// Drop this queue's registration with the CSR (no-op if already unregistered).
unregisterCsrClient();
// Detach the linear stream from its graphics allocation before the buffers
// are handed back, so the stream no longer references freed storage.
if (commandStream.getCpuBase() != nullptr) {
commandStream.replaceGraphicsAllocation(nullptr);
commandStream.replaceBuffer(nullptr, 0);
}
// Returns both ring-buffer allocations to the device's reusable pool.
buffers.destroy(this->getDevice());
// Let an attached L0 debugger know this queue is going away.
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) {
device->getL0Debugger()->notifyCommandQueueDestroyed(device->getNEODevice());
}
// Self-destruction: the object owns its own lifetime (L0 handle pattern).
delete this;
return ZE_RESULT_SUCCESS;
}
// Second-stage construction: allocates the double-buffered command stream and
// captures per-device state-tracking capabilities.
// @param copyOnly             queue targets a copy engine only
// @param isInternal           queue is driver-internal (not user-created)
// @param immediateCmdListQueue queue backs an immediate command list
// @return ZE_RESULT_SUCCESS, or the buffer-allocation failure code.
ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal, bool immediateCmdListQueue) {
    internalUsage = isInternal;
    internalQueueForImmediateCommandList = immediateCmdListQueue;

    const ze_result_t bufferStatus = buffers.initialize(device, totalCmdBufferSize);
    if (bufferStatus != ZE_RESULT_SUCCESS) {
        return bufferStatus;
    }

    // Point the linear stream at the first ring buffer.
    NEO::GraphicsAllocation *bufferAllocation = buffers.getCurrentBufferAllocation();
    UNRECOVERABLE_IF(bufferAllocation == nullptr);
    commandStream.replaceBuffer(bufferAllocation->getUnderlyingBuffer(), defaultQueueCmdBufferSize);
    commandStream.replaceGraphicsAllocation(bufferAllocation);

    isCopyOnlyCommandQueue = copyOnly;
    preemptionCmdSyncProgramming = getPreemptionCmdProgramming();
    activeSubDevices = static_cast<uint32_t>(csr->getOsContext().getDeviceBitfield().count());
    if (!isInternal) {
        partitionCount = csr->getActivePartitions();
    }

    // Let an attached L0 debugger know a new queue exists.
    if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) {
        device->getL0Debugger()->notifyCommandQueueCreated(device->getNEODevice());
    }

    // Capture per-platform state-tracking capabilities up front.
    auto &hwInfo = device->getHwInfo();
    auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
    this->stateComputeModeTracking = L0GfxCoreHelper::enableStateComputeModeTracking(rootDeviceEnvironment);
    this->frontEndStateTracking = L0GfxCoreHelper::enableFrontEndStateTracking(rootDeviceEnvironment);
    this->pipelineSelectStateTracking = L0GfxCoreHelper::enablePipelineSelectStateTracking(rootDeviceEnvironment);
    this->stateBaseAddressTracking = L0GfxCoreHelper::enableStateBaseAddressTracking(rootDeviceEnvironment);

    auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
    this->doubleSbaWa = productHelper.isAdditionalStateBaseAddressWARequired(hwInfo);
    this->cmdListHeapAddressModel = L0GfxCoreHelper::getHeapAddressModel(rootDeviceEnvironment);
    this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, !immediateCmdListQueue);

    return ZE_RESULT_SUCCESS;
}
// Ensures the linear stream has at least `size` bytes available, flipping to
// the other ring buffer (and waiting for its prior submission) when needed.
NEO::WaitStatus CommandQueueImp::reserveLinearStreamSize(size_t size) {
    // Fast path: current buffer still has room.
    if (commandStream.getAvailableSpace() >= size) {
        return NEO::WaitStatus::Ready;
    }

    // Flip to the other ring buffer; switchBuffers waits until the GPU has
    // retired whatever was previously submitted from it.
    const auto waitStatus = buffers.switchBuffers(csr);
    NEO::GraphicsAllocation *nextAllocation = buffers.getCurrentBufferAllocation();
    commandStream.replaceBuffer(nextAllocation->getUnderlyingBuffer(), defaultQueueCmdBufferSize);
    commandStream.replaceGraphicsAllocation(nextAllocation);
    return waitStatus;
}
// Builds a NEO::BatchBuffer describing the recorded commands and hands it to
// the CSR for submission. On failure the allocation task counts are rolled
// back so the buffers are not considered in-flight.
// @param offset             start offset of the commands inside the buffer
// @param residencyContainer allocations to make resident (unused here; CSR residency list is used)
// @param endingCmdPtr       pointer to the final command (batch-buffer end / chaining point)
// @param isCooperative      cooperative-dispatch flag (currently not forwarded)
NEO::SubmissionStatus CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,
bool isCooperative) {
UNRECOVERABLE_IF(csr == nullptr);
// NOTE: BatchBuffer's ctor arguments are positional; startingCmdBuffer may
// differ from commandStream when the first command list's buffer is chained
// directly (primary dispatch).
NEO::BatchBuffer batchBuffer(this->startingCmdBuffer->getGraphicsAllocation(), offset, 0, 0, nullptr, false,
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
this->startingCmdBuffer->getUsed(), this->startingCmdBuffer, endingCmdPtr, csr->getNumClients(), true, false);
batchBuffer.disableFlatRingBuffer = true;
// If submission starts from a command-list buffer, the queue's own stream
// still has to be resident (it holds the chained continuation commands).
if (this->startingCmdBuffer != &this->commandStream) {
this->csr->makeResident(*this->commandStream.getGraphicsAllocation());
}
// Tag the queue buffer with the task count of the upcoming submission.
commandStream.getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
csr->setActivePartitions(partitionCount);
auto ret = csr->submitBatchBuffer(batchBuffer, csr->getResidencyAllocations());
if (ret != NEO::SubmissionStatus::SUCCESS) {
// Submission failed: roll the task counts back to the current (unchanged) value.
commandStream.getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount(), csr->getOsContext().getContextId());
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount(), csr->getOsContext().getContextId());
return ret;
}
// Remember the (taskCount, flushStamp) pair so switchBuffers can wait on it.
buffers.setCurrentFlushStamp(csr->peekTaskCount(), csr->obtainCurrentFlushStamp());
return ret;
}
// Host-synchronizes on the queue's last submission. An infinite timeout with
// KMD wait enabled uses the kernel-notify path; everything else polls.
ze_result_t CommandQueueImp::synchronize(uint64_t timeout) {
    const bool infiniteTimeout = (timeout == std::numeric_limits<uint64_t>::max());
    if (!infiniteTimeout || !useKmdWaitFunction) {
        return synchronizeByPollingForTaskCount(timeout);
    }

    auto &waitPair = buffers.getCurrentFlushStamp();
    const auto waitStatus = csr->waitForTaskCountWithKmdNotifyFallback(waitPair.first, waitPair.second, false, NEO::QueueThrottle::MEDIUM);
    const bool hangDetected = (waitStatus == NEO::WaitStatus::GpuHang);
    postSyncOperations(hangDetected);
    return hangDetected ? ZE_RESULT_ERROR_DEVICE_LOST : ZE_RESULT_SUCCESS;
}
// Polls the CSR for the queue's task count, honoring the caller's timeout
// (UINT64_MAX disables the timeout entirely).
ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeout) {
    UNRECOVERABLE_IF(csr == nullptr);

    const auto taskCountToWait = getTaskCount();
    const bool infiniteTimeout = (timeout == std::numeric_limits<uint64_t>::max());
    const int64_t timeoutMicroseconds = infiniteTimeout ? NEO::TimeoutControls::maxTimeout
                                                        : static_cast<int64_t>(timeout);

    const auto waitStatus = csr->waitForCompletionWithTimeout(
        NEO::WaitParams{false, !infiniteTimeout, timeoutMicroseconds}, taskCountToWait);

    switch (waitStatus) {
    case NEO::WaitStatus::NotReady:
        // Timed out: no post-sync work, caller may retry.
        return ZE_RESULT_NOT_READY;
    case NEO::WaitStatus::GpuHang:
        postSyncOperations(true);
        return ZE_RESULT_ERROR_DEVICE_LOST;
    default:
        postSyncOperations(false);
        return ZE_RESULT_SUCCESS;
    }
}
// Flushes the printf buffer of every kernel executed through this queue, then
// drops the tracked kernel references.
void CommandQueueImp::printKernelsPrintfOutput(bool hangDetected) {
    for (auto &printfKernel : this->printfKernelContainer) {
        printfKernel->printPrintfOutput(hangDetected);
    }
    this->printfKernelContainer.clear();
}
// If any executed command list raised a device-side assert, print it and abort.
void CommandQueueImp::checkAssert() {
    // Atomically consume the "assert fired" flag so the assert is reported once.
    bool expected = true;
    if (!cmdListWithAssertExecuted.compare_exchange_strong(expected, false)) {
        return;
    }
    auto *assertHandler = device->getNEODevice()->getRootDeviceEnvironment().assertHandler.get();
    UNRECOVERABLE_IF(assertHandler == nullptr);
    assertHandler->printAssertAndAbort();
}
// Work performed after a host synchronization completes (successfully or with
// a GPU hang): flush kernel printf output, surface any device-side assert,
// dump debugger-tracked addresses, and unregister from the CSR.
void CommandQueueImp::postSyncOperations(bool hangDetected) {
printKernelsPrintfOutput(hangDetected);
checkAssert();
// Only dump tracked addresses when debugger logging is explicitly enabled.
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) {
device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId());
}
unregisterCsrClient();
}
// Factory entry point: looks up the per-product allocator, constructs and
// initializes the queue, applies the driver power hint to the OS context, and
// prepares the CSR for submissions.
// @return the new queue, or nullptr (returnValue carries the failure code).
CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr,
                                   const ze_command_queue_desc_t *desc, bool isCopyOnly, bool isInternal, bool immediateCmdListQueue, ze_result_t &returnValue) {
    returnValue = ZE_RESULT_ERROR_UNINITIALIZED;

    const CommandQueueAllocatorFn allocator =
        (productFamily < IGFX_MAX_PRODUCT) ? commandQueueFactory[productFamily] : nullptr;
    if (allocator == nullptr) {
        return nullptr;
    }

    auto *commandQueue = static_cast<CommandQueueImp *>((*allocator)(device, csr, desc));
    returnValue = commandQueue->initialize(isCopyOnly, isInternal, immediateCmdListQueue);
    if (returnValue != ZE_RESULT_SUCCESS) {
        commandQueue->destroy();
        return nullptr;
    }

    // Propagate the driver-level power hint; a changed hint requires
    // re-creating the OS context.
    auto &osContext = csr->getOsContext();
    auto *driverHandleImp = static_cast<DriverHandleImp *>(device->getDriverHandle());
    if (driverHandleImp->powerHint && driverHandleImp->powerHint != osContext.getUmdPowerHintValue()) {
        osContext.setUmdPowerHintValue(driverHandleImp->powerHint);
        osContext.reInitializeContext();
    }

    csr->initializeResources();
    csr->initDirectSubmission();
    if (commandQueue->cmdListHeapAddressModel == NEO::HeapAddressModel::GlobalStateless) {
        csr->createGlobalStatelessHeap();
    }
    return commandQueue;
}
// Drops this queue's client registration with the CSR. Idempotent: does
// nothing when the queue is not currently registered.
void CommandQueueImp::unregisterCsrClient() {
    if (getClientId() == CommandQueue::clientNotRegistered) {
        return;
    }
    this->csr->unregisterClient();
    setClientId(CommandQueue::clientNotRegistered);
}
// Returns the queue's mode as stored at creation time (possibly overridden by
// the OverrideCmdQueueSynchronousMode debug flag in the constructor).
ze_command_queue_mode_t CommandQueueImp::getSynchronousMode() const {
    return this->desc.mode;
}
// Allocates (or recycles) the two ring buffers used for double-buffered
// command recording and resets their flush stamps.
// @return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY when either buffer is unavailable.
ze_result_t CommandQueueImp::CommandBufferManager::initialize(Device *device, size_t sizeRequested) {
    const size_t alignedSize = alignUp<size_t>(sizeRequested, MemoryConstants::pageSize64k);
    NEO::AllocationProperties properties{device->getRootDeviceIndex(), true, alignedSize,
                                         NEO::AllocationType::COMMAND_BUFFER,
                                         (device->getNEODevice()->getNumGenericSubDevices() > 1u) /* multiOsContextCapable */,
                                         false,
                                         device->getNEODevice()->getDeviceBitfield()};

    // Prefer a recycled allocation from the device pool; fall back to a fresh one.
    auto obtainBuffer = [&]() -> NEO::GraphicsAllocation * {
        auto *allocation = device->obtainReusableAllocation(alignedSize, NEO::AllocationType::COMMAND_BUFFER);
        if (allocation == nullptr) {
            allocation = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
        }
        return allocation;
    };

    buffers[BUFFER_ALLOCATION::FIRST] = obtainBuffer();
    buffers[BUFFER_ALLOCATION::SECOND] = obtainBuffer();
    if (!buffers[BUFFER_ALLOCATION::FIRST] || !buffers[BUFFER_ALLOCATION::SECOND]) {
        // Any successfully acquired buffer stays in buffers[] and is returned
        // to the pool by destroy().
        return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
    }

    flushId[BUFFER_ALLOCATION::FIRST] = std::make_pair(0u, 0u);
    flushId[BUFFER_ALLOCATION::SECOND] = std::make_pair(0u, 0u);
    return ZE_RESULT_SUCCESS;
}
// Returns both ring buffers to the device's reuse pool (they are not freed here).
void CommandQueueImp::CommandBufferManager::destroy(Device *device) {
    for (auto bufferId : {BUFFER_ALLOCATION::FIRST, BUFFER_ALLOCATION::SECOND}) {
        if (buffers[bufferId] != nullptr) {
            device->storeReusableAllocation(*buffers[bufferId]);
            buffers[bufferId] = nullptr;
        }
    }
}
// Flips to the other ring buffer and, if that buffer has an outstanding
// submission (non-zero flush stamp), waits for the GPU to retire it.
NEO::WaitStatus CommandQueueImp::CommandBufferManager::switchBuffers(NEO::CommandStreamReceiver *csr) {
    bufferUse = (bufferUse == BUFFER_ALLOCATION::FIRST) ? BUFFER_ALLOCATION::SECOND
                                                        : BUFFER_ALLOCATION::FIRST;

    const auto &completionId = flushId[bufferUse];
    if (completionId.second == 0u) {
        // Buffer was never submitted: safe to reuse immediately.
        return NEO::WaitStatus::Ready;
    }

    UNRECOVERABLE_IF(csr == nullptr);
    return csr->waitForTaskCountWithKmdNotifyFallback(completionId.first, completionId.second, false, NEO::QueueThrottle::MEDIUM);
}
// Makes indirect (SVM) allocations resident for the upcoming submission.
// Either marks them resident "as a pack" via the SVM manager, or — when pack
// submission is disabled — takes ownership of the SVM manager's lock (handed
// back to the caller through lockForIndirect, which must be held until
// submission) and makes each internal allocation resident individually.
// @param unifiedMemoryControls selects which indirect allocation kinds to include
// @param lockForIndirect       out: holds SVM-manager ownership in the non-pack path
// @param performMigration      migrate shared allocations to the GPU domain
void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) {
NEO::Device *neoDevice = this->device->getNEODevice();
auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager();
auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex());
// Debug flag overrides the memory manager's pack decision (-1 = no override).
if (NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) {
submitAsPack = !!NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
}
if (submitAsPack) {
// Pack path: SVM manager marks everything resident for the next task count.
svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u);
} else {
// Non-pack path: hold the SVM-manager lock (transferred to the caller via
// lockForIndirect) while building and making resident the residency list.
lockForIndirect = this->device->getDriverHandle()->getSvmAllocsManager()->obtainOwnership();
NEO::ResidencyContainer residencyAllocations;
svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
residencyAllocations,
unifiedMemoryControls.generateMask());
makeResidentAndMigrate(performMigration, residencyAllocations);
}
}
// Pins every allocation in the container for the upcoming submission and,
// when requested, migrates shared SVM allocations back to the GPU domain.
void CommandQueueImp::makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) {
    for (auto *allocation : residencyContainer) {
        allocation->prepareHostPtrForResidency(csr);
        csr->makeResident(*allocation);

        if (!performMigration) {
            continue;
        }
        const auto allocationType = allocation->getAllocationType();
        if (allocationType == NEO::AllocationType::SVM_GPU ||
            allocationType == NEO::AllocationType::SVM_CPU) {
            auto *pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
            pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(allocation->getGpuAddress()));
        }
    }
}
} // namespace L0