From e09424f8b267dd20e544e0167191def60608c93c Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Mon, 22 Apr 2024 12:06:13 +0000 Subject: [PATCH] performance: remove binding flushImmediateRegularTask functions Related-To: NEO-7824 Related-To: HSD-18037800484 Signed-off-by: Kamil Kopryk --- .../source/cmdlist/cmdlist_hw_immediate.h | 6 +- .../source/cmdlist/cmdlist_hw_immediate.inl | 176 ++++++++++++------ .../core/test/unit_tests/mocks/mock_cmdlist.h | 2 + 3 files changed, 121 insertions(+), 63 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 57d8ed7559..308e9eb4a1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -189,8 +189,13 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily + void handleHeapsAndResidencyForImmediateRegularTask(void *&sshCpuBaseAddress); + void handleDebugSurfaceStateUpdate(NEO::IndirectHeap *ssh); + void checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies, size_t commandSize); void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags); @@ -222,7 +227,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily flushImmediateTaskMethod; std::atomic dependenciesPresent{false}; bool latestFlushIsHostVisible = false; }; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 7f4a5f6956..202dfa0db7 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -117,88 +117,115 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate::flushBcsTask } template -NEO::CompletionStamp CommandListCoreFamilyImmediate::flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) { - void *sshCpuPointer = nullptr; +void CommandListCoreFamilyImmediate::handleDebugSurfaceStateUpdate(NEO::IndirectHeap *ssh) { - if (kernelOperation) { - NEO::IndirectHeap *dsh = nullptr; - NEO::IndirectHeap *ssh = nullptr; + NEO::Device *neoDevice = this->device->getNEODevice(); + if (neoDevice->getDebugger() && !neoDevice->getBindlessHeapsHelper()) { + auto csrHw = static_cast *>(this->csr); + auto &sshState = csrHw->getSshState(); + bool sshDirty = sshState.updateAndCheck(ssh); - NEO::IndirectHeap *ioh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::indirectObject); - this->csr->makeResident(*ioh->getGraphicsAllocation()); + if (sshDirty) { + auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); + auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; + + NEO::EncodeSurfaceStateArgs args; + args.outMemory = &surfaceState; + args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress(); + args.size = this->device->getDebugSurface()->getUnderlyingBufferSize(); + args.mocs = this->device->getMOCS(false, false); + args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); + args.allocation = this->device->getDebugSurface(); + args.gmmHelper = neoDevice->getGmmHelper(); + args.areMultipleSubDevicesInContext = false; + args.isDebuggerActive = true; + NEO::EncodeSurfaceState::encodeBuffer(args); + *reinterpret_cast(surfaceStateSpace) = surfaceState; + } + } +} + +template +template +void CommandListCoreFamilyImmediate::handleHeapsAndResidencyForImmediateRegularTask(void *&sshCpuBaseAddress) { + NEO::IndirectHeap *dsh = nullptr; + NEO::IndirectHeap *ioh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::indirectObject); + NEO::IndirectHeap *ssh = nullptr; + + this->csr->makeResident(*ioh->getGraphicsAllocation()); + + if constexpr (streamStatesSupported) { if (this->requiredStreamState.stateBaseAddress.indirectObjectBaseAddress.value == NEO::StreamProperty64::initValue) { this->requiredStreamState.stateBaseAddress.setPropertiesIndirectState(ioh->getHeapGpuBase(), ioh->getHeapSizeInPages()); } + } - if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) { - ssh = this->csr->getGlobalStatelessHeap(); - this->csr->makeResident(*ssh->getGraphicsAllocation()); + if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) { + ssh = this->csr->getGlobalStatelessHeap(); + this->csr->makeResident(*ssh->getGraphicsAllocation()); + + if constexpr (streamStatesSupported) { if (this->requiredStreamState.stateBaseAddress.surfaceStateBaseAddress.value == NEO::StreamProperty64::initValue) { this->requiredStreamState.stateBaseAddress.setPropertiesSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages()); } - } else if (this->immediateCmdListHeapSharing) { - ssh = this->commandContainer.getSurfaceStateHeapReserve().indirectHeapReservation; - if (ssh->getGraphicsAllocation()) { - this->csr->makeResident(*ssh->getGraphicsAllocation()); + } + } else if (this->immediateCmdListHeapSharing) { + ssh = this->commandContainer.getSurfaceStateHeapReserve().indirectHeapReservation; + if (ssh->getGraphicsAllocation()) { + this->csr->makeResident(*ssh->getGraphicsAllocation()); + if constexpr (streamStatesSupported) { this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(), ssh->getHeapGpuBase(), ssh->getHeapSizeInPages()); } - if (this->dynamicHeapRequired) { - dsh = this->commandContainer.getDynamicStateHeapReserve().indirectHeapReservation; - if (dsh->getGraphicsAllocation()) { - this->csr->makeResident(*dsh->getGraphicsAllocation()); + } + if (this->dynamicHeapRequired) { + dsh = this->commandContainer.getDynamicStateHeapReserve().indirectHeapReservation; + if (dsh->getGraphicsAllocation()) { + this->csr->makeResident(*dsh->getGraphicsAllocation()); + if constexpr (streamStatesSupported) { this->requiredStreamState.stateBaseAddress.setPropertiesDynamicState(dsh->getHeapGpuBase(), dsh->getHeapSizeInPages()); } } - } else { - if (this->dynamicHeapRequired) { - dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::dynamicState); - this->csr->makeResident(*dsh->getGraphicsAllocation()); + } + } else { + if (this->dynamicHeapRequired) { + dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::dynamicState); + this->csr->makeResident(*dsh->getGraphicsAllocation()); + if constexpr (streamStatesSupported) { this->requiredStreamState.stateBaseAddress.setPropertiesDynamicState(dsh->getHeapGpuBase(), dsh->getHeapSizeInPages()); } - ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::surfaceState); - this->csr->makeResident(*ssh->getGraphicsAllocation()); + } + ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::surfaceState); + this->csr->makeResident(*ssh->getGraphicsAllocation()); + if constexpr (streamStatesSupported) { this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(), ssh->getHeapGpuBase(), ssh->getHeapSizeInPages()); } + } - sshCpuPointer = ssh->getCpuBase(); - - if (this->device->getL0Debugger()) { - this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId())); - this->csr->makeResident(*this->device->getDebugSurface()); - if (this->device->getNEODevice()->getBindlessHeapsHelper()) { - this->csr->makeResident(*this->device->getNEODevice()->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::specialSsh)->getGraphicsAllocation()); - } + if (this->device->getL0Debugger()) { + this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId())); + this->csr->makeResident(*this->device->getDebugSurface()); + if (this->device->getNEODevice()->getBindlessHeapsHelper()) { + this->csr->makeResident(*this->device->getNEODevice()->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::specialSsh)->getGraphicsAllocation()); } + } - NEO::Device *neoDevice = this->device->getNEODevice(); - if (neoDevice->getDebugger() && !neoDevice->getBindlessHeapsHelper()) { - auto csrHw = static_cast *>(this->csr); - auto &sshState = csrHw->getSshState(); - bool sshDirty = sshState.updateAndCheck(ssh); + UNRECOVERABLE_IF(ssh == nullptr); + sshCpuBaseAddress = ssh->getCpuBase(); + handleDebugSurfaceStateUpdate(ssh); - if (sshDirty) { - auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); - auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; + this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(0u), this->getCommandListPerThreadScratchSize(1u)); +} - NEO::EncodeSurfaceStateArgs args; - args.outMemory = &surfaceState; - args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress(); - args.size = this->device->getDebugSurface()->getUnderlyingBufferSize(); - args.mocs = this->device->getMOCS(false, false); - args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); - args.allocation = this->device->getDebugSurface(); - args.gmmHelper = neoDevice->getGmmHelper(); - args.areMultipleSubDevicesInContext = false; - args.isDebuggerActive = true; - NEO::EncodeSurfaceState::encodeBuffer(args); - *reinterpret_cast(surfaceStateSpace) = surfaceState; - } - } +template +NEO::CompletionStamp CommandListCoreFamilyImmediate::flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) { + void *sshCpuPointer = nullptr; + constexpr bool streamStatesSupported = true; - this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(0u), this->getCommandListPerThreadScratchSize(1u)); + if (kernelOperation) { + handleHeapsAndResidencyForImmediateRegularTask(sshCpuPointer); } NEO::ImmediateDispatchFlags dispatchFlags{ @@ -210,10 +237,35 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate::flushImmedia }; CommandListImp::storeReferenceTsToMappedEvents(true); - return this->flushImmediateTaskMethod(cmdStreamTask, - taskStartOffset, - dispatchFlags, - *(this->device->getNEODevice())); + return this->csr->flushImmediateTask(cmdStreamTask, + taskStartOffset, + dispatchFlags, + *(this->device->getNEODevice())); +} + +template +NEO::CompletionStamp CommandListCoreFamilyImmediate::flushImmediateRegularTaskStateless(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) { + + void *sshCpuPointer = nullptr; + constexpr bool streamStatesSupported = false; + + if (kernelOperation) { + handleHeapsAndResidencyForImmediateRegularTask(sshCpuPointer); + } + + NEO::ImmediateDispatchFlags dispatchFlags{ + nullptr, // requiredState + sshCpuPointer, // sshCpuBase + this->isSyncModeQueue, // blockingAppend + hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies + hasStallingCmds // hasStallingCmds + }; + CommandListImp::storeReferenceTsToMappedEvents(true); + + return this->csr->flushImmediateTaskStateless(cmdStreamTask, + taskStartOffset, + dispatchFlags, + *(this->device->getNEODevice())); } template @@ -1364,12 +1416,12 @@ ze_result_t CommandListCoreFamilyImmediate::synchronizeInOrderExe template void CommandListCoreFamilyImmediate::setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) { if (L0GfxCoreHelper::useImmediateComputeFlushTask(rootDeviceEnvironment)) { - this->computeFlushMethod = &CommandListCoreFamilyImmediate::flushImmediateRegularTask; if (this->isHeaplessStateInitEnabled()) { - this->flushImmediateTaskMethod = std::bind(&NEO::CommandStreamReceiver::flushImmediateTaskStateless, this->csr, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); + this->computeFlushMethod = &CommandListCoreFamilyImmediate::flushImmediateRegularTaskStateless; } else { - this->flushImmediateTaskMethod = std::bind(&NEO::CommandStreamReceiver::flushImmediateTask, this->csr, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); + + this->computeFlushMethod = &CommandListCoreFamilyImmediate::flushImmediateRegularTask; } } } diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 655dfa6115..02d7a64fa8 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -236,12 +236,14 @@ struct MockCommandListImmediate : public CommandListCoreFamilyImmediate; using BaseClass::checkAssert; using BaseClass::cmdListCurrentStartOffset; + using BaseClass::cmdListHeapAddressModel; using BaseClass::cmdQImmediate; using BaseClass::commandContainer; using BaseClass::compactL3FlushEventPacket; using BaseClass::containsAnyKernel; using BaseClass::csr; using BaseClass::device; + using BaseClass::dynamicHeapRequired; using BaseClass::finalStreamState; using BaseClass::immediateCmdListHeapSharing; using BaseClass::indirectAllocationsAllowed;