performance: remove binding flushImmediateRegularTask functions
Related-To: NEO-7824 Related-To: HSD-18037800484 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
dc158b8705
commit
e09424f8b2
|
@ -189,8 +189,13 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
|||
|
||||
NEO::CompletionStamp flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation);
|
||||
NEO::CompletionStamp flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation);
|
||||
NEO::CompletionStamp flushImmediateRegularTaskStateless(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation);
|
||||
NEO::CompletionStamp flushBcsTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, NEO::CommandStreamReceiver *csr);
|
||||
|
||||
template <bool streamStatesSupported>
|
||||
void handleHeapsAndResidencyForImmediateRegularTask(void *&sshCpuBaseAddress);
|
||||
void handleDebugSurfaceStateUpdate(NEO::IndirectHeap *ssh);
|
||||
|
||||
void checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies, size_t commandSize);
|
||||
void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags);
|
||||
|
||||
|
@ -222,7 +227,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
|||
|
||||
MOCKABLE_VIRTUAL void checkAssert();
|
||||
ComputeFlushMethodType computeFlushMethod = nullptr;
|
||||
std::function<NEO::CompletionStamp(NEO::LinearStream &, size_t, NEO::ImmediateDispatchFlags &, NEO::Device &)> flushImmediateTaskMethod;
|
||||
std::atomic<bool> dependenciesPresent{false};
|
||||
bool latestFlushIsHostVisible = false;
|
||||
};
|
||||
|
|
|
@ -117,88 +117,115 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushBcsTask
|
|||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) {
|
||||
void *sshCpuPointer = nullptr;
|
||||
void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleDebugSurfaceStateUpdate(NEO::IndirectHeap *ssh) {
|
||||
|
||||
if (kernelOperation) {
|
||||
NEO::IndirectHeap *dsh = nullptr;
|
||||
NEO::IndirectHeap *ssh = nullptr;
|
||||
NEO::Device *neoDevice = this->device->getNEODevice();
|
||||
if (neoDevice->getDebugger() && !neoDevice->getBindlessHeapsHelper()) {
|
||||
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
|
||||
auto &sshState = csrHw->getSshState();
|
||||
bool sshDirty = sshState.updateAndCheck(ssh);
|
||||
|
||||
NEO::IndirectHeap *ioh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::indirectObject);
|
||||
this->csr->makeResident(*ioh->getGraphicsAllocation());
|
||||
if (sshDirty) {
|
||||
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
|
||||
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
|
||||
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = &surfaceState;
|
||||
args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress();
|
||||
args.size = this->device->getDebugSurface()->getUnderlyingBufferSize();
|
||||
args.mocs = this->device->getMOCS(false, false);
|
||||
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
|
||||
args.allocation = this->device->getDebugSurface();
|
||||
args.gmmHelper = neoDevice->getGmmHelper();
|
||||
args.areMultipleSubDevicesInContext = false;
|
||||
args.isDebuggerActive = true;
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
|
||||
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
template <bool streamStatesSupported>
|
||||
void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleHeapsAndResidencyForImmediateRegularTask(void *&sshCpuBaseAddress) {
|
||||
NEO::IndirectHeap *dsh = nullptr;
|
||||
NEO::IndirectHeap *ioh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::indirectObject);
|
||||
NEO::IndirectHeap *ssh = nullptr;
|
||||
|
||||
this->csr->makeResident(*ioh->getGraphicsAllocation());
|
||||
|
||||
if constexpr (streamStatesSupported) {
|
||||
if (this->requiredStreamState.stateBaseAddress.indirectObjectBaseAddress.value == NEO::StreamProperty64::initValue) {
|
||||
this->requiredStreamState.stateBaseAddress.setPropertiesIndirectState(ioh->getHeapGpuBase(), ioh->getHeapSizeInPages());
|
||||
}
|
||||
}
|
||||
|
||||
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) {
|
||||
ssh = this->csr->getGlobalStatelessHeap();
|
||||
this->csr->makeResident(*ssh->getGraphicsAllocation());
|
||||
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) {
|
||||
ssh = this->csr->getGlobalStatelessHeap();
|
||||
this->csr->makeResident(*ssh->getGraphicsAllocation());
|
||||
|
||||
if constexpr (streamStatesSupported) {
|
||||
if (this->requiredStreamState.stateBaseAddress.surfaceStateBaseAddress.value == NEO::StreamProperty64::initValue) {
|
||||
this->requiredStreamState.stateBaseAddress.setPropertiesSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages());
|
||||
}
|
||||
} else if (this->immediateCmdListHeapSharing) {
|
||||
ssh = this->commandContainer.getSurfaceStateHeapReserve().indirectHeapReservation;
|
||||
if (ssh->getGraphicsAllocation()) {
|
||||
this->csr->makeResident(*ssh->getGraphicsAllocation());
|
||||
}
|
||||
} else if (this->immediateCmdListHeapSharing) {
|
||||
ssh = this->commandContainer.getSurfaceStateHeapReserve().indirectHeapReservation;
|
||||
if (ssh->getGraphicsAllocation()) {
|
||||
this->csr->makeResident(*ssh->getGraphicsAllocation());
|
||||
|
||||
if constexpr (streamStatesSupported) {
|
||||
this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(),
|
||||
ssh->getHeapGpuBase(), ssh->getHeapSizeInPages());
|
||||
}
|
||||
if (this->dynamicHeapRequired) {
|
||||
dsh = this->commandContainer.getDynamicStateHeapReserve().indirectHeapReservation;
|
||||
if (dsh->getGraphicsAllocation()) {
|
||||
this->csr->makeResident(*dsh->getGraphicsAllocation());
|
||||
}
|
||||
if (this->dynamicHeapRequired) {
|
||||
dsh = this->commandContainer.getDynamicStateHeapReserve().indirectHeapReservation;
|
||||
if (dsh->getGraphicsAllocation()) {
|
||||
this->csr->makeResident(*dsh->getGraphicsAllocation());
|
||||
if constexpr (streamStatesSupported) {
|
||||
this->requiredStreamState.stateBaseAddress.setPropertiesDynamicState(dsh->getHeapGpuBase(), dsh->getHeapSizeInPages());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (this->dynamicHeapRequired) {
|
||||
dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::dynamicState);
|
||||
this->csr->makeResident(*dsh->getGraphicsAllocation());
|
||||
}
|
||||
} else {
|
||||
if (this->dynamicHeapRequired) {
|
||||
dsh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::dynamicState);
|
||||
this->csr->makeResident(*dsh->getGraphicsAllocation());
|
||||
if constexpr (streamStatesSupported) {
|
||||
this->requiredStreamState.stateBaseAddress.setPropertiesDynamicState(dsh->getHeapGpuBase(), dsh->getHeapSizeInPages());
|
||||
}
|
||||
ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::surfaceState);
|
||||
this->csr->makeResident(*ssh->getGraphicsAllocation());
|
||||
}
|
||||
ssh = this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::surfaceState);
|
||||
this->csr->makeResident(*ssh->getGraphicsAllocation());
|
||||
if constexpr (streamStatesSupported) {
|
||||
this->requiredStreamState.stateBaseAddress.setPropertiesBindingTableSurfaceState(ssh->getHeapGpuBase(), ssh->getHeapSizeInPages(),
|
||||
ssh->getHeapGpuBase(), ssh->getHeapSizeInPages());
|
||||
}
|
||||
}
|
||||
|
||||
sshCpuPointer = ssh->getCpuBase();
|
||||
|
||||
if (this->device->getL0Debugger()) {
|
||||
this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId()));
|
||||
this->csr->makeResident(*this->device->getDebugSurface());
|
||||
if (this->device->getNEODevice()->getBindlessHeapsHelper()) {
|
||||
this->csr->makeResident(*this->device->getNEODevice()->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::specialSsh)->getGraphicsAllocation());
|
||||
}
|
||||
if (this->device->getL0Debugger()) {
|
||||
this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId()));
|
||||
this->csr->makeResident(*this->device->getDebugSurface());
|
||||
if (this->device->getNEODevice()->getBindlessHeapsHelper()) {
|
||||
this->csr->makeResident(*this->device->getNEODevice()->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::specialSsh)->getGraphicsAllocation());
|
||||
}
|
||||
}
|
||||
|
||||
NEO::Device *neoDevice = this->device->getNEODevice();
|
||||
if (neoDevice->getDebugger() && !neoDevice->getBindlessHeapsHelper()) {
|
||||
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
|
||||
auto &sshState = csrHw->getSshState();
|
||||
bool sshDirty = sshState.updateAndCheck(ssh);
|
||||
UNRECOVERABLE_IF(ssh == nullptr);
|
||||
sshCpuBaseAddress = ssh->getCpuBase();
|
||||
handleDebugSurfaceStateUpdate(ssh);
|
||||
|
||||
if (sshDirty) {
|
||||
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
|
||||
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
|
||||
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(0u), this->getCommandListPerThreadScratchSize(1u));
|
||||
}
|
||||
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = &surfaceState;
|
||||
args.graphicsAddress = this->device->getDebugSurface()->getGpuAddress();
|
||||
args.size = this->device->getDebugSurface()->getUnderlyingBufferSize();
|
||||
args.mocs = this->device->getMOCS(false, false);
|
||||
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
|
||||
args.allocation = this->device->getDebugSurface();
|
||||
args.gmmHelper = neoDevice->getGmmHelper();
|
||||
args.areMultipleSubDevicesInContext = false;
|
||||
args.isDebuggerActive = true;
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
|
||||
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
|
||||
}
|
||||
}
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) {
|
||||
void *sshCpuPointer = nullptr;
|
||||
constexpr bool streamStatesSupported = true;
|
||||
|
||||
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(0u), this->getCommandListPerThreadScratchSize(1u));
|
||||
if (kernelOperation) {
|
||||
handleHeapsAndResidencyForImmediateRegularTask<streamStatesSupported>(sshCpuPointer);
|
||||
}
|
||||
|
||||
NEO::ImmediateDispatchFlags dispatchFlags{
|
||||
|
@ -210,10 +237,35 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
|
|||
};
|
||||
CommandListImp::storeReferenceTsToMappedEvents(true);
|
||||
|
||||
return this->flushImmediateTaskMethod(cmdStreamTask,
|
||||
taskStartOffset,
|
||||
dispatchFlags,
|
||||
*(this->device->getNEODevice()));
|
||||
return this->csr->flushImmediateTask(cmdStreamTask,
|
||||
taskStartOffset,
|
||||
dispatchFlags,
|
||||
*(this->device->getNEODevice()));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTaskStateless(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) {
|
||||
|
||||
void *sshCpuPointer = nullptr;
|
||||
constexpr bool streamStatesSupported = false;
|
||||
|
||||
if (kernelOperation) {
|
||||
handleHeapsAndResidencyForImmediateRegularTask<streamStatesSupported>(sshCpuPointer);
|
||||
}
|
||||
|
||||
NEO::ImmediateDispatchFlags dispatchFlags{
|
||||
nullptr, // requiredState
|
||||
sshCpuPointer, // sshCpuBase
|
||||
this->isSyncModeQueue, // blockingAppend
|
||||
hasRelaxedOrderingDependencies, // hasRelaxedOrderingDependencies
|
||||
hasStallingCmds // hasStallingCmds
|
||||
};
|
||||
CommandListImp::storeReferenceTsToMappedEvents(true);
|
||||
|
||||
return this->csr->flushImmediateTaskStateless(cmdStreamTask,
|
||||
taskStartOffset,
|
||||
dispatchFlags,
|
||||
*(this->device->getNEODevice()));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
@ -1364,12 +1416,12 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExe
|
|||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamilyImmediate<gfxCoreFamily>::setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
if (L0GfxCoreHelper::useImmediateComputeFlushTask(rootDeviceEnvironment)) {
|
||||
this->computeFlushMethod = &CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTask;
|
||||
|
||||
if (this->isHeaplessStateInitEnabled()) {
|
||||
this->flushImmediateTaskMethod = std::bind(&NEO::CommandStreamReceiver::flushImmediateTaskStateless, this->csr, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4);
|
||||
this->computeFlushMethod = &CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTaskStateless;
|
||||
} else {
|
||||
this->flushImmediateTaskMethod = std::bind(&NEO::CommandStreamReceiver::flushImmediateTask, this->csr, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4);
|
||||
|
||||
this->computeFlushMethod = &CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -236,12 +236,14 @@ struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreF
|
|||
using BaseClass = CommandListCoreFamilyImmediate<gfxCoreFamily>;
|
||||
using BaseClass::checkAssert;
|
||||
using BaseClass::cmdListCurrentStartOffset;
|
||||
using BaseClass::cmdListHeapAddressModel;
|
||||
using BaseClass::cmdQImmediate;
|
||||
using BaseClass::commandContainer;
|
||||
using BaseClass::compactL3FlushEventPacket;
|
||||
using BaseClass::containsAnyKernel;
|
||||
using BaseClass::csr;
|
||||
using BaseClass::device;
|
||||
using BaseClass::dynamicHeapRequired;
|
||||
using BaseClass::finalStreamState;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::indirectAllocationsAllowed;
|
||||
|
|
Loading…
Reference in New Issue