Enable flushTask path for BCS

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2022-11-30 14:57:18 +00:00
committed by Compute-Runtime-Automation
parent 99655d34f9
commit 85da0ee184
13 changed files with 370 additions and 43 deletions

View File

@@ -11,7 +11,9 @@
namespace NEO {
struct SvmAllocationData;
}
struct CompletionStamp;
class LinearStream;
} // namespace NEO
namespace L0 {
@@ -25,6 +27,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
using BaseClass = CommandListCoreFamily<gfxCoreFamily>;
using BaseClass::BaseClass;
using BaseClass::executeCommandListImmediate;
using BaseClass::isCopyOnly;
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions,
@@ -126,6 +129,9 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
// Makes the command list's allocations resident (optionally migrating them) and submits the
// command stream starting at cmdListCurrentStartOffset. Copy-only lists (isCopyOnly()) are
// submitted via flushBcsTask(); all others via flushRegularTask().
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
// Submits cmdStreamTask from taskStartOffset through the CSR's flushTask() path and returns its completion stamp.
NEO::CompletionStamp flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
// Submits cmdStreamTask from taskStartOffset through the CSR's flushBcsTask() path and returns its completion stamp.
NEO::CompletionStamp flushBcsTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
void checkAvailableSpace();
void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags);

View File

@@ -69,7 +69,18 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::updateDispatchFlagsWithRequi
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
// Submits the given stream range through the command stream receiver's flushBcsTask() path.
//   cmdStreamTask                  - stream holding the commands to submit
//   taskStartOffset                - offset in cmdStreamTask where this submission begins
//   hasStallingCmds                - forwarded unchanged into the BCS dispatch flags
//   hasRelaxedOrderingDependencies - forwarded unchanged into the BCS dispatch flags
// Returns the completion stamp reported by the CSR.
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushBcsTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
    // The queue's synchronous mode is what drives the flushTaskCount flag.
    const bool flushTaskCount = this->isSyncModeQueue;
    NEO::DispatchBcsFlags bcsFlags(flushTaskCount, hasStallingCmds, hasRelaxedOrderingDependencies);

    const auto &hwInfo = this->device->getHwInfo();
    return this->csr->flushBcsTask(cmdStreamTask, taskStartOffset, bcsFlags, hwInfo);
}
template <GFXCORE_FAMILY gfxCoreFamily>
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
NEO::DispatchFlags dispatchFlags(
{}, // csrDependencies
nullptr, // barrierTimestampPacketNodes
@@ -103,41 +114,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
hasStallingCmds, // hasStallingCmds
hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies
);
this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags);
this->commandContainer.removeDuplicatesFromResidencyContainer();
auto commandStream = this->commandContainer.getCommandStream();
size_t commandStreamStart = this->cmdListCurrentStartOffset;
auto lockCSR = this->csr->obtainUniqueOwnership();
std::unique_lock<std::mutex> lockForIndirect;
if (this->hasIndirectAllocationsAllowed()) {
this->cmdQImmediate->handleIndirectAllocationResidency(this->getUnifiedMemoryControls(), lockForIndirect, performMigration);
}
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
if (performMigration) {
auto deviceImp = static_cast<DeviceImp *>(this->device);
auto pageFaultManager = deviceImp->getDriverHandle()->getMemoryManager()->getPageFaultManager();
if (pageFaultManager == nullptr) {
performMigration = false;
}
}
this->cmdQImmediate->makeResidentAndMigrate(performMigration, this->commandContainer.getResidencyContainer());
if (performMigration) {
this->migrateSharedAllocations();
}
if (this->performMemoryPrefetch) {
auto prefetchManager = this->device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
prefetchManager->migrateAllocationsToGpu(this->getPrefetchContext(), *this->device->getDriverHandle()->getSvmAllocsManager(), *this->device->getNEODevice());
}
auto ioh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT));
NEO::IndirectHeap *dsh = nullptr;
NEO::IndirectHeap *ssh = nullptr;
@@ -182,15 +162,56 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
}
}
auto completionStamp = this->csr->flushTask(
*commandStream,
commandStreamStart,
return this->csr->flushTask(
cmdStreamTask,
taskStartOffset,
dsh,
ioh,
ssh,
this->csr->peekTaskLevel(),
dispatchFlags,
*(this->device->getNEODevice()));
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
this->commandContainer.removeDuplicatesFromResidencyContainer();
auto commandStream = this->commandContainer.getCommandStream();
size_t commandStreamStart = this->cmdListCurrentStartOffset;
auto lockCSR = this->csr->obtainUniqueOwnership();
std::unique_lock<std::mutex> lockForIndirect;
if (this->hasIndirectAllocationsAllowed()) {
this->cmdQImmediate->handleIndirectAllocationResidency(this->getUnifiedMemoryControls(), lockForIndirect, performMigration);
}
if (performMigration) {
auto deviceImp = static_cast<DeviceImp *>(this->device);
auto pageFaultManager = deviceImp->getDriverHandle()->getMemoryManager()->getPageFaultManager();
if (pageFaultManager == nullptr) {
performMigration = false;
}
}
this->cmdQImmediate->makeResidentAndMigrate(performMigration, this->commandContainer.getResidencyContainer());
if (performMigration) {
this->migrateSharedAllocations();
}
if (this->performMemoryPrefetch) {
auto prefetchManager = this->device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
prefetchManager->migrateAllocationsToGpu(this->getPrefetchContext(), *this->device->getDriverHandle()->getSvmAllocsManager(), *this->device->getNEODevice());
}
NEO::CompletionStamp completionStamp;
if (isCopyOnly()) {
completionStamp = flushBcsTask(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies);
} else {
completionStamp = flushRegularTask(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies);
}
if (completionStamp.taskCount > NEO::CompletionStamp::notReady) {
if (completionStamp.taskCount == NEO::CompletionStamp::outOfHostMemory) {

View File

@@ -133,7 +133,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
commandList->internalUsage = internalUsage;
commandList->cmdListType = CommandListType::TYPE_IMMEDIATE;
commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS);
if ((!NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType)) && !internalUsage) {
if (!internalUsage) {
commandList->isFlushTaskSubmissionEnabled = hwHelper.isPlatformFlushTaskEnabled(hwInfo);
if (NEO::DebugManager.flags.EnableFlushTaskSubmission.get() != -1) {
commandList->isFlushTaskSubmissionEnabled = !!NEO::DebugManager.flags.EnableFlushTaskSubmission.get();