mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
Enable flushTask path for BCS
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
99655d34f9
commit
85da0ee184
@@ -11,7 +11,9 @@
|
||||
|
||||
namespace NEO {
|
||||
struct SvmAllocationData;
|
||||
}
|
||||
struct CompletionStamp;
|
||||
class LinearStream;
|
||||
} // namespace NEO
|
||||
|
||||
namespace L0 {
|
||||
|
||||
@@ -25,6 +27,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
using BaseClass = CommandListCoreFamily<gfxCoreFamily>;
|
||||
using BaseClass::BaseClass;
|
||||
using BaseClass::executeCommandListImmediate;
|
||||
using BaseClass::isCopyOnly;
|
||||
|
||||
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *threadGroupDimensions,
|
||||
@@ -126,6 +129,9 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
|
||||
|
||||
NEO::CompletionStamp flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
|
||||
NEO::CompletionStamp flushBcsTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
|
||||
|
||||
void checkAvailableSpace();
|
||||
void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags);
|
||||
|
||||
|
||||
@@ -69,7 +69,18 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::updateDispatchFlagsWithRequi
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
|
||||
// Submits the given command stream through the CSR's copy-engine (BCS) flush path.
//
// Builds a DispatchBcsFlags for this submission and forwards everything to
// csr->flushBcsTask together with the device's hardware info.
//
// cmdStreamTask                  - stream holding the commands to submit
// taskStartOffset                - offset inside cmdStreamTask where this task begins
// hasStallingCmds                - forwarded as DispatchBcsFlags::hasStallingCmds
// hasRelaxedOrderingDependencies - forwarded as DispatchBcsFlags::hasRelaxedOrderingDependencies
//
// Returns the CompletionStamp produced by the CSR.
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushBcsTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
    // The task-count flush is requested only when the list runs in synchronous queue mode.
    NEO::DispatchBcsFlags dispatchBcsFlags(
        this->isSyncModeQueue,         // flushTaskCount
        hasStallingCmds,               // hasStallingCmds
        hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies
    );

    return this->csr->flushBcsTask(cmdStreamTask, taskStartOffset, dispatchBcsFlags, this->device->getHwInfo());
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
|
||||
NEO::DispatchFlags dispatchFlags(
|
||||
{}, // csrDependencies
|
||||
nullptr, // barrierTimestampPacketNodes
|
||||
@@ -103,41 +114,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
|
||||
hasStallingCmds, // hasStallingCmds
|
||||
hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies
|
||||
);
|
||||
|
||||
this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags);
|
||||
|
||||
this->commandContainer.removeDuplicatesFromResidencyContainer();
|
||||
|
||||
auto commandStream = this->commandContainer.getCommandStream();
|
||||
size_t commandStreamStart = this->cmdListCurrentStartOffset;
|
||||
|
||||
auto lockCSR = this->csr->obtainUniqueOwnership();
|
||||
|
||||
std::unique_lock<std::mutex> lockForIndirect;
|
||||
if (this->hasIndirectAllocationsAllowed()) {
|
||||
this->cmdQImmediate->handleIndirectAllocationResidency(this->getUnifiedMemoryControls(), lockForIndirect, performMigration);
|
||||
}
|
||||
|
||||
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
|
||||
|
||||
if (performMigration) {
|
||||
auto deviceImp = static_cast<DeviceImp *>(this->device);
|
||||
auto pageFaultManager = deviceImp->getDriverHandle()->getMemoryManager()->getPageFaultManager();
|
||||
if (pageFaultManager == nullptr) {
|
||||
performMigration = false;
|
||||
}
|
||||
}
|
||||
|
||||
this->cmdQImmediate->makeResidentAndMigrate(performMigration, this->commandContainer.getResidencyContainer());
|
||||
|
||||
if (performMigration) {
|
||||
this->migrateSharedAllocations();
|
||||
}
|
||||
|
||||
if (this->performMemoryPrefetch) {
|
||||
auto prefetchManager = this->device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
|
||||
prefetchManager->migrateAllocationsToGpu(this->getPrefetchContext(), *this->device->getDriverHandle()->getSvmAllocsManager(), *this->device->getNEODevice());
|
||||
}
|
||||
|
||||
auto ioh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT));
|
||||
NEO::IndirectHeap *dsh = nullptr;
|
||||
NEO::IndirectHeap *ssh = nullptr;
|
||||
@@ -182,15 +162,56 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
|
||||
}
|
||||
}
|
||||
|
||||
auto completionStamp = this->csr->flushTask(
|
||||
*commandStream,
|
||||
commandStreamStart,
|
||||
return this->csr->flushTask(
|
||||
cmdStreamTask,
|
||||
taskStartOffset,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
this->csr->peekTaskLevel(),
|
||||
dispatchFlags,
|
||||
*(this->device->getNEODevice()));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
|
||||
this->commandContainer.removeDuplicatesFromResidencyContainer();
|
||||
|
||||
auto commandStream = this->commandContainer.getCommandStream();
|
||||
size_t commandStreamStart = this->cmdListCurrentStartOffset;
|
||||
|
||||
auto lockCSR = this->csr->obtainUniqueOwnership();
|
||||
|
||||
std::unique_lock<std::mutex> lockForIndirect;
|
||||
if (this->hasIndirectAllocationsAllowed()) {
|
||||
this->cmdQImmediate->handleIndirectAllocationResidency(this->getUnifiedMemoryControls(), lockForIndirect, performMigration);
|
||||
}
|
||||
|
||||
if (performMigration) {
|
||||
auto deviceImp = static_cast<DeviceImp *>(this->device);
|
||||
auto pageFaultManager = deviceImp->getDriverHandle()->getMemoryManager()->getPageFaultManager();
|
||||
if (pageFaultManager == nullptr) {
|
||||
performMigration = false;
|
||||
}
|
||||
}
|
||||
|
||||
this->cmdQImmediate->makeResidentAndMigrate(performMigration, this->commandContainer.getResidencyContainer());
|
||||
|
||||
if (performMigration) {
|
||||
this->migrateSharedAllocations();
|
||||
}
|
||||
|
||||
if (this->performMemoryPrefetch) {
|
||||
auto prefetchManager = this->device->getDriverHandle()->getMemoryManager()->getPrefetchManager();
|
||||
prefetchManager->migrateAllocationsToGpu(this->getPrefetchContext(), *this->device->getDriverHandle()->getSvmAllocsManager(), *this->device->getNEODevice());
|
||||
}
|
||||
|
||||
NEO::CompletionStamp completionStamp;
|
||||
if (isCopyOnly()) {
|
||||
completionStamp = flushBcsTask(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies);
|
||||
} else {
|
||||
completionStamp = flushRegularTask(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies);
|
||||
}
|
||||
|
||||
if (completionStamp.taskCount > NEO::CompletionStamp::notReady) {
|
||||
if (completionStamp.taskCount == NEO::CompletionStamp::outOfHostMemory) {
|
||||
|
||||
@@ -133,7 +133,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
||||
commandList->internalUsage = internalUsage;
|
||||
commandList->cmdListType = CommandListType::TYPE_IMMEDIATE;
|
||||
commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS);
|
||||
if ((!NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType)) && !internalUsage) {
|
||||
if (!internalUsage) {
|
||||
commandList->isFlushTaskSubmissionEnabled = hwHelper.isPlatformFlushTaskEnabled(hwInfo);
|
||||
if (NEO::DebugManager.flags.EnableFlushTaskSubmission.get() != -1) {
|
||||
commandList->isFlushTaskSubmissionEnabled = !!NEO::DebugManager.flags.EnableFlushTaskSubmission.get();
|
||||
|
||||
@@ -1289,7 +1289,7 @@ HWTEST2_F(CommandListCreateWithBcs,
|
||||
EXPECT_TRUE(commandList->isCopyOnly());
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreateWithBcs, givenForceFlushTaskEnabledWhenCreatingCommandListUsingLinkedCopyThenFlushTaskModeNotUsed, IsAtLeastXeHpCore) {
|
||||
HWTEST2_F(CommandListCreateWithBcs, givenForceFlushTaskEnabledWhenCreatingCommandListUsingLinkedCopyThenFlushTaskModeUsed, IsAtLeastXeHpCore) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1);
|
||||
|
||||
@@ -1306,7 +1306,7 @@ HWTEST2_F(CommandListCreateWithBcs, givenForceFlushTaskEnabledWhenCreatingComman
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
||||
EXPECT_TRUE(commandList->isCopyOnly());
|
||||
EXPECT_FALSE(commandList->isFlushTaskSubmissionEnabled);
|
||||
EXPECT_TRUE(commandList->isFlushTaskSubmissionEnabled);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, whenGettingCommandsToPatchThenCorrectValuesAreReturned, IsAtLeastSkl) {
|
||||
|
||||
@@ -861,7 +861,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndTbxCsrWithCopyOnlyImmediateComm
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueWithCopyOnlyImmediateCommandListCreatedThenSlushTaskSubmissionIsSetToFalse) {
|
||||
HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueWithCopyOnlyImmediateCommandListCreatedThenFlushTaskSubmissionIsSetToTrue) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);
|
||||
|
||||
@@ -871,7 +871,7 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueWithCopyOnl
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
||||
EXPECT_EQ(false, commandList->isFlushTaskSubmissionEnabled);
|
||||
EXPECT_TRUE(commandList->isFlushTaskSubmissionEnabled);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenAllValuesTbxAndSyncModeFlagsWhenCheckingWaitlistEventSyncRequiredThenExpectTrueOnlyForTbxTrueAndAsyncMode, IsAtLeastSkl) {
|
||||
|
||||
@@ -182,6 +182,191 @@ HWTEST2_F(AppendMemoryCopy, givenAsyncImmediateCommandListWhenAppendingMemoryCop
|
||||
commandList->cmdQImmediate = nullptr;
|
||||
}
|
||||
|
||||
// Asynchronous immediate copy-only command list submitted through the flushTask path:
//  - the first append may program hardware-context commands into the CSR stream,
//    followed by an MI_BATCH_BUFFER_START pointing at the command list stream;
//  - the second append must not add anything to the CSR stream;
//  - no MI_FLUSH_DW targeting the CSR tag allocation is expected in the command list
//    stream, because the list is not in synchronous mode.
HWTEST2_F(AppendMemoryCopy, givenAsyncImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineThenProgramCmdStreamWithFlushTask, IsAtLeastSkl) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    DebugManagerStateRestore restore;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1);
    auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);

    auto cmdQueue = std::make_unique<Mock<CommandQueue>>();
    cmdQueue->csr = ultCsr;
    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
    ASSERT_NE(nullptr, commandList);
    commandList->isFlushTaskSubmissionEnabled = true;
    ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
    commandList->device = device;
    commandList->isSyncModeQueue = false;
    commandList->cmdQImmediate = cmdQueue.get();
    commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
    commandList->csr = ultCsr;

    // Program CSR state on first submit

    EXPECT_EQ(0u, ultCsr->getCS(0).getUsed());

    bool hwContextProgrammingRequired = (ultCsr->getCmdsSizeForHardwareContext() > 0);

    size_t expectedSize = 0;
    if (hwContextProgrammingRequired) {
        expectedSize = alignUp(ultCsr->getCmdsSizeForHardwareContext() + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::cacheLineSize);
    }

    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr));

    EXPECT_EQ(expectedSize, ultCsr->getCS(0).getUsed());

    // Skip over the system memory fence command, when present, to reach the BB_START.
    size_t offset = 0;
    if constexpr (FamilyType::isUsingMiMemFence) {
        if (ultCsr->globalFenceAllocation) {
            using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
            auto sysMemFence = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(ultCsr->getCS(0).getCpuBase());
            ASSERT_NE(nullptr, sysMemFence);
            EXPECT_EQ(ultCsr->globalFenceAllocation->getGpuAddress(), sysMemFence->getSystemMemoryFenceAddress());
            offset += sizeof(STATE_SYSTEM_MEM_FENCE_ADDRESS);
        }
    }

    if (hwContextProgrammingRequired) {
        auto bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(ptrOffset(ultCsr->getCS(0).getCpuBase(), offset));
        ASSERT_NE(nullptr, bbStartCmd);

        EXPECT_EQ(commandList->commandContainer.getCommandStream()->getGpuBase(), bbStartCmd->getBatchBufferStartAddress());
    }

    // Returns true when the parsed range contains an MI_FLUSH_DW whose destination is tagAddress.
    auto findTagUpdate = [](void *streamBase, size_t sizeUsed, uint64_t tagAddress) -> bool {
        GenCmdList genCmdList;
        EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer(genCmdList, streamBase, sizeUsed));

        for (auto itor = find<MI_FLUSH_DW *>(genCmdList.begin(), genCmdList.end()); itor != genCmdList.end(); ++itor) {
            auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
            if (cmd && cmd->getDestinationAddress() == tagAddress) {
                return true;
            }
        }

        return false;
    };

    EXPECT_FALSE(findTagUpdate(commandList->commandContainer.getCommandStream()->getCpuBase(),
                               commandList->commandContainer.getCommandStream()->getUsed(),
                               ultCsr->getTagAllocation()->getGpuAddress()));

    // Don't program CSR state on next submit
    size_t csrOffset = ultCsr->getCS(0).getUsed();
    size_t cmdListOffset = commandList->commandContainer.getCommandStream()->getUsed();

    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr));

    EXPECT_EQ(csrOffset, ultCsr->getCS(0).getUsed());

    EXPECT_FALSE(findTagUpdate(ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), cmdListOffset),
                               commandList->commandContainer.getCommandStream()->getUsed() - cmdListOffset,
                               ultCsr->getTagAllocation()->getGpuAddress()));
}
|
||||
|
||||
// Synchronous immediate copy-only command list submitted through the flushTask path:
//  - the first append may program hardware-context commands into the CSR stream,
//    followed by an MI_BATCH_BUFFER_START pointing at the command list stream;
//  - the second append must not add anything to the CSR stream;
//  - every submission is expected to contain an MI_FLUSH_DW targeting the CSR tag
//    allocation in the command list stream, because the list is in synchronous mode.
HWTEST2_F(AppendMemoryCopy, givenSyncImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineThenProgramCmdStreamWithFlushTask, IsAtLeastSkl) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    DebugManagerStateRestore restore;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1);
    auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);

    auto cmdQueue = std::make_unique<Mock<CommandQueue>>();
    cmdQueue->csr = ultCsr;
    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
    ASSERT_NE(nullptr, commandList);
    commandList->isFlushTaskSubmissionEnabled = true;
    ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
    commandList->device = device;
    commandList->isSyncModeQueue = true;
    commandList->cmdQImmediate = cmdQueue.get();
    commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
    commandList->csr = ultCsr;

    // Program CSR state on first submit

    EXPECT_EQ(0u, ultCsr->getCS(0).getUsed());

    bool hwContextProgrammingRequired = (ultCsr->getCmdsSizeForHardwareContext() > 0);

    size_t expectedSize = 0;
    if (hwContextProgrammingRequired) {
        expectedSize = alignUp(ultCsr->getCmdsSizeForHardwareContext() + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::cacheLineSize);
    }

    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr));

    EXPECT_EQ(expectedSize, ultCsr->getCS(0).getUsed());

    // Skip over the system memory fence command, when present, to reach the BB_START.
    size_t offset = 0;
    if constexpr (FamilyType::isUsingMiMemFence) {
        if (ultCsr->globalFenceAllocation) {
            using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
            auto sysMemFence = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(ultCsr->getCS(0).getCpuBase());
            ASSERT_NE(nullptr, sysMemFence);
            EXPECT_EQ(ultCsr->globalFenceAllocation->getGpuAddress(), sysMemFence->getSystemMemoryFenceAddress());
            offset += sizeof(STATE_SYSTEM_MEM_FENCE_ADDRESS);
        }
    }

    if (hwContextProgrammingRequired) {
        auto bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(ptrOffset(ultCsr->getCS(0).getCpuBase(), offset));
        ASSERT_NE(nullptr, bbStartCmd);
        EXPECT_EQ(commandList->commandContainer.getCommandStream()->getGpuBase(), bbStartCmd->getBatchBufferStartAddress());
    }

    // Returns true when the parsed range contains an MI_FLUSH_DW whose destination is tagAddress.
    auto findTagUpdate = [](void *streamBase, size_t sizeUsed, uint64_t tagAddress) -> bool {
        GenCmdList genCmdList;
        EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer(genCmdList, streamBase, sizeUsed));

        for (auto itor = find<MI_FLUSH_DW *>(genCmdList.begin(), genCmdList.end()); itor != genCmdList.end(); ++itor) {
            auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
            if (cmd && cmd->getDestinationAddress() == tagAddress) {
                return true;
            }
        }

        return false;
    };

    EXPECT_TRUE(findTagUpdate(commandList->commandContainer.getCommandStream()->getCpuBase(),
                              commandList->commandContainer.getCommandStream()->getUsed(),
                              ultCsr->getTagAllocation()->getGpuAddress()));

    // Don't program CSR state on next submit
    size_t csrOffset = ultCsr->getCS(0).getUsed();
    size_t cmdListOffset = commandList->commandContainer.getCommandStream()->getUsed();

    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr));

    EXPECT_EQ(csrOffset, ultCsr->getCS(0).getUsed());

    EXPECT_TRUE(findTagUpdate(ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), cmdListOffset),
                              commandList->commandContainer.getCommandStream()->getUsed() - cmdListOffset,
                              ultCsr->getTagAllocation()->getGpuAddress()));
}
|
||||
|
||||
HWTEST2_F(AppendMemoryCopy, givenSyncModeImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineThenSuccessIsReturned, IsAtLeastSkl) {
|
||||
Mock<CommandQueue> cmdQueue;
|
||||
void *srcPtr = reinterpret_cast<void *>(0x1234);
|
||||
|
||||
@@ -558,6 +558,12 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
|
||||
return cs;
|
||||
}
|
||||
|
||||
// Mock override: ignores every argument and returns a value-initialized CompletionStamp.
CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
                             const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override {
    CompletionStamp cs = {};
    return cs;
}
|
||||
|
||||
bool flushBatchedSubmissions() override { return true; }
|
||||
|
||||
CommandStreamReceiverType getType() const override {
|
||||
|
||||
@@ -86,9 +86,10 @@ class CommandStreamReceiver {
|
||||
|
||||
virtual SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) = 0;
|
||||
|
||||
virtual CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
|
||||
virtual CompletionStamp flushTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
|
||||
virtual CompletionStamp flushBcsTask(LinearStream &commandStream, size_t commandStreamStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) = 0;
|
||||
|
||||
virtual bool flushBatchedSubmissions() = 0;
|
||||
MOCKABLE_VIRTUAL SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency);
|
||||
|
||||
@@ -45,6 +45,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
|
||||
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
|
||||
|
||||
CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override;
|
||||
|
||||
void forcePipeControl(NEO::LinearStream &commandStreamCSR);
|
||||
|
||||
bool flushBatchedSubmissions() override;
|
||||
@@ -58,6 +60,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
size_t getRequiredStateBaseAddressSize(const Device &device) const;
|
||||
size_t getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags, Device &device);
|
||||
size_t getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device);
|
||||
size_t getRequiredCmdStreamSize(const DispatchBcsFlags &dispatchBcsFlags);
|
||||
size_t getRequiredCmdStreamSizeAligned(const DispatchBcsFlags &dispatchBcsFlags);
|
||||
size_t getRequiredCmdSizeForPreamble(Device &device) const;
|
||||
size_t getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const;
|
||||
size_t getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const;
|
||||
|
||||
@@ -179,6 +179,79 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdsSizeForHardwareContext() const
|
||||
return getCmdSizeForPrologue();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
|
||||
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) {
|
||||
UNRECOVERABLE_IF(this->dispatchMode != DispatchMode::ImmediateDispatch);
|
||||
|
||||
uint64_t taskStartAddress = commandStreamTask.getGpuBase() + commandStreamTaskStart;
|
||||
|
||||
if (dispatchBcsFlags.flushTaskCount) {
|
||||
uint64_t postSyncAddress = getTagAllocation()->getGpuAddress();
|
||||
TaskCountType postSyncData = peekTaskCount() + 1;
|
||||
|
||||
NEO::MiFlushArgs args;
|
||||
args.commandWithPostSync = true;
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStreamTask, postSyncAddress, postSyncData, args, hwInfo);
|
||||
}
|
||||
|
||||
auto &commandStreamCSR = getCS(getRequiredCmdStreamSizeAligned(dispatchBcsFlags));
|
||||
size_t commandStreamStartCSR = commandStreamCSR.getUsed();
|
||||
|
||||
programHardwareContext(commandStreamCSR);
|
||||
|
||||
if (globalFenceAllocation) {
|
||||
makeResident(*globalFenceAllocation);
|
||||
}
|
||||
|
||||
if (dispatchBcsFlags.flushTaskCount) {
|
||||
makeResident(*getTagAllocation());
|
||||
}
|
||||
|
||||
bool submitCSR = (commandStreamStartCSR != commandStreamCSR.getUsed());
|
||||
void *bbEndLocation = nullptr;
|
||||
|
||||
programEndingCmd(commandStreamTask, &bbEndLocation, isBlitterDirectSubmissionEnabled(), dispatchBcsFlags.hasRelaxedOrderingDependencies, false);
|
||||
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamTask);
|
||||
|
||||
if (submitCSR) {
|
||||
auto bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(commandStreamCSR.getSpace(sizeof(MI_BATCH_BUFFER_START)));
|
||||
addBatchBufferStart(bbStart, taskStartAddress, false);
|
||||
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamCSR);
|
||||
|
||||
this->makeResident(*commandStreamCSR.getGraphicsAllocation());
|
||||
}
|
||||
|
||||
size_t startOffset = submitCSR ? commandStreamStartCSR : commandStreamTaskStart;
|
||||
auto &streamToSubmit = submitCSR ? commandStreamCSR : commandStreamTask;
|
||||
|
||||
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, 0, taskStartAddress, nullptr,
|
||||
false, false, QueueThrottle::MEDIUM, NEO::QueueSliceCount::defaultSliceCount,
|
||||
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, false, (submitCSR || dispatchBcsFlags.hasStallingCmds),
|
||||
dispatchBcsFlags.hasRelaxedOrderingDependencies};
|
||||
|
||||
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId());
|
||||
streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
|
||||
|
||||
auto submissionStatus = flushHandler(batchBuffer, this->getResidencyAllocations());
|
||||
if (submissionStatus != SubmissionStatus::SUCCESS) {
|
||||
CompletionStamp completionStamp = {CompletionStamp::getTaskCountFromSubmissionStatusError(submissionStatus)};
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
if (dispatchBcsFlags.flushTaskCount) {
|
||||
this->latestFlushedTaskCount = this->taskCount + 1;
|
||||
}
|
||||
|
||||
++taskCount;
|
||||
|
||||
CompletionStamp completionStamp = {taskCount, taskLevel, flushStamp->peekStamp()};
|
||||
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
LinearStream &commandStreamTask,
|
||||
@@ -856,6 +929,16 @@ inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
|
||||
return submitResult;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const DispatchBcsFlags &dispatchBcsFlags) {
|
||||
return getCmdsSizeForHardwareContext() + sizeof(typename GfxFamily::MI_BATCH_BUFFER_START);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSizeAligned(const DispatchBcsFlags &dispatchBcsFlags) {
|
||||
return alignUp(getRequiredCmdStreamSize(dispatchBcsFlags), MemoryConstants::cacheLineSize);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device) {
|
||||
size_t size = getRequiredCmdStreamSize(dispatchFlags, device);
|
||||
|
||||
@@ -48,6 +48,17 @@ constexpr uint32_t l3AndL1On = 2u;
|
||||
constexpr uint32_t NotApplicable = 3u;
|
||||
} // namespace L3CachingSettings
|
||||
|
||||
// Flags describing a single copy-engine (BCS) flush request.
// There is no default constructor: all three flags must be supplied explicitly.
struct DispatchBcsFlags {
    DispatchBcsFlags() = delete;

    DispatchBcsFlags(bool flushTaskCount, bool hasStallingCmds, bool hasRelaxedOrderingDependencies)
        : flushTaskCount(flushTaskCount), hasStallingCmds(hasStallingCmds), hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies) {}

    bool flushTaskCount = false;                 // request a task-count update as part of the flush
    bool hasStallingCmds = false;                // the submitted stream contains stalling commands
    bool hasRelaxedOrderingDependencies = false; // the submission has relaxed-ordering dependencies
};
|
||||
|
||||
struct DispatchFlags {
|
||||
DispatchFlags() = delete;
|
||||
DispatchFlags(CsrDependencies csrDependenciesP, TimestampPacketContainer *barrierTimestampPacketNodesP, PipelineSelectArgs pipelineSelectArgsP,
|
||||
|
||||
@@ -26,3 +26,10 @@ CompletionStamp MockCommandStreamReceiver::flushTask(
|
||||
CompletionStamp stamp = {taskCount, taskLevel, flushStamp->peekStamp()};
|
||||
return stamp;
|
||||
}
|
||||
|
||||
// Mock BCS flush: ignores all arguments, increments taskCount and returns a stamp built
// from the updated taskCount, the current taskLevel and the current flush stamp.
CompletionStamp MockCommandStreamReceiver::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
                                                        const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) {
    ++taskCount;
    CompletionStamp stamp = {taskCount, taskLevel, flushStamp->peekStamp()};
    return stamp;
}
|
||||
@@ -104,6 +104,9 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
DispatchFlags &dispatchFlags,
|
||||
Device &device) override;
|
||||
|
||||
CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
|
||||
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override;
|
||||
|
||||
bool flushBatchedSubmissions() override {
|
||||
if (flushBatchedSubmissionsCallCounter) {
|
||||
(*flushBatchedSubmissionsCallCounter)++;
|
||||
|
||||
Reference in New Issue
Block a user