Avoid implicit sync for async mode immediate copy queue
Related-To: LOCI-1988
Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
This commit is contained in:
parent
ced22d45e9
commit
31b2433b2f
|
@ -257,6 +257,7 @@ struct CommandList : _ze_command_list_handle_t {
|
|||
uint32_t partitionCount = 1;
|
||||
bool isFlushTaskSubmissionEnabled = false;
|
||||
bool isSyncModeQueue = false;
|
||||
bool isTbxMode = false;
|
||||
bool commandListSLMEnabled = false;
|
||||
bool requiresQueueUncachedMocs = false;
|
||||
|
||||
|
|
|
@ -117,12 +117,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
|||
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
|
||||
}
|
||||
|
||||
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE && !isCopyOnly() && !isInternal()) {
|
||||
const auto &hwInfo = device->getHwInfo();
|
||||
this->isFlushTaskSubmissionEnabled = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily).isPlatformFlushTaskEnabled(hwInfo);
|
||||
if (NEO::DebugManager.flags.EnableFlushTaskSubmission.get() != -1) {
|
||||
this->isFlushTaskSubmissionEnabled = !!NEO::DebugManager.flags.EnableFlushTaskSubmission.get();
|
||||
}
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled);
|
||||
}
|
||||
|
||||
|
@ -149,17 +144,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::executeCommandListImmediate(bo
|
|||
this->close();
|
||||
ze_command_list_handle_t immediateHandle = this->toHandle();
|
||||
|
||||
this->commandContainer.removeDuplicatesFromResidencyContainer();
|
||||
const auto commandListExecutionResult = this->cmdQImmediate->executeCommandLists(1, &immediateHandle, nullptr, performMigration);
|
||||
if (commandListExecutionResult == ZE_RESULT_ERROR_DEVICE_LOST) {
|
||||
return commandListExecutionResult;
|
||||
}
|
||||
|
||||
const auto synchronizationResult = this->cmdQImmediate->synchronize(std::numeric_limits<uint64_t>::max());
|
||||
if (synchronizationResult == ZE_RESULT_ERROR_DEVICE_LOST) {
|
||||
return synchronizationResult;
|
||||
}
|
||||
if (this->isCopyOnly() && !this->isSyncModeQueue && !this->isTbxMode) {
|
||||
this->commandContainer.currentLinearStreamStartOffset = this->commandContainer.getCommandStream()->getUsed();
|
||||
} else {
|
||||
const auto synchronizationResult = this->cmdQImmediate->synchronize(std::numeric_limits<uint64_t>::max());
|
||||
if (synchronizationResult == ZE_RESULT_ERROR_DEVICE_LOST) {
|
||||
return synchronizationResult;
|
||||
}
|
||||
|
||||
this->reset();
|
||||
this->reset();
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -114,6 +114,13 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
|||
commandList->internalUsage = internalUsage;
|
||||
commandList->cmdListType = CommandListType::TYPE_IMMEDIATE;
|
||||
commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS);
|
||||
if (!(NEO::EngineGroupType::Copy == engineType) && !internalUsage) {
|
||||
const auto &hwInfo = device->getHwInfo();
|
||||
commandList->isFlushTaskSubmissionEnabled = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily).isPlatformFlushTaskEnabled(hwInfo);
|
||||
if (NEO::DebugManager.flags.EnableFlushTaskSubmission.get() != -1) {
|
||||
commandList->isFlushTaskSubmissionEnabled = !!NEO::DebugManager.flags.EnableFlushTaskSubmission.get();
|
||||
}
|
||||
}
|
||||
returnValue = commandList->initialize(device, engineType, desc->flags);
|
||||
if (returnValue != ZE_RESULT_SUCCESS) {
|
||||
commandList->destroy();
|
||||
|
@ -130,6 +137,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
|||
|
||||
commandList->cmdQImmediate = commandQueue;
|
||||
commandList->csr = csr;
|
||||
commandList->isTbxMode = (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX) || (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX_WITH_AUB);
|
||||
commandList->commandListPreemptionMode = device->getDevicePreemptionMode();
|
||||
return commandList;
|
||||
}
|
||||
|
|
|
@ -369,6 +369,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
|||
auto commandList = CommandList::fromHandle(phCommandLists[i]);
|
||||
auto &cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
|
||||
auto cmdBufferCount = cmdBufferAllocations.size();
|
||||
bool immediateMode = (commandList->cmdListType == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false;
|
||||
|
||||
auto commandListPreemption = commandList->getCommandListPreemptionMode();
|
||||
if (statePreemption != commandListPreemption) {
|
||||
|
@ -413,7 +414,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
|||
|
||||
for (size_t iter = 0; iter < cmdBufferCount; iter++) {
|
||||
auto allocation = cmdBufferAllocations[iter];
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&child, allocation->getGpuAddress(), true);
|
||||
uint64_t startOffset = allocation->getGpuAddress();
|
||||
if (immediateMode && (iter == (cmdBufferCount - 1))) {
|
||||
startOffset = ptrOffset(allocation->getGpuAddress(), commandList->commandContainer.currentLinearStreamStartOffset);
|
||||
}
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&child, startOffset, true);
|
||||
}
|
||||
|
||||
printfFunctionContainer.insert(printfFunctionContainer.end(),
|
||||
|
|
|
@ -1694,7 +1694,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenSetBarrierThenMiFlus
|
|||
EXPECT_NE(cmdList.end(), itor);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreate, givenImmediateCommandListWithCopyOnlyWhenSetBarrierThenMiFlushCmdIsNotInsertedInTheCmdContainer) {
|
||||
HWTEST_F(CommandListCreate, givenImmediateCommandListWithCopyOnlyWhenSetBarrierThenMiFlushCmdIsInsertedInTheCmdContainer) {
|
||||
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
|
||||
ze_command_queue_desc_t desc = {};
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
|
@ -1713,7 +1713,7 @@ HWTEST_F(CommandListCreate, givenImmediateCommandListWithCopyOnlyWhenSetBarrierT
|
|||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
EXPECT_EQ(cmdList.end(), itor);
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreate, whenCommandListIsResetThenContainsStatelessUncachedResourceIsSetToFalse) {
|
||||
|
|
|
@ -731,11 +731,11 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueAndCopyOnly
|
|||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itor = find<SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), itor);
|
||||
EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
EXPECT_GT(commandContainer.getCommandStream()->getUsed(), used);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenMiFlushAndSemWaitAreAddedViaFlushTask) {
|
||||
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenMiFlushAndSemWaitAreAdded) {
|
||||
using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
ze_command_queue_desc_t desc = {};
|
||||
|
@ -763,8 +763,33 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhe
|
|||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itor = find<SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), itor);
|
||||
EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
EXPECT_GT(commandContainer.getCommandStream()->getUsed(), used);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndTbxCsrWithCopyOnlyImmediateCommandListWhenAppendWaitEventsReturnsSuccess) {
|
||||
using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
ze_command_queue_desc_t desc = {};
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
||||
EXPECT_EQ(device, commandList->device);
|
||||
EXPECT_EQ(1u, commandList->cmdListType);
|
||||
EXPECT_NE(nullptr, commandList->cmdQImmediate);
|
||||
|
||||
commandList->isTbxMode = true;
|
||||
|
||||
MockEvent event, event2;
|
||||
event.signalScope = 0;
|
||||
event.waitScope = 0;
|
||||
event2.waitScope = 0;
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
|
||||
auto ret = commandList->appendWaitOnEvents(2, events);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueWithCopyOnlyImmediateCommandListCreatedThenSlushTaskSubmissionIsSetToFalse) {
|
||||
|
|
|
@ -1213,6 +1213,59 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidEventListWhenAppendLaunchCoo
|
|||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
|
||||
}
|
||||
|
||||
using WithinXeHPAndXeHPC = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAppendingKernelWithImmediateListWithoutFlushTaskThenNewCmdBufferAllocated, WithinXeHPAndXeHPC) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(0);
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
createKernel();
|
||||
|
||||
ze_result_t returnValue;
|
||||
ze_command_queue_desc_t queueDesc = {};
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue));
|
||||
|
||||
auto &commandContainer = commandList->commandContainer;
|
||||
const auto stream = commandContainer.getCommandStream();
|
||||
const auto streamCpu = stream->getCpuBase();
|
||||
|
||||
Vec3<size_t> groupCount{1, 1, 1};
|
||||
auto sizeLeftInStream = sizeof(MI_BATCH_BUFFER_END);
|
||||
auto available = stream->getAvailableSpace();
|
||||
stream->getSpace(available - sizeLeftInStream);
|
||||
|
||||
const uint32_t threadGroupDimensions[3] = {1, 1, 1};
|
||||
|
||||
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
|
||||
0,
|
||||
device->getNEODevice(),
|
||||
kernel.get(),
|
||||
threadGroupDimensions,
|
||||
PreemptionMode::MidBatch,
|
||||
0,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false};
|
||||
NEO::EncodeDispatchKernel<FamilyType>::encode(commandContainer, dispatchKernelArgs);
|
||||
|
||||
auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, 0u);
|
||||
|
||||
const auto streamCpu2 = stream->getCpuBase();
|
||||
|
||||
EXPECT_NE(nullptr, streamCpu2);
|
||||
EXPECT_NE(streamCpu, streamCpu2);
|
||||
|
||||
EXPECT_EQ(2u, commandContainer.getCmdBufferAllocations().size());
|
||||
auto immediateHandle = commandList->toHandle();
|
||||
returnValue = commandList->cmdQImmediate->executeCommandLists(1, &immediateHandle, nullptr, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLaunchCooperativeKernelIsCalledThenCorrectValueIsReturned, IsAtLeastSkl) {
|
||||
Mock<::L0::Kernel> kernel;
|
||||
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
|
||||
|
|
|
@ -220,6 +220,51 @@ HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyWith
|
|||
commandList->cmdQImmediate = nullptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryCopy, givenAsyncImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineThenSuccessIsReturned, IsAtLeastSkl) {
|
||||
Mock<CommandQueue> cmdQueue;
|
||||
void *srcPtr = reinterpret_cast<void *>(0x1234);
|
||||
void *dstPtr = reinterpret_cast<void *>(0x2345);
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
commandList->device = device;
|
||||
commandList->cmdQImmediate = &cmdQueue;
|
||||
commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
|
||||
|
||||
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1u, cmdQueue.executeCommandListsCalled);
|
||||
EXPECT_EQ(0u, cmdQueue.synchronizeCalled);
|
||||
|
||||
commandList->cmdQImmediate = nullptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryCopy, givenSyncModeImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineThenSuccessIsReturned, IsAtLeastSkl) {
|
||||
Mock<CommandQueue> cmdQueue;
|
||||
void *srcPtr = reinterpret_cast<void *>(0x1234);
|
||||
void *dstPtr = reinterpret_cast<void *>(0x2345);
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
commandList->device = device;
|
||||
commandList->cmdQImmediate = &cmdQueue;
|
||||
commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
|
||||
commandList->isSyncModeQueue = true;
|
||||
|
||||
auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1u, cmdQueue.executeCommandListsCalled);
|
||||
EXPECT_EQ(1u, cmdQueue.synchronizeCalled);
|
||||
|
||||
commandList->cmdQImmediate = nullptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAdded, IsAtLeastSkl) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
|
|
|
@ -286,6 +286,7 @@ void CommandContainer::closeAndAllocateNextCommandBuffer() {
|
|||
auto ptr = commandStream->getSpace(0u);
|
||||
memcpy_s(ptr, bbEndSize, hwHelper.getBatchBufferEndReference(), bbEndSize);
|
||||
allocateNextCommandBuffer();
|
||||
currentLinearStreamStartOffset = 0u;
|
||||
}
|
||||
|
||||
void CommandContainer::prepareBindfulSsh() {
|
||||
|
|
|
@ -80,6 +80,7 @@ class CommandContainer : public NonCopyableOrMovableClass {
|
|||
uint32_t nextIddInBlock = 0;
|
||||
bool lastPipelineSelectModeRequired = false;
|
||||
bool lastSentUseGlobalAtomics = false;
|
||||
uint64_t currentLinearStreamStartOffset = 0u;
|
||||
|
||||
Device *getDevice() const { return device; }
|
||||
|
||||
|
|
Loading…
Reference in New Issue