refactor: change immediate command list flushing method interface

- this change adds a parameter that indicates whether the current flush contains a kernel operation

Related-To: NEO-8281

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-08-23 14:32:45 +00:00
committed by Compute-Runtime-Automation
parent f264b52702
commit 757b64ea4c
8 changed files with 118 additions and 88 deletions

View File

@@ -47,7 +47,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
using BaseClass::isCopyOnly;
using BaseClass::isInOrderExecutionEnabled;
using ComputeFlushMethodType = NEO::CompletionStamp (CommandListCoreFamilyImmediate<gfxCoreFamily>::*)(NEO::LinearStream &, size_t, bool, bool);
using ComputeFlushMethodType = NEO::CompletionStamp (CommandListCoreFamilyImmediate<gfxCoreFamily>::*)(NEO::LinearStream &, size_t, bool, bool, bool);
CommandListCoreFamilyImmediate(uint32_t numIddsPerBlock);
@@ -151,17 +151,17 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
ze_result_t hostSynchronize(uint64_t timeout) override;
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
ze_result_t executeCommandListImmediateWithFlushTaskImpl(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, CommandQueue *cmdQ);
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation);
ze_result_t executeCommandListImmediateWithFlushTaskImpl(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, CommandQueue *cmdQ);
NEO::CompletionStamp flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
NEO::CompletionStamp flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
NEO::CompletionStamp flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation);
NEO::CompletionStamp flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation);
NEO::CompletionStamp flushBcsTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, NEO::CommandStreamReceiver *csr);
void checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies, size_t commandSize);
void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags);
ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent);
ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, ze_event_handle_t hSignalEvent);
void createLogicalStateHelper() override {}
NEO::LogicalStateHelper *getLogicalStateHelper() const override;

View File

@@ -112,7 +112,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushBcsTask
}
template <GFXCORE_FAMILY gfxCoreFamily>
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediateRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) {
bool sbaDirty = this->csr->getGSBAStateDirty();
NEO::IndirectHeap *dsh = nullptr;
@@ -208,7 +208,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
}
template <GFXCORE_FAMILY gfxCoreFamily>
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegularTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) {
NEO::DispatchFlags dispatchFlags(
{}, // csrDependencies
nullptr, // barrierTimestampPacketNodes
@@ -314,12 +314,12 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
}
// Flush-task entry point for the immediate command list: forwards all caller-supplied
// flags unchanged to executeCommandListImmediateWithFlushTaskImpl and supplies
// this->cmdQImmediate as the command queue argument.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, this->cmdQImmediate);
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) {
return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, this->cmdQImmediate);
}
template <GFXCORE_FAMILY gfxCoreFamily>
inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTaskImpl(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, CommandQueue *cmdQ) {
inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTaskImpl(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, CommandQueue *cmdQ) {
this->commandContainer.removeDuplicatesFromResidencyContainer();
auto commandStream = this->commandContainer.getCommandStream();
@@ -371,7 +371,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
if (isCopyOnly()) {
completionStamp = flushBcsTask(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies, csr);
} else {
completionStamp = (this->*computeFlushMethod)(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies);
completionStamp = (this->*computeFlushMethod)(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation);
}
if (completionStamp.taskCount > NEO::CompletionStamp::notReady) {
@@ -447,7 +447,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
hSignalEvent, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -464,7 +464,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelInd
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -509,7 +509,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_even
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
this->dependenciesPresent = true;
return flushImmediate(ret, true, !relaxedOrderingDispatch, relaxedOrderingDispatch, hSignalEvent);
return flushImmediate(ret, true, !relaxedOrderingDispatch, relaxedOrderingDispatch, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -561,7 +561,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
numWaitEvents, phWaitEvents, relaxedOrderingDispatch, forceDisableCopyOnlyInOrderSignaling);
}
return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, hSignalEvent);
return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, true, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -622,7 +622,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, forceDisableCopyOnlyInOrderSignaling);
}
return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, hSignalEvent);
return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, true, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -640,7 +640,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -650,7 +650,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
checkAvailableSpace(0, false, commonImmediateCommandSize);
ret = CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEvent);
return flushImmediate(ret, true, true, false, hSignalEvent);
return flushImmediate(ret, true, true, false, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -660,7 +660,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
checkAvailableSpace(0, false, commonImmediateCommandSize);
ret = CommandListCoreFamily<gfxCoreFamily>::appendEventReset(hSignalEvent);
return flushImmediate(ret, true, true, false, hSignalEvent);
return flushImmediate(ret, true, true, false, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -690,7 +690,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
} else {
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
}
return flushImmediate(ret, false, false, relaxedOrdering, nullptr);
return flushImmediate(ret, false, false, relaxedOrdering, true, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -708,7 +708,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies, signalInOrderCompletion);
this->dependenciesPresent = true;
return flushImmediate(ret, true, true, false, nullptr);
return flushImmediate(ret, true, true, false, false, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -722,7 +722,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteGlobalTime
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, true, false, hSignalEvent);
return flushImmediate(ret, true, true, false, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -770,7 +770,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent,
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -791,7 +791,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent,
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -812,7 +812,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -827,7 +827,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryRangesBar
checkWaitEventsState(numWaitEvents, phWaitEvents);
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, true, false, hSignalEvent);
return flushImmediate(ret, true, true, false, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -845,7 +845,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles, relaxedOrderingDispatch);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -888,7 +888,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
bool hasRelaxedOrderingDependencies, bool kernelOperation, ze_event_handle_t hSignalEvent) {
auto signalEvent = Event::fromHandle(hSignalEvent);
if (inputRet == ZE_RESULT_SUCCESS) {
@@ -900,7 +900,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
if (signalEvent && (NEO::DebugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) {
signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate);
}
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation);
} else {
inputRet = executeCommandListImmediate(performMigration);
}

View File

@@ -107,7 +107,7 @@ struct BcsSplit {
result = appendCall(localDstPtr, localSrcPtr, localSize, eventHandle);
if (cmdList->flushTaskSubmissionEnabled()) {
cmdList->executeCommandListImmediateWithFlushTaskImpl(performMigration, false, hasRelaxedOrderingDependencies, cmdQsForSplit[i]);
cmdList->executeCommandListImmediateWithFlushTaskImpl(performMigration, false, hasRelaxedOrderingDependencies, false, cmdQsForSplit[i]);
} else {
cmdList->executeCommandListImmediateImpl(performMigration, cmdQsForSplit[i]);
}

View File

@@ -230,9 +230,70 @@ void CommandListGlobalHeapsFixtureInit::tearDown() {
}
// Builds the shared-heaps immediate command list fixture:
//  1. sets the debug flags (flush-task submission, immediate heap sharing, private heaps
//     address model, reusable allocation amount) and then runs the base fixture setUp,
//  2. stores four 2 MB LINEAR_STREAM allocations as reusable allocations in the
//     immediate command list CSR's internal allocation storage,
//  3. creates a second immediate command list (commandListImmediateCoexisting),
//  4. fills in the mock kernel's sampler (only when dshRequired) and binding table data,
//  5. creates a host-visible event pool holding a single event.
void ImmediateCmdListSharedHeapsFixture::setUp() {
    constexpr uint32_t reusableAllocationCount = 4;

    // Flags are set ahead of the base fixture's setUp call.
    DebugManager.flags.EnableFlushTaskSubmission.set(1);
    DebugManager.flags.EnableImmediateCmdListHeapSharing.set(1);
    DebugManager.flags.SelectCmdListHeapAddressModel.set(static_cast<int32_t>(NEO::HeapAddressModel::PrivateHeaps));
    DebugManager.flags.SetAmountOfReusableAllocations.set(reusableAllocationCount);

    ModuleMutableCommandListFixture::setUp();

    // Hand ownership of each freshly allocated heap to the CSR's reusable allocation list.
    for (uint32_t stored = 0; stored < reusableAllocationCount; ++stored) {
        auto reusableHeap = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), true, 2 * MB,
                                                                                                 NEO::AllocationType::LINEAR_STREAM, false, false,
                                                                                                 neoDevice->getDeviceBitfield()});
        std::unique_ptr<GraphicsAllocation> ownedHeap(reusableHeap);
        commandListImmediate->csr->getInternalAllocationStorage()->storeAllocation(std::move(ownedHeap), REUSABLE_ALLOCATION);
    }

    ze_result_t returnValue;

    // Second immediate command list created on the same device and engine group.
    ze_command_queue_desc_t queueDesc{};
    queueDesc.ordinal = 0u;
    queueDesc.index = 0u;
    queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    commandListImmediateCoexisting.reset(whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, engineGroupType, returnValue)));

    if (this->dshRequired) {
        auto &kernelInfoSamplerTable = mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable;
        kernelInfoSamplerTable.numSamplers = 2;
        kernelInfoSamplerTable.tableOffset = 16;
        kernelInfoSamplerTable.borderColor = 0;

        // Heap data size: two 16-byte entries placed after the table offset.
        kernel->dynamicStateHeapDataSize = static_cast<uint32_t>(16 * 2 + kernelInfoSamplerTable.tableOffset);
        kernel->dynamicStateHeapData.reset(new uint8_t[kernel->dynamicStateHeapDataSize]);

        // Mirror the sampler table into the mock kernel descriptor.
        mockKernelImmData->mockKernelDescriptor->payloadMappings.samplerTable = kernelInfoSamplerTable;
    }

    auto &kernelHeapInfo = mockKernelImmData->kernelInfo->heapInfo;
    kernelHeapInfo.surfaceStateHeapSize = static_cast<uint32_t>(64 + sizeof(uint32_t));

    auto &bindingTable = mockKernelImmData->mockKernelDescriptor->payloadMappings.bindingTable;
    bindingTable.numEntries = 1;
    bindingTable.tableOffset = 0x40;
    mockKernelImmData->mockKernelDescriptor->kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindfulAndStateless;

    kernel->surfaceStateHeapDataSize = kernelHeapInfo.surfaceStateHeapSize;
    kernel->surfaceStateHeapData.reset(new uint8_t[kernel->surfaceStateHeapDataSize]);

    // Host-visible pool with room for exactly one event.
    ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC};
    eventDesc.index = 0;
    eventDesc.wait = 0;
    eventDesc.signal = 0;

    eventPool.reset(static_cast<EventPool *>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)));

    auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
    event.reset(static_cast<Event *>(l0GfxCoreHelper.createEvent(eventPool.get(), &eventDesc, device)));
}
// Releases the objects owned by this fixture (event, then its pool, then the coexisting
// immediate command list) before delegating to the base fixture's tearDown.
void ImmediateCmdListSharedHeapsFixture::tearDown() {
    event.reset();
    eventPool.reset();
    commandListImmediateCoexisting.reset();

    ModuleMutableCommandListFixture::tearDown();
}
bool AppendFillFixture::MockDriverFillHandle::findAllocationDataForRange(const void *buffer,

View File

@@ -162,6 +162,11 @@ struct CommandListGlobalHeapsFixture : public CommandListGlobalHeapsFixtureInit
// Test fixture extending ModuleMutableCommandListFixture with objects that its setUp()
// creates and its tearDown() releases.
struct ImmediateCmdListSharedHeapsFixture : public ModuleMutableCommandListFixture {
// Configures the fixture; see the out-of-line definition for details.
void setUp();
// Releases the members below, then calls the base fixture's tearDown.
void tearDown();
// Additional immediate command list owned by the fixture.
std::unique_ptr<L0::ult::CommandList> commandListImmediateCoexisting;
// Event pool owned by the fixture.
std::unique_ptr<EventPool> eventPool;
// Event owned by the fixture.
std::unique_ptr<Event> event;
};
class AppendFillFixture : public DeviceFixture {

View File

@@ -598,10 +598,10 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
return executeCommandListImmediateReturnValue;
}
// Mock override: always increments executeCommandListImmediateWithFlushTaskCalledCount.
// When callBaseExecute is set, the call is forwarded to the base class implementation with
// the same arguments; otherwise the preset executeCommandListImmediateWithFlushTaskReturnValue
// is returned without executing anything.
ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) override {
ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) override {
++executeCommandListImmediateWithFlushTaskCalledCount;
if (callBaseExecute) {
return BaseClass::executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
return BaseClass::executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation);
}
return executeCommandListImmediateWithFlushTaskReturnValue;
}

View File

@@ -84,7 +84,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 1;
commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 1;
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::RoundRobin;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true);
NEO::StateComputeModePropertiesSupport scmPropertiesSupport = {};
productHelper.fillScmPropertiesSupportStructure(scmPropertiesSupport);
@@ -113,7 +113,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 0;
commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 0;
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::AgeBased;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true);
expectedLargeGrfMode = scmPropertiesSupport.largeGrfMode ? 0 : -1;
expectedIsCoherencyRequired = scmPropertiesSupport.coherencyRequired ? 0 : -1;
@@ -142,7 +142,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
commandListImmediate.containsAnyKernel = true;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true);
EXPECT_FALSE(commandListImmediate.containsAnyKernel);
}
@@ -157,7 +157,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
commandListImmediate.containsAnyKernel = true;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true);
EXPECT_FALSE(commandListImmediate.containsAnyKernel);
}
@@ -172,7 +172,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
commandListImmediate.containsAnyKernel = true;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true);
EXPECT_FALSE(commandListImmediate.containsAnyKernel);
}
@@ -187,7 +187,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
commandListImmediate.containsAnyKernel = true;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true);
EXPECT_FALSE(commandListImmediate.containsAnyKernel);
}
@@ -198,7 +198,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
commandList.reset(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
EXPECT_EQ(ZE_RESULT_SUCCESS, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
EXPECT_EQ(ZE_RESULT_SUCCESS, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false));
}
HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) {
@@ -210,7 +210,7 @@ HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecu
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_HOST_MEMORY;
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false));
}
HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) {
@@ -222,7 +222,7 @@ HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExe
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_MEMORY;
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false));
}
HWTEST2_F(CommandListExecuteImmediate, GivenImmediateCommandListWhenCommandListIsCreatedThenCsrStateIsNotSet, IsAtLeastSkl) {
@@ -513,7 +513,7 @@ HWTEST2_F(CommandListTest, givenImmediateCommandListWhenFlushImmediateThenOverri
MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
cmdList.csr = event->csrs[0];
event->csrs[0] = &mockCommandStreamReceiver;
cmdList.flushImmediate(ZE_RESULT_SUCCESS, false, false, false, event->toHandle());
cmdList.flushImmediate(ZE_RESULT_SUCCESS, false, false, false, false, event->toHandle());
EXPECT_EQ(event->csrs[0], cmdList.csr);
}
@@ -1030,7 +1030,7 @@ HWTEST2_F(CommandListTest, givenCmdListWithIndirectAccessWhenExecutingCommandLis
auto oldCommandQueue = commandListImmediate.cmdQImmediate;
commandListImmediate.cmdQImmediate = &mockCommandQueue;
commandListImmediate.indirectAllocationsAllowed = true;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false);
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 1u);
commandListImmediate.cmdQImmediate = oldCommandQueue;
}
@@ -1049,7 +1049,7 @@ HWTEST2_F(CommandListTest, givenCmdListWithNoIndirectAccessWhenExecutingCommandL
auto oldCommandQueue = commandListImmediate.cmdQImmediate;
commandListImmediate.cmdQImmediate = &mockCommandQueue;
commandListImmediate.indirectAllocationsAllowed = false;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false);
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 0u);
commandListImmediate.cmdQImmediate = oldCommandQueue;
}
@@ -1060,37 +1060,9 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
using SAMPLER_BORDER_COLOR_STATE = typename FamilyType::SAMPLER_BORDER_COLOR_STATE;
auto &hwInfo = device->getHwInfo();
uint32_t expectedSbaCount = 1;
auto &productHelper = device->getProductHelper();
if (productHelper.isAdditionalStateBaseAddressWARequired(hwInfo)) {
expectedSbaCount++;
}
auto &cmdContainer = commandListImmediate->commandContainer;
bool dshPresent = hwInfo.capabilityTable.supportsImages || NEO::UnitTestHelper<FamilyType>::getAdditionalDshSize(cmdContainer.getNumIddPerBlock()) > 0;
if (dshPresent) {
mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.numSamplers = 2;
mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.tableOffset = sizeof(SAMPLER_BORDER_COLOR_STATE);
mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.borderColor = 0;
kernel->dynamicStateHeapDataSize = static_cast<uint32_t>(sizeof(SAMPLER_STATE) * 2 + mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable.tableOffset);
kernel->dynamicStateHeapData.reset(new uint8_t[kernel->dynamicStateHeapDataSize]);
mockKernelImmData->mockKernelDescriptor->payloadMappings.samplerTable = mockKernelImmData->kernelInfo->kernelDescriptor.payloadMappings.samplerTable;
}
mockKernelImmData->kernelInfo->heapInfo.surfaceStateHeapSize = static_cast<uint32_t>(sizeof(RENDER_SURFACE_STATE) + sizeof(uint32_t));
mockKernelImmData->mockKernelDescriptor->payloadMappings.bindingTable.numEntries = 1;
mockKernelImmData->mockKernelDescriptor->payloadMappings.bindingTable.tableOffset = 0x40;
mockKernelImmData->mockKernelDescriptor->kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindfulAndStateless;
kernel->surfaceStateHeapDataSize = mockKernelImmData->kernelInfo->heapInfo.surfaceStateHeapSize;
kernel->surfaceStateHeapData.reset(new uint8_t[kernel->surfaceStateHeapDataSize]);
EXPECT_TRUE(commandListImmediate->isFlushTaskSubmissionEnabled);
EXPECT_TRUE(commandListImmediate->immediateCmdListHeapSharing);
@@ -1119,7 +1091,7 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH
NEO::IndirectHeap *containerDshHeap = cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE);
NEO::IndirectHeap *containerSshHeap = cmdContainer.getIndirectHeap(HeapType::SURFACE_STATE);
if (dshPresent) {
if (this->dshRequired) {
EXPECT_EQ(csrDshHeap, containerDshHeap);
} else {
EXPECT_EQ(nullptr, containerDshHeap);
@@ -1132,10 +1104,10 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH
ptrOffset(csrStream.getCpuBase(), csrUsedBefore),
(csrUsedAfter - csrUsedBefore)));
auto sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(expectedSbaCount, sbaCmds.size());
ASSERT_EQ(expectedSbaCmds, sbaCmds.size());
auto &sbaCmd = *genCmdCast<STATE_BASE_ADDRESS *>(*sbaCmds[0]);
if (dshPresent) {
if (this->dshRequired) {
EXPECT_TRUE(sbaCmd.getDynamicStateBaseAddressModifyEnable());
EXPECT_EQ(csrDshHeap->getHeapGpuBase(), sbaCmd.getDynamicStateBaseAddress());
} else {
@@ -1147,7 +1119,7 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH
dshUsed = csrDshHeap->getUsed() - dshUsed;
sshUsed = csrSshHeap->getUsed() - sshUsed;
if (dshPresent) {
if (this->dshRequired) {
EXPECT_LT(0u, dshUsed);
} else {
EXPECT_EQ(0u, dshUsed);
@@ -1160,14 +1132,6 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH
EXPECT_GE(dshEstimated, dshUsed);
EXPECT_GE(sshEstimated, sshUsed);
ze_command_queue_desc_t queueDesc{};
queueDesc.ordinal = 0u;
queueDesc.index = 0u;
queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
std::unique_ptr<L0::ult::CommandList> commandListImmediateCoexisting;
commandListImmediateCoexisting.reset(whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, engineGroupType, result)));
auto &cmdContainerCoexisting = commandListImmediateCoexisting->commandContainer;
EXPECT_EQ(1u, cmdContainerCoexisting.getNumIddPerBlock());
EXPECT_TRUE(cmdContainerCoexisting.immediateCmdListSharedHeap(HeapType::DYNAMIC_STATE));
@@ -1191,7 +1155,7 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH
size_t expectedSshAlignedSize = sshEstimated + ptrDiff(alignUp(ptr, sshAlignment), ptr);
size_t expectedDshAlignedSize = dshEstimated;
if (dshPresent) {
if (this->dshRequired) {
ptr = containerDshHeapCoexisting->getSpace(0);
expectedDshAlignedSize += ptrDiff(alignUp(ptr, dshAlignment), ptr);
@@ -1214,7 +1178,7 @@ HWTEST2_F(ImmediateCmdListSharedHeapsTest, givenMultipleCommandListsUsingSharedH
dshUsed = csrDshHeap->getUsed() - dshUsed;
sshUsed = csrSshHeap->getUsed() - sshUsed;
if (dshPresent) {
if (this->dshRequired) {
EXPECT_LT(0u, dshUsed);
} else {
EXPECT_EQ(0u, dshUsed);

View File

@@ -247,7 +247,7 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenForceMemoryPrefetchForKmdMigra
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
result = commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
result = commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto mockMemoryManager = reinterpret_cast<NEO::MockMemoryManager *>(neoDevice->getMemoryManager());