Remove unused useSingleSubdevice flag

Signed-off-by: Naklicki, Mateusz <mateusz.naklicki@intel.com>
This commit is contained in:
Naklicki, Mateusz
2022-12-16 10:52:55 +00:00
committed by Compute-Runtime-Automation
parent 1df4adfd8b
commit 810241e2a8
15 changed files with 11 additions and 92 deletions

View File

@@ -107,7 +107,6 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
this->csr->isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
false, // epilogueRequired
false, // usePerDssBackedBuffer
false, // useSingleSubdevice
false, // useGlobalAtomics
this->device->getNEODevice()->getNumGenericSubDevices() > 1, // areMultipleSubDevicesInContext
false, // memoryMigrationRequired

View File

@@ -100,7 +100,7 @@ NEO::SubmissionStatus CommandQueueImp::submitBatchBuffer(size_t offset, NEO::Res
NEO::BatchBuffer batchBuffer(commandStream.getGraphicsAllocation(), offset, 0, 0, nullptr, false, false,
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
-commandStream.getUsed(), &commandStream, endingCmdPtr, isCooperative, false, false);
+commandStream.getUsed(), &commandStream, endingCmdPtr, false, false);
commandStream.getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());

View File

@@ -599,11 +599,9 @@ HWTEST2_F(CommandQueueExecuteCommandListsImplicitScalingDisabled, givenCommandLi
struct MockCsr : NEO::CommandStreamReceiverHw<FamilyType> {
using NEO::CommandStreamReceiverHw<FamilyType>::CommandStreamReceiverHw;
NEO::SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
useSingleSubdeviceValue = batchBuffer.useSingleSubdevice;
submitBatchBufferCalled++;
return NEO::CommandStreamReceiver::submitBatchBuffer(batchBuffer, allocationsForResidency);
}
bool useSingleSubdeviceValue = false;
uint32_t submitBatchBufferCalled = 0;
};
@@ -633,7 +631,6 @@ HWTEST2_F(CommandQueueExecuteCommandListsImplicitScalingDisabled, givenCommandLi
auto result = pCommandQueue->executeCommandLists(1, commandListCooperative, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, pMockCsr->submitBatchBufferCalled);
EXPECT_TRUE(pMockCsr->useSingleSubdeviceValue);
auto pCommandListWithNonCooperativeKernels = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
pCommandListWithNonCooperativeKernels->initialize(&device, NEO::EngineGroupType::Compute, 0u);
@@ -644,7 +641,6 @@ HWTEST2_F(CommandQueueExecuteCommandListsImplicitScalingDisabled, givenCommandLi
result = pCommandQueue->executeCommandLists(1, commandListNonCooperative, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(2u, pMockCsr->submitBatchBufferCalled);
EXPECT_FALSE(pMockCsr->useSingleSubdeviceValue);
pCommandQueue->destroy();
}

View File

@@ -813,7 +813,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
false, // epilogueRequired
false, // usePerDssBackedBuffer
false, // useSingleSubdevice
useGlobalAtomics, // useGlobalAtomics
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
kernel->requiresMemoryMigration(), // memoryMigrationRequired
@@ -1060,7 +1059,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
false, // epilogueRequired
false, // usePerDssBackedBuffer
false, // useSingleSubdevice
false, // useGlobalAtomics
context->containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
false, // memoryMigrationRequired

View File

@@ -75,7 +75,6 @@ CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminate
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
false, // epilogueRequired
false, // usePerDssBackedBuffer
false, // useSingleSubdevice
false, // useGlobalAtomics
false, // areMultipleSubDevicesInContext
false, // memoryMigrationRequired
@@ -206,7 +205,6 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
false, // epilogueRequired
false, // usePerDssBackedBuffer
false, // useSingleSubdevice
kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, // useGlobalAtomics
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
kernel->requiresMemoryMigration(), // memoryMigrationRequired
@@ -382,7 +380,6 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term
commandStreamReceiver.isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
false, // epilogueRequired
false, // usePerDssBackedBuffer
false, // useSingleSubdevice
false, // useGlobalAtomics
commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
false, // memoryMigrationRequired

View File

@@ -1446,26 +1446,6 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenCsrIsMarkedWithNewResourceThen
EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSubmissionChangesFromSingleSubdeviceThenCallBatchedSubmission) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch;
commandStreamReceiver.wasSubmittedToSingleSubdevice = true;
flushTask(commandStreamReceiver);
EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSubmissionChangesToSingleSubdeviceThenCallBatchedSubmission) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch;
flushTaskFlags.useSingleSubdevice = true;
flushTask(commandStreamReceiver);
EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenGpuIsIdleWhenCsrIsEnabledToFlushOnGpuIdleThenCallBatchedSubmission) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch;

View File

@@ -232,7 +232,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushBcsTask(LinearStream &c
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, 0, taskStartAddress, nullptr,
false, false, QueueThrottle::MEDIUM, NEO::QueueSliceCount::defaultSliceCount,
-streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, false, (submitCSR || dispatchBcsFlags.hasStallingCmds),
+streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, (submitCSR || dispatchBcsFlags.hasStallingCmds),
dispatchBcsFlags.hasRelaxedOrderingDependencies};
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId());
@@ -282,11 +282,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
void *epiloguePipeControlLocation = nullptr;
PipeControlArgs args;
-bool csrFlush = this->wasSubmittedToSingleSubdevice != dispatchFlags.useSingleSubdevice;
-csrFlush |= DebugManager.flags.ForceCsrFlushing.get();
-if (csrFlush) {
+if (DebugManager.flags.ForceCsrFlushing.get()) {
flushBatchedSubmissions();
}
@@ -694,7 +690,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask;
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, taskStartAddress, chainedBatchBuffer,
dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount,
-streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, dispatchFlags.useSingleSubdevice, (submitCSR || dispatchFlags.hasStallingCmds),
+streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, (submitCSR || dispatchFlags.hasStallingCmds),
dispatchFlags.hasRelaxedOrderingDependencies};
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId());
streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
@@ -725,8 +721,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
this->makeSurfacePackNonResident(this->getResidencyAllocations(), true);
}
this->wasSubmittedToSingleSubdevice = dispatchFlags.useSingleSubdevice;
if (this->dispatchMode == DispatchMode::BatchedDispatch) {
// check if we are not over the budget, if we are do implicit flush
if (getMemoryManager()->isMemoryBudgetExhausted()) {
@@ -1268,7 +1262,7 @@ TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropert
uint64_t taskStartAddress = commandStream.getGpuBase() + commandStreamStart;
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, taskStartAddress, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
-commandStream.getUsed(), &commandStream, endingCmdPtr, false, false, false};
+commandStream.getUsed(), &commandStream, endingCmdPtr, false, false};
commandStream.getGraphicsAllocation()->updateTaskCount(newTaskCount, this->osContext->getContextId());
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(newTaskCount, this->osContext->getContextId());
@@ -1380,7 +1374,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream
BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, taskStartAddress,
nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
-commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false, true, false};
+commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, true, false};
this->latestSentTaskCount = taskCount + 1;
auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations());

View File

@@ -67,7 +67,7 @@ struct DispatchFlags {
KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP,
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
-bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush,
+bool usePerDSSbackedBufferP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush,
bool hasStallingCmds, bool hasRelaxedOrderingDependencies) : csrDependencies(csrDependenciesP),
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
pipelineSelectArgs(pipelineSelectArgsP),
@@ -92,7 +92,6 @@ struct DispatchFlags {
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
epilogueRequired(epilogueRequiredP),
usePerDssBackedBuffer(usePerDSSbackedBufferP),
useSingleSubdevice(useSingleSubdeviceP),
useGlobalAtomics(useGlobalAtomicsP),
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
memoryMigrationRequired(memoryMigrationRequiredP),
@@ -125,7 +124,6 @@ struct DispatchFlags {
bool outOfOrderExecutionAllowed = false;
bool epilogueRequired = false;
bool usePerDssBackedBuffer = false;
bool useSingleSubdevice = false;
bool useGlobalAtomics = false;
bool areMultipleSubDevicesInContext = false;
bool memoryMigrationRequired = false;

View File

@@ -101,13 +101,13 @@ void NEO::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourc
NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset,
size_t chainedBatchBufferStartOffset, uint64_t taskStartAddress, GraphicsAllocation *chainedBatchBuffer,
bool requiresCoherency, bool lowPriority, QueueThrottle throttle, uint64_t sliceCount,
-size_t usedSize, LinearStream *stream, void *endCmdPtr, bool useSingleSubdevice, bool hasStallingCmds,
+size_t usedSize, LinearStream *stream, void *endCmdPtr, bool hasStallingCmds,
bool hasRelaxedOrderingDependencies)
: commandBufferAllocation(commandBufferAllocation), startOffset(startOffset),
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), chainedBatchBuffer(chainedBatchBuffer),
requiresCoherency(requiresCoherency), low_priority(lowPriority),
throttle(throttle), sliceCount(sliceCount),
-usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), useSingleSubdevice(useSingleSubdevice), hasStallingCmds(hasStallingCmds),
+usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), hasStallingCmds(hasStallingCmds),
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies) {}
NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) {

View File

@@ -31,7 +31,6 @@ struct BatchBuffer {
size_t usedSize,
LinearStream *stream,
void *endCmdPtr,
bool useSingleSubdevice,
bool hasStallingCmds,
bool hasRelaxedOrderingDependencies);
BatchBuffer() {}
@@ -51,7 +50,6 @@ struct BatchBuffer {
LinearStream *stream = nullptr;
void *endCmdPtr = nullptr;
bool useSingleSubdevice = false;
bool hasStallingCmds = false;
bool hasRelaxedOrderingDependencies = false;
bool ringBufferRestartRequest = false;

View File

@@ -44,7 +44,7 @@ SubmissionStatus DrmCommandStreamReceiver<GfxFamily>::flushInternal(const BatchB
contextIndex++;
-if (DebugManager.flags.EnableWalkerPartition.get() == 0 || batchBuffer.useSingleSubdevice) {
+if (DebugManager.flags.EnableWalkerPartition.get() == 0) {
return SubmissionStatus::SUCCESS;
}
}

View File

@@ -25,7 +25,6 @@ struct BatchBufferHelper {
usedSize, // usedSize
stream, // stream
nullptr, // endCmdPtr
false, // useSingleSubdevice
false, // hasStallingCmds
false // hasRelaxedOrderingDependencies
);

View File

@@ -38,7 +38,6 @@ struct DispatchFlagsHelper {
false, // outOfOrderExecutionAllowed
false, // epilogueRequired
false, // usePerDssBackedBuffer
false, // useSingleSubdevice
false, // useGlobalAtomics
false, // areMultipleSubDevicesInContext
false, // memoryMigrationRequired

View File

@@ -96,6 +96,6 @@ struct ComputeModeRequirements : public ::testing::Test {
CommandStreamReceiver *csr = nullptr;
std::unique_ptr<MockDevice> device;
-DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
+DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
GraphicsAllocation *alloc = nullptr;
};

View File

@@ -574,45 +574,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmImplicitScalingCommandStreamTest, whenForceExecu
memoryManager->freeGraphicsMemory(tileInstancedAllocation);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, DrmImplicitScalingCommandStreamTest, givenUseSingleSubdeviceParamSetWhenFlushingThenUseOnlyContext0) {
struct MockCsr : DrmCommandStreamReceiver<FamilyType> {
using DrmCommandStreamReceiver<FamilyType>::DrmCommandStreamReceiver;
int exec(const BatchBuffer &batchBuffer, uint32_t vmHandleId, uint32_t drmContextId, uint32_t index) override {
EXPECT_EQ(0u, execCalled);
EXPECT_EQ(0u, drmContextId);
EXPECT_EQ(0u, vmHandleId);
execCalled++;
return 0;
}
SubmissionStatus processResidency(const ResidencyContainer &inputAllocationsForResidency, uint32_t handleId) override {
EXPECT_EQ(0u, processResidencyCalled);
EXPECT_EQ(0u, handleId);
processResidencyCalled++;
return SubmissionStatus::SUCCESS;
}
uint32_t execCalled = 0;
uint32_t processResidencyCalled = 0;
};
auto csr = std::make_unique<MockCsr>(*executionEnvironment, 0, osContext->getDeviceBitfield(),
gemCloseWorkerMode::gemCloseWorkerActive);
csr->setupContext(*osContext);
const auto size = 1024u;
BufferObject *bufferObject = new BufferObject(drm, 3, 30, 0, 1);
BufferObjects bufferObjects{bufferObject};
auto allocation = new DrmAllocation(0, AllocationType::UNKNOWN, bufferObjects, nullptr, 0u, size, MemoryPool::LocalMemory);
csr->CommandStreamReceiver::makeResident(*allocation);
auto &cs = csr->getCS();
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
batchBuffer.useSingleSubdevice = true;
csr->flush(batchBuffer, csr->getResidencyAllocations());
memoryManager->freeGraphicsMemory(allocation);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, DrmImplicitScalingCommandStreamTest, givenDisabledImplicitScalingWhenFlushingThenUseOnlyOneContext) {
DebugManagerStateRestore debugRestore{};
DebugManager.flags.EnableWalkerPartition.set(0);