Add per-DSS backed buffer programming to dispatch flags

Related-To: NEO-3220

Change-Id: I16711af6d5b2ca51ab2c92b680d253124774534f
Signed-off-by: Pawel Wilma <pawel.wilma@intel.com>
This commit is contained in:
Pawel Wilma
2020-01-29 14:15:10 +01:00
committed by sys_ocldev
parent 1593048735
commit 9cbafe8bdd
12 changed files with 90 additions and 37 deletions

View File

@ -50,26 +50,27 @@ struct DispatchFlags {
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint64_t sliceCount, bool blocking, bool dcFlush,
bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired,
bool requiresCoherency, bool lowPriority, bool implicitFlush,
bool outOfOrderExecutionAllowed, bool epilogueRequired) : csrDependencies(csrDependencies),
barrierTimestampPacketNodes(barrierTimestampPacketNodes),
pipelineSelectArgs(pipelineSelectArgs),
flushStampReference(flushStampReference),
throttle(throttle),
preemptionMode(preemptionMode),
numGrfRequired(numGrfRequired),
l3CacheSettings(l3CacheSettings),
threadArbitrationPolicy(threadArbitrationPolicy),
sliceCount(sliceCount),
blocking(blocking),
dcFlush(dcFlush),
useSLM(useSLM),
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl),
gsba32BitRequired(gsba32BitRequired),
requiresCoherency(requiresCoherency),
lowPriority(lowPriority),
implicitFlush(implicitFlush),
outOfOrderExecutionAllowed(outOfOrderExecutionAllowed),
epilogueRequired(epilogueRequired){};
bool outOfOrderExecutionAllowed, bool epilogueRequired, bool usePerDSSbackedBuffer) : csrDependencies(csrDependencies),
barrierTimestampPacketNodes(barrierTimestampPacketNodes),
pipelineSelectArgs(pipelineSelectArgs),
flushStampReference(flushStampReference),
throttle(throttle),
preemptionMode(preemptionMode),
numGrfRequired(numGrfRequired),
l3CacheSettings(l3CacheSettings),
threadArbitrationPolicy(threadArbitrationPolicy),
sliceCount(sliceCount),
blocking(blocking),
dcFlush(dcFlush),
useSLM(useSLM),
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl),
gsba32BitRequired(gsba32BitRequired),
requiresCoherency(requiresCoherency),
lowPriority(lowPriority),
implicitFlush(implicitFlush),
outOfOrderExecutionAllowed(outOfOrderExecutionAllowed),
epilogueRequired(epilogueRequired),
usePerDssBackedBuffer(usePerDSSbackedBuffer){};
CsrDependencies csrDependencies;
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
PipelineSelectArgs pipelineSelectArgs;
@ -91,6 +92,7 @@ struct DispatchFlags {
bool implicitFlush = false;
bool outOfOrderExecutionAllowed = false;
bool epilogueRequired = false;
bool usePerDssBackedBuffer = false;
};
struct CsrSizeRequestFlags {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2019 Intel Corporation
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -75,7 +75,7 @@ void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, De
programKernelDebugging(pCommandStream);
}
programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo());
if (DebugManager.flags.ForcePerDssBackedBufferProgramming.get()) {
if (perDssBackedBuffer != nullptr) {
programPerDssBackedBuffer(pCommandStream, device.getHardwareInfo(), perDssBackedBuffer);
}
}

View File

@ -673,6 +673,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
auto specialPipelineSelectMode = false;
Kernel *kernel = nullptr;
bool usePerDssBackedBuffer = false;
for (auto &dispatchInfo : multiDispatchInfo) {
if (kernel != dispatchInfo.getKernel()) {
@ -689,6 +690,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
if (kernel->hasUncacheableStatelessArgs()) {
anyUncacheableArgs = true;
}
if (kernel->requiresPerDssBackedBuffer()) {
usePerDssBackedBuffer = true;
}
}
if (mediaSamplerRequired) {
@ -747,7 +752,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
(QueuePriority::LOW == priority), //lowPriority
implicitFlush, //implicitFlush
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false //epilogueRequired
false, //epilogueRequired
usePerDssBackedBuffer //usePerDssBackedBuffer
);
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
@ -942,7 +948,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
false, //lowPriority
(enqueueProperties.operation == EnqueueProperties::Operation::Blit), //implicitFlush
getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false //epilogueRequired
false, //epilogueRequired
false //usePerDssBackedBuffer
);
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {

View File

@ -242,7 +242,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
}
}
if (DebugManager.flags.ForcePerDssBackedBufferProgramming.get()) {
if (dispatchFlags.usePerDssBackedBuffer) {
if (!perDssBackedBuffer) {
createPerDssBackedBuffer(device);
}
@ -730,7 +730,8 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStateSip(LinearStream &cm
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config) {
if (!this->isPreambleSent) {
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation, this->perDssBackedBuffer);
GraphicsAllocation *perDssBackedBufferToUse = dispatchFlags.usePerDssBackedBuffer ? this->perDssBackedBuffer : nullptr;
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation, perDssBackedBufferToUse);
this->isPreambleSent = true;
this->lastSentL3Config = newL3Config;
this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy;

View File

@ -69,7 +69,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
false, //implicitFlush
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false //epilogueRequired
false, //epilogueRequired
false //usePerDssBackedBuffer
);
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::levelNotReady);
@ -227,7 +228,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
false, //implicitFlush
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false //epilogueRequired
false, //epilogueRequired
kernel->requiresPerDssBackedBuffer() //usePerDssBackedBuffer
);
if (timestampPacketDependencies) {
@ -332,7 +334,8 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
false, //implicitFlush
commandStreamReceiver.isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false //epilogueRequired
false, //epilogueRequired
false //usePerDssBackedBuffer
);
UNRECOVERABLE_IF(!commandStreamReceiver.peekTimestampPacketWriteEnabled());

View File

@ -408,6 +408,8 @@ class Kernel : public BaseObject<_cl_kernel> {
const bool kernelUsesLocalIds,
const bool isCssUsed) const;
bool requiresPerDssBackedBuffer() const;
protected:
struct ObjectCounts {
uint32_t imageCount;

View File

@ -5,6 +5,7 @@
*
*/
#include "core/debug_settings/debug_settings_manager.h"
#include "runtime/kernel/kernel.h"
namespace NEO {
@ -26,4 +27,9 @@ int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) {
}
return CL_SUCCESS;
}
// Tells the caller whether the per-DSS backed buffer should be used for this kernel.
// The result is taken directly from the ForcePerDssBackedBufferProgramming debug flag;
// no kernel-specific state is consulted here.
bool Kernel::requiresPerDssBackedBuffer() const {
    const bool forcedByDebugFlag = DebugManager.flags.ForcePerDssBackedBufferProgramming.get();
    return forcedByDebugFlag;
}
} // namespace NEO

View File

@ -1308,3 +1308,20 @@ HWTEST_F(CommandQueueHwTest, givenFinishWhenFlushBatchedSubmissionsFailsThenErro
// enqueueInitDispatchGlobals called with nullptr/0 arguments is expected to
// report CL_INVALID_VALUE.
HWTEST_F(CommandQueueHwTest, givenEmptyDispatchGlobalsArgsWhenEnqueueInitDispatchGlobalsCalledThenErrorIsReturned) {
    const auto retVal = pCmdQ->enqueueInitDispatchGlobals(nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
// Enqueues a kernel while the ForcePerDssBackedBufferProgramming debug flag is enabled
// and checks that the dispatch flags recorded by the command stream receiver carry
// usePerDssBackedBuffer == true.
HWTEST_F(CommandQueueHwTest, WhenForcePerDssBackedBufferProgrammingSetThenDispatchFlagsAreSetAccordingly) {
    // NOTE(review): the guard presumably restores debug flags when it leaves scope - confirm.
    DebugManagerStateRestore restore;
    // Use the set() accessor, matching how this flag is written in the other tests.
    DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true);

    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    auto mockKernel = mockKernelWithInternals.mockKernel;
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // One-dimensional dispatch: global size 64, local size 16, no offset.
    size_t offset = 0;
    size_t gws = 64;
    size_t lws = 16;

    cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(csr.recordedDispatchFlags.usePerDssBackedBuffer);
}

View File

@ -1578,11 +1578,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferIsAllocatedItI
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEnabledThenAllocationIsCreated) {
DebugManagerStateRestore restore;
DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true);
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
dispatchFlags.usePerDssBackedBuffer = true;
commandStreamReceiver.flushTask(commandStream,
0,
@ -1598,14 +1597,12 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEna
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEnabledAndPerDssBackedBufferAlreadyPresentThenNewAllocationIsNotCreated) {
DebugManagerStateRestore restore;
DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true);
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
auto memoryManager = pDevice->getMemoryManager();
commandStreamReceiver.perDssBackedBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
dispatchFlags.usePerDssBackedBuffer = true;
commandStreamReceiver.flushTask(commandStream,
0,

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2019 Intel Corporation
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -73,6 +73,6 @@ struct ComputeModeRequirements : public ::testing::Test {
CommandStreamReceiver *csr = nullptr;
std::unique_ptr<MockDevice> device;
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false};
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false};
GraphicsAllocation *alloc = nullptr;
};

View File

@ -32,7 +32,8 @@ struct DispatchFlagsHelper {
false, //lowPriority
false, //implicitFlush
false, //outOfOrderExecutionAllowed
false //epilogueRequired
false, //epilogueRequired
false //usePerDssBackedBuffer
);
}
};

View File

@ -2993,6 +2993,23 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
}
// With the ForcePerDssBackedBufferProgramming debug flag enabled, a kernel must
// report that it requires the per-DSS backed buffer.
TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsSetThenKernelRequiresPerDssBackedBuffer) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true);

    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*clDevice);

    const bool perDssRequired = kernelWithInternals.mockKernel->requiresPerDssBackedBuffer();
    EXPECT_TRUE(perDssRequired);
}
// With the ForcePerDssBackedBufferProgramming debug flag disabled, a kernel must
// report that it does not require the per-DSS backed buffer.
TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsNotSetThenKernelDoesntRequirePerDssBackedBuffer) {
    // Pin the flag explicitly so the test does not depend on the ambient value of
    // the global debug setting (e.g. state leaked from another test or an override).
    DebugManagerStateRestore restore;
    DebugManager.flags.ForcePerDssBackedBufferProgramming.set(false);

    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernel(*device);

    EXPECT_FALSE(kernel.mockKernel->requiresPerDssBackedBuffer());
}
namespace NEO {
template <typename GfxFamily>