mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add per-DSS backed buffer programming to dispatch flags
Related-To: NEO-3220
Change-Id: I16711af6d5b2ca51ab2c92b680d253124774534f
Signed-off-by: Pawel Wilma <pawel.wilma@intel.com>
This commit is contained in:
@ -50,26 +50,27 @@ struct DispatchFlags {
|
||||
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint64_t sliceCount, bool blocking, bool dcFlush,
|
||||
bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired,
|
||||
bool requiresCoherency, bool lowPriority, bool implicitFlush,
|
||||
bool outOfOrderExecutionAllowed, bool epilogueRequired) : csrDependencies(csrDependencies),
|
||||
barrierTimestampPacketNodes(barrierTimestampPacketNodes),
|
||||
pipelineSelectArgs(pipelineSelectArgs),
|
||||
flushStampReference(flushStampReference),
|
||||
throttle(throttle),
|
||||
preemptionMode(preemptionMode),
|
||||
numGrfRequired(numGrfRequired),
|
||||
l3CacheSettings(l3CacheSettings),
|
||||
threadArbitrationPolicy(threadArbitrationPolicy),
|
||||
sliceCount(sliceCount),
|
||||
blocking(blocking),
|
||||
dcFlush(dcFlush),
|
||||
useSLM(useSLM),
|
||||
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl),
|
||||
gsba32BitRequired(gsba32BitRequired),
|
||||
requiresCoherency(requiresCoherency),
|
||||
lowPriority(lowPriority),
|
||||
implicitFlush(implicitFlush),
|
||||
outOfOrderExecutionAllowed(outOfOrderExecutionAllowed),
|
||||
epilogueRequired(epilogueRequired){};
|
||||
bool outOfOrderExecutionAllowed, bool epilogueRequired, bool usePerDSSbackedBuffer) : csrDependencies(csrDependencies),
|
||||
barrierTimestampPacketNodes(barrierTimestampPacketNodes),
|
||||
pipelineSelectArgs(pipelineSelectArgs),
|
||||
flushStampReference(flushStampReference),
|
||||
throttle(throttle),
|
||||
preemptionMode(preemptionMode),
|
||||
numGrfRequired(numGrfRequired),
|
||||
l3CacheSettings(l3CacheSettings),
|
||||
threadArbitrationPolicy(threadArbitrationPolicy),
|
||||
sliceCount(sliceCount),
|
||||
blocking(blocking),
|
||||
dcFlush(dcFlush),
|
||||
useSLM(useSLM),
|
||||
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl),
|
||||
gsba32BitRequired(gsba32BitRequired),
|
||||
requiresCoherency(requiresCoherency),
|
||||
lowPriority(lowPriority),
|
||||
implicitFlush(implicitFlush),
|
||||
outOfOrderExecutionAllowed(outOfOrderExecutionAllowed),
|
||||
epilogueRequired(epilogueRequired),
|
||||
usePerDssBackedBuffer(usePerDSSbackedBuffer){};
|
||||
CsrDependencies csrDependencies;
|
||||
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
|
||||
PipelineSelectArgs pipelineSelectArgs;
|
||||
@ -91,6 +92,7 @@ struct DispatchFlags {
|
||||
bool implicitFlush = false;
|
||||
bool outOfOrderExecutionAllowed = false;
|
||||
bool epilogueRequired = false;
|
||||
bool usePerDssBackedBuffer = false;
|
||||
};
|
||||
|
||||
struct CsrSizeRequestFlags {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2019 Intel Corporation
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -75,7 +75,7 @@ void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, De
|
||||
programKernelDebugging(pCommandStream);
|
||||
}
|
||||
programGenSpecificPreambleWorkArounds(pCommandStream, device.getHardwareInfo());
|
||||
if (DebugManager.flags.ForcePerDssBackedBufferProgramming.get()) {
|
||||
if (perDssBackedBuffer != nullptr) {
|
||||
programPerDssBackedBuffer(pCommandStream, device.getHardwareInfo(), perDssBackedBuffer);
|
||||
}
|
||||
}
|
||||
|
@ -673,6 +673,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
auto specialPipelineSelectMode = false;
|
||||
Kernel *kernel = nullptr;
|
||||
bool usePerDssBackedBuffer = false;
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
if (kernel != dispatchInfo.getKernel()) {
|
||||
@ -689,6 +690,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
if (kernel->hasUncacheableStatelessArgs()) {
|
||||
anyUncacheableArgs = true;
|
||||
}
|
||||
|
||||
if (kernel->requiresPerDssBackedBuffer()) {
|
||||
usePerDssBackedBuffer = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (mediaSamplerRequired) {
|
||||
@ -747,7 +752,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
(QueuePriority::LOW == priority), //lowPriority
|
||||
implicitFlush, //implicitFlush
|
||||
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false //epilogueRequired
|
||||
false, //epilogueRequired
|
||||
usePerDssBackedBuffer //usePerDssBackedBuffer
|
||||
);
|
||||
|
||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||
@ -942,7 +948,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
false, //lowPriority
|
||||
(enqueueProperties.operation == EnqueueProperties::Operation::Blit), //implicitFlush
|
||||
getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false //epilogueRequired
|
||||
false, //epilogueRequired
|
||||
false //usePerDssBackedBuffer
|
||||
);
|
||||
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
|
@ -242,7 +242,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
}
|
||||
}
|
||||
|
||||
if (DebugManager.flags.ForcePerDssBackedBufferProgramming.get()) {
|
||||
if (dispatchFlags.usePerDssBackedBuffer) {
|
||||
if (!perDssBackedBuffer) {
|
||||
createPerDssBackedBuffer(device);
|
||||
}
|
||||
@ -730,7 +730,8 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStateSip(LinearStream &cm
|
||||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config) {
|
||||
if (!this->isPreambleSent) {
|
||||
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation, this->perDssBackedBuffer);
|
||||
GraphicsAllocation *perDssBackedBufferToUse = dispatchFlags.usePerDssBackedBuffer ? this->perDssBackedBuffer : nullptr;
|
||||
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation, perDssBackedBufferToUse);
|
||||
this->isPreambleSent = true;
|
||||
this->lastSentL3Config = newL3Config;
|
||||
this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy;
|
||||
|
@ -69,7 +69,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
||||
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
|
||||
false, //implicitFlush
|
||||
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false //epilogueRequired
|
||||
false, //epilogueRequired
|
||||
false //usePerDssBackedBuffer
|
||||
);
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::levelNotReady);
|
||||
@ -227,7 +228,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
|
||||
false, //implicitFlush
|
||||
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false //epilogueRequired
|
||||
false, //epilogueRequired
|
||||
kernel->requiresPerDssBackedBuffer() //usePerDssBackedBuffer
|
||||
);
|
||||
|
||||
if (timestampPacketDependencies) {
|
||||
@ -332,7 +334,8 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
|
||||
false, //implicitFlush
|
||||
commandStreamReceiver.isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false //epilogueRequired
|
||||
false, //epilogueRequired
|
||||
false //usePerDssBackedBuffer
|
||||
);
|
||||
|
||||
UNRECOVERABLE_IF(!commandStreamReceiver.peekTimestampPacketWriteEnabled());
|
||||
|
@ -408,6 +408,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
const bool kernelUsesLocalIds,
|
||||
const bool isCssUsed) const;
|
||||
|
||||
// Returns true when dispatches of this kernel should request per-DSS backed buffer
// programming; callers forward the result into DispatchFlags::usePerDssBackedBuffer.
bool requiresPerDssBackedBuffer() const;
|
||||
|
||||
protected:
|
||||
struct ObjectCounts {
|
||||
uint32_t imageCount;
|
||||
|
@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "core/debug_settings/debug_settings_manager.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
|
||||
namespace NEO {
|
||||
@ -26,4 +27,9 @@ int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) {
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
bool Kernel::requiresPerDssBackedBuffer() const {
|
||||
return DebugManager.flags.ForcePerDssBackedBufferProgramming.get();
|
||||
}
|
||||
|
||||
} // namespace NEO
|
@ -1308,3 +1308,20 @@ HWTEST_F(CommandQueueHwTest, givenFinishWhenFlushBatchedSubmissionsFailsThenErro
|
||||
HWTEST_F(CommandQueueHwTest, givenEmptyDispatchGlobalsArgsWhenEnqueueInitDispatchGlobalsCalledThenErrorIsReturned) {
|
||||
EXPECT_EQ(CL_INVALID_VALUE, pCmdQ->enqueueInitDispatchGlobals(nullptr, 0, nullptr, nullptr));
|
||||
}
|
||||
|
||||
// Verifies that enqueueing a kernel while ForcePerDssBackedBufferProgramming is enabled
// leaves usePerDssBackedBuffer set in the dispatch flags recorded by the command stream receiver.
HWTEST_F(CommandQueueHwTest, WhenForcePerDssBackedBufferProgrammingSetThenDispatchFlagsAreSetAccordingly) {
    // Restores the debug flags when the test scope ends.
    DebugManagerStateRestore restore;
    // Enable the flag through its set() accessor.
    DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true);

    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    auto mockKernel = mockKernelWithInternals.mockKernel;
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // One-dimensional dispatch: 64 work items in groups of 16, no offset.
    size_t offset = 0;
    size_t gws = 64;
    size_t lws = 16;

    cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(csr.recordedDispatchFlags.usePerDssBackedBuffer);
}
|
||||
|
@ -1578,11 +1578,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferIsAllocatedItI
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEnabledThenAllocationIsCreated) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true);
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||
dispatchFlags.usePerDssBackedBuffer = true;
|
||||
|
||||
commandStreamReceiver.flushTask(commandStream,
|
||||
0,
|
||||
@ -1598,14 +1597,12 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEna
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEnabledAndPerDssBackedBufferAlreadyPresentThenNewAllocationIsNotCreated) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true);
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto memoryManager = pDevice->getMemoryManager();
|
||||
commandStreamReceiver.perDssBackedBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{MemoryConstants::pageSize});
|
||||
|
||||
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||
dispatchFlags.usePerDssBackedBuffer = true;
|
||||
|
||||
commandStreamReceiver.flushTask(commandStream,
|
||||
0,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2019 Intel Corporation
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -73,6 +73,6 @@ struct ComputeModeRequirements : public ::testing::Test {
|
||||
|
||||
CommandStreamReceiver *csr = nullptr;
|
||||
std::unique_ptr<MockDevice> device;
|
||||
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false};
|
||||
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false};
|
||||
GraphicsAllocation *alloc = nullptr;
|
||||
};
|
||||
|
@ -32,7 +32,8 @@ struct DispatchFlagsHelper {
|
||||
false, //lowPriority
|
||||
false, //implicitFlush
|
||||
false, //outOfOrderExecutionAllowed
|
||||
false //epilogueRequired
|
||||
false, //epilogueRequired
|
||||
false //usePerDssBackedBuffer
|
||||
);
|
||||
}
|
||||
};
|
||||
|
@ -2993,6 +2993,23 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
|
||||
}
|
||||
|
||||
// With ForcePerDssBackedBufferProgramming enabled, a kernel must report that it
// requires the per-DSS backed buffer.
TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsSetThenKernelRequiresPerDssBackedBuffer) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true);

    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernelWithInternals(*clDevice);

    const bool bufferRequired = kernelWithInternals.mockKernel->requiresPerDssBackedBuffer();
    EXPECT_TRUE(bufferRequired);
}
|
||||
|
||||
// With ForcePerDssBackedBufferProgramming disabled, a kernel must report that it
// does not require the per-DSS backed buffer.
TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsNotSetThenKernelDoesntRequirePerDssBackedBuffer) {
    // Pin the flag to false so the test does not depend on the ambient value of the
    // global debug settings; the guard restores the flags afterwards.
    DebugManagerStateRestore restore;
    DebugManager.flags.ForcePerDssBackedBufferProgramming.set(false);

    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    MockKernelWithInternals kernel(*device);

    EXPECT_FALSE(kernel.mockKernel->requiresPerDssBackedBuffer());
}
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
Reference in New Issue
Block a user