Refactor L3 programming.

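- Do not program L3 via a CommandStreamReceiver member setting (disableL3Cache / setDisableL3Cache).
- Utilize DispatchFlags (l3CacheSettings) to pass the L3 caching decision to flushTask instead.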

Change-Id: I75d4c8ea6c1e10ca0edeeb0d1c3883a549c1cb1f
Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
Mrozek, Michal
2019-08-22 17:02:37 +02:00
committed by sys_ocldev
parent 3528179434
commit f362739521
10 changed files with 29 additions and 48 deletions

View File

@@ -593,17 +593,21 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
previousTimestampPacketNodes->makeResident(getGpgpuCommandStreamReceiver());
}
bool anyUncacheableArgs = false;
auto requiresCoherency = false;
for (auto surface : CreateRange(surfaces, surfaceCount)) {
surface->makeResident(getGpgpuCommandStreamReceiver());
requiresCoherency |= surface->IsCoherent;
if (!surface->allowsL3Caching()) {
anyUncacheableArgs = true;
}
}
auto mediaSamplerRequired = false;
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
auto specialPipelineSelectMode = false;
Kernel *kernel = nullptr;
bool anyUncacheableArgs = false;
for (auto &dispatchInfo : multiDispatchInfo) {
if (kernel != dispatchInfo.getKernel()) {
kernel = dispatchInfo.getKernel();
@@ -659,10 +663,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
}
}
if (anyUncacheableArgs) {
getGpgpuCommandStreamReceiver().setDisableL3Cache(true);
}
DispatchFlags dispatchFlags;
dispatchFlags.blocking = blocking;
dispatchFlags.dcFlush = shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC;
@@ -685,6 +685,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.multiEngineQueue = this->multiEngineQueue;
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
if (anyUncacheableArgs) {
dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff;
}
if (gtpinIsGTPinInitialized()) {
gtpinNotifyPreFlushTask(this);
}

View File

@@ -104,9 +104,6 @@ void CommandStreamReceiver::makeSurfacePackNonResident(ResidencyContainer &alloc
// Makes a host-pointer allocation resident on this command stream receiver.
// As rendered here, the body also flags L3 to be disabled (through
// setDisableL3Cache) when isL3Capable() reports the allocation is not
// L3-capable.
// NOTE(review): per the hunk header (9 -> 6 lines) the isL3Capable check
// appears to be the part removed by this commit — confirm against the diff.
void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gfxAllocation) {
// gfxAllocation is dereferenced unconditionally; callers must pass a valid pointer.
makeResident(*gfxAllocation);
if (!isL3Capable(*gfxAllocation)) {
setDisableL3Cache(true);
}
}
void CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) {

View File

@@ -169,9 +169,6 @@ class CommandStreamReceiver {
virtual cl_int expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation);
// Stores val in the disableL3Cache member.
// NOTE(review): per the hunk header (9 -> 6 lines) this setter appears to be
// removed by this commit — confirm against the diff.
void setDisableL3Cache(bool val) {
disableL3Cache = val;
}
bool isMultiOsContextCapable() const;
void setLatestSentTaskCount(uint32_t latestSentTaskCount) {
@@ -246,7 +243,6 @@ class CommandStreamReceiver {
bool bindingTableBaseAddressRequired = false;
bool mediaVfeStateDirty = true;
bool lastVmeSubslicesConfig = false;
bool disableL3Cache = false;
bool stallingPipeControlOnNextFlushRequired = false;
bool timestampPacketWriteEnabled = false;
bool nTo1SubmissionModelEnabled = false;

View File

@@ -271,9 +271,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty;
auto requiredL3Index = CacheSettings::l3CacheOn;
if (this->disableL3Cache) {
if (dispatchFlags.l3CacheSettings == L3CachingSettings::l3CacheOff) {
requiredL3Index = CacheSettings::l3CacheOff;
this->disableL3Cache = false;
}
if (requiredL3Index != latestSentStatelessMocsConfig) {

View File

@@ -30,6 +30,10 @@ constexpr auto csOverfetchSize = MemoryConstants::pageSize;
namespace TimeoutControls {
constexpr int64_t maxTimeout = std::numeric_limits<int64_t>::max();
}
// Values for DispatchFlags::l3CacheSettings, used to request an L3 caching
// mode for a single flush.
namespace L3CachingSettings {
// Default value of DispatchFlags::l3CacheSettings.
constexpr uint32_t l3CacheOn = 0u;
// Set by enqueue/submit paths when any surface reports !allowsL3Caching();
// flushTask checks for this value to select CacheSettings::l3CacheOff.
constexpr uint32_t l3CacheOff = 1u;
} // namespace L3CachingSettings
struct DispatchFlags {
CsrDependencies csrDependencies;
@@ -37,6 +41,7 @@ struct DispatchFlags {
QueueThrottle throttle = QueueThrottle::MEDIUM;
PreemptionMode preemptionMode = PreemptionMode::Disabled;
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn;
bool blocking = false;
bool dcFlush = false;
bool useSLM = false;

View File

@@ -126,10 +126,14 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
IndirectHeap *ssh = kernelOperation->ssh.get();
auto requiresCoherency = false;
auto anyUncacheableArgs = false;
for (auto &surface : surfaces) {
DEBUG_BREAK_IF(!surface);
surface->makeResident(commandStreamReceiver);
requiresCoherency |= surface->IsCoherent;
if (!surface->allowsL3Caching()) {
anyUncacheableArgs = true;
}
}
if (printfHandler) {
@@ -187,6 +191,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
}
dispatchFlags.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode();
if (anyUncacheableArgs) {
dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff;
}
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
gtpinNotifyPreFlushTask(&commandQueue);

View File

@@ -7,6 +7,7 @@
#pragma once
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/helpers/cache_policy.h"
#include "runtime/mem_obj/mem_obj.h"
#include "runtime/memory_manager/graphics_allocation.h"
@@ -18,6 +19,7 @@ class Surface {
virtual ~Surface() = default;
virtual void makeResident(CommandStreamReceiver &csr) = 0;
virtual Surface *duplicate() = 0;
// Reports whether this surface may be used with L3 caching enabled.
// The base implementation always returns true; subclasses may override.
virtual bool allowsL3Caching() { return true; }
bool IsCoherent;
};
@@ -73,6 +75,10 @@ class HostPtrSurface : public Surface {
return isPtrCopyAllowed;
}
// Reports whether this host-pointer surface may be used with L3 caching
// enabled, as decided by isL3Capable() for the backing graphics allocation.
// NOTE(review): gfxAllocation is dereferenced unconditionally — presumably it
// is always set before this is called; verify against callers.
bool allowsL3Caching() override {
return isL3Capable(*gfxAllocation);
}
protected:
void *memoryPointer;
size_t surfaceSize;

View File

@@ -313,7 +313,6 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead
EXPECT_EQ(CL_SUCCESS, retVal);
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
EXPECT_EQ(CacheSettings::l3CacheOff, csr.latestSentStatelessMocsConfig);
EXPECT_FALSE(csr.disableL3Cache);
void *ptr2 = (void *)0x1040;
@@ -328,7 +327,6 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead
nullptr);
EXPECT_EQ(CacheSettings::l3CacheOn, csr.latestSentStatelessMocsConfig);
EXPECT_FALSE(csr.disableL3Cache);
}
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {

View File

@@ -154,36 +154,6 @@ TEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenGetCSIsCalledThe
EXPECT_EQ(GraphicsAllocation::AllocationType::COMMAND_BUFFER, commandStreamAllocation->getAllocationType());
}
// Verifies that making a host-pointer allocation resident leaves
// disableL3Cache unset when the pointer/size pair meets the L3 criteria.
// NOTE(review): per the hunk header (36 -> 6 lines) this test appears to be
// removed by this commit — confirm against the diff.
HWTEST_F(CommandStreamReceiverTest, givenPtrAndSizeThatMeetL3CriteriaWhenMakeResidentHostPtrThenCsrEnableL3) {
// Pointer and size chosen to satisfy the L3 criteria (per the test name).
void *hostPtr = reinterpret_cast<void *>(0xF000);
auto size = 0x2000u;
auto memoryManager = commandStreamReceiver->getMemoryManager();
GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{false, size}, hostPtr);
ASSERT_NE(nullptr, graphicsAllocation);
commandStreamReceiver->makeResidentHostPtrAllocation(graphicsAllocation);
// presumably refers to the same receiver as commandStreamReceiver — verify against the fixture
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
EXPECT_FALSE(csr.disableL3Cache);
memoryManager->freeGraphicsMemory(graphicsAllocation);
}
// Verifies that making a host-pointer allocation resident sets
// disableL3Cache when the pointer/size pair does not meet the L3 criteria.
// NOTE(review): per the hunk header (36 -> 6 lines) this test appears to be
// removed by this commit — confirm against the diff.
HWTEST_F(CommandStreamReceiverTest, givenPtrAndSizeThatDoNotMeetL3CriteriaWhenMakeResidentHostPtrThenCsrDisableL3) {
// Pointer and size chosen to violate the L3 criteria (per the test name).
void *hostPtr = reinterpret_cast<void *>(0xF001);
auto size = 0x2001u;
auto memoryManager = commandStreamReceiver->getMemoryManager();
GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{false, size}, hostPtr);
ASSERT_NE(nullptr, graphicsAllocation);
commandStreamReceiver->makeResidentHostPtrAllocation(graphicsAllocation);
// presumably refers to the same receiver as commandStreamReceiver — verify against the fixture
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
EXPECT_TRUE(csr.disableL3Cache);
memoryManager->freeGraphicsMemory(graphicsAllocation);
}
TEST_F(CommandStreamReceiverTest, memoryManagerHasAccessToCSR) {
auto *memoryManager = commandStreamReceiver->getMemoryManager();
EXPECT_EQ(commandStreamReceiver, memoryManager->getDefaultCommandStreamReceiver(0));
@@ -206,7 +176,6 @@ HWTEST_F(CommandStreamReceiverTest, whenStoreAllocationThenStoredAllocationHasTa
// Verifies the state of the receiver before any enqueue is issued in this
// test: latestSentStatelessMocsConfig is CacheSettings::unknownMocs.
// NOTE(review): per the hunk header (7 -> 6 lines) the disableL3Cache
// expectation appears to be the line removed by this commit — confirm.
HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenCheckedForInitialStatusOfStatelessMocsIndexThenUnknownMocsIsReturend) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
EXPECT_EQ(CacheSettings::unknownMocs, csr.latestSentStatelessMocsConfig);
EXPECT_FALSE(csr.disableL3Cache);
}
TEST_F(CommandStreamReceiverTest, makeResidentPushesAllocationToMemoryManagerResidencyList) {

View File

@@ -35,7 +35,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired;
using BaseClass::CommandStreamReceiver::cleanupResources;
using BaseClass::CommandStreamReceiver::commandStream;
using BaseClass::CommandStreamReceiver::disableL3Cache;
using BaseClass::CommandStreamReceiver::dispatchMode;
using BaseClass::CommandStreamReceiver::executionEnvironment;
using BaseClass::CommandStreamReceiver::experimentalCmdBuffer;