mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-07 04:33:58 +08:00
Refactor L3 programming.
- Do not do it via member setting.
- Utilize DispatchFlags.

Change-Id: I75d4c8ea6c1e10ca0edeeb0d1c3883a549c1cb1f
Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
3528179434
commit
f362739521
@@ -593,17 +593,21 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
previousTimestampPacketNodes->makeResident(getGpgpuCommandStreamReceiver());
|
||||
}
|
||||
|
||||
bool anyUncacheableArgs = false;
|
||||
auto requiresCoherency = false;
|
||||
for (auto surface : CreateRange(surfaces, surfaceCount)) {
|
||||
surface->makeResident(getGpgpuCommandStreamReceiver());
|
||||
requiresCoherency |= surface->IsCoherent;
|
||||
if (!surface->allowsL3Caching()) {
|
||||
anyUncacheableArgs = true;
|
||||
}
|
||||
}
|
||||
|
||||
auto mediaSamplerRequired = false;
|
||||
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
auto specialPipelineSelectMode = false;
|
||||
Kernel *kernel = nullptr;
|
||||
bool anyUncacheableArgs = false;
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
if (kernel != dispatchInfo.getKernel()) {
|
||||
kernel = dispatchInfo.getKernel();
|
||||
@@ -659,10 +663,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
}
|
||||
}
|
||||
|
||||
if (anyUncacheableArgs) {
|
||||
getGpgpuCommandStreamReceiver().setDisableL3Cache(true);
|
||||
}
|
||||
|
||||
DispatchFlags dispatchFlags;
|
||||
dispatchFlags.blocking = blocking;
|
||||
dispatchFlags.dcFlush = shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC;
|
||||
@@ -685,6 +685,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
dispatchFlags.multiEngineQueue = this->multiEngineQueue;
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
|
||||
if (anyUncacheableArgs) {
|
||||
dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff;
|
||||
}
|
||||
|
||||
if (gtpinIsGTPinInitialized()) {
|
||||
gtpinNotifyPreFlushTask(this);
|
||||
}
|
||||
|
||||
@@ -104,9 +104,6 @@ void CommandStreamReceiver::makeSurfacePackNonResident(ResidencyContainer &alloc
|
||||
|
||||
// Makes the given host-pointer allocation resident by forwarding it to
// makeResident(). As listed here, it also calls setDisableL3Cache(true) when
// isL3Capable() reports false for the allocation.
// NOTE(review): this listing is a rendered diff without +/- markers; the hunk
// header (-104,9 +104,6) suggests the three-line L3 check below is what the
// commit removes - confirm against the repository.
void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gfxAllocation) {
|
||||
makeResident(*gfxAllocation);
|
||||
if (!isL3Capable(*gfxAllocation)) {
|
||||
setDisableL3Cache(true);
|
||||
}
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) {
|
||||
|
||||
@@ -169,9 +169,6 @@ class CommandStreamReceiver {
|
||||
|
||||
virtual cl_int expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation);
|
||||
|
||||
// Stores val in the disableL3Cache member.
// NOTE(review): the hunk header (-169,9 +169,6) suggests this setter is one of
// the pieces the commit deletes - confirm against the repository.
void setDisableL3Cache(bool val) {
|
||||
disableL3Cache = val;
|
||||
}
|
||||
bool isMultiOsContextCapable() const;
|
||||
|
||||
void setLatestSentTaskCount(uint32_t latestSentTaskCount) {
|
||||
@@ -246,7 +243,6 @@ class CommandStreamReceiver {
|
||||
bool bindingTableBaseAddressRequired = false;
|
||||
bool mediaVfeStateDirty = true;
|
||||
bool lastVmeSubslicesConfig = false;
|
||||
bool disableL3Cache = false;
|
||||
bool stallingPipeControlOnNextFlushRequired = false;
|
||||
bool timestampPacketWriteEnabled = false;
|
||||
bool nTo1SubmissionModelEnabled = false;
|
||||
|
||||
@@ -271,9 +271,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty;
|
||||
|
||||
auto requiredL3Index = CacheSettings::l3CacheOn;
|
||||
if (this->disableL3Cache) {
|
||||
if (dispatchFlags.l3CacheSettings == L3CachingSettings::l3CacheOff) {
|
||||
requiredL3Index = CacheSettings::l3CacheOff;
|
||||
this->disableL3Cache = false;
|
||||
}
|
||||
|
||||
if (requiredL3Index != latestSentStatelessMocsConfig) {
|
||||
|
||||
@@ -30,6 +30,10 @@ constexpr auto csOverfetchSize = MemoryConstants::pageSize;
|
||||
// Timeout-related constants.
namespace TimeoutControls {
|
||||
// Largest value representable by int64_t.
constexpr int64_t maxTimeout = std::numeric_limits<int64_t>::max();
|
||||
}
|
||||
// Values for the uint32_t DispatchFlags::l3CacheSettings field, which is
// initialised to l3CacheOn. The enqueue/submit paths in this listing switch it
// to l3CacheOff when any surface reports !allowsL3Caching(), and flushTask()
// compares it against l3CacheOff to select CacheSettings::l3CacheOff.
namespace L3CachingSettings {
|
||||
constexpr uint32_t l3CacheOn = 0u;
|
||||
constexpr uint32_t l3CacheOff = 1u;
|
||||
} // namespace L3CachingSettings
|
||||
|
||||
struct DispatchFlags {
|
||||
CsrDependencies csrDependencies;
|
||||
@@ -37,6 +41,7 @@ struct DispatchFlags {
|
||||
QueueThrottle throttle = QueueThrottle::MEDIUM;
|
||||
PreemptionMode preemptionMode = PreemptionMode::Disabled;
|
||||
uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn;
|
||||
bool blocking = false;
|
||||
bool dcFlush = false;
|
||||
bool useSLM = false;
|
||||
|
||||
@@ -126,10 +126,14 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
IndirectHeap *ssh = kernelOperation->ssh.get();
|
||||
|
||||
auto requiresCoherency = false;
|
||||
auto anyUncacheableArgs = false;
|
||||
for (auto &surface : surfaces) {
|
||||
DEBUG_BREAK_IF(!surface);
|
||||
surface->makeResident(commandStreamReceiver);
|
||||
requiresCoherency |= surface->IsCoherent;
|
||||
if (!surface->allowsL3Caching()) {
|
||||
anyUncacheableArgs = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (printfHandler) {
|
||||
@@ -187,6 +191,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
}
|
||||
dispatchFlags.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode();
|
||||
|
||||
if (anyUncacheableArgs) {
|
||||
dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff;
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
|
||||
gtpinNotifyPreFlushTask(&commandQueue);
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#pragma once
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
#include "runtime/helpers/cache_policy.h"
|
||||
#include "runtime/mem_obj/mem_obj.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
|
||||
@@ -18,6 +19,7 @@ class Surface {
|
||||
virtual ~Surface() = default;
|
||||
virtual void makeResident(CommandStreamReceiver &csr) = 0;
|
||||
virtual Surface *duplicate() = 0;
|
||||
// Default implementation: always returns true. Virtual so that derived
// surfaces can override it (HostPtrSurface does in this listing).
virtual bool allowsL3Caching() { return true; }
|
||||
bool IsCoherent;
|
||||
};
|
||||
|
||||
@@ -73,6 +75,10 @@ class HostPtrSurface : public Surface {
|
||||
return isPtrCopyAllowed;
|
||||
}
|
||||
|
||||
// Overrides the base-class query: returns whatever isL3Capable() reports for
// the allocation that gfxAllocation points to.
// NOTE(review): gfxAllocation is dereferenced without a null check; presumably
// it is always set before this is called - verify against callers.
virtual bool allowsL3Caching() override {
|
||||
return isL3Capable(*gfxAllocation);
|
||||
}
|
||||
|
||||
protected:
|
||||
void *memoryPointer;
|
||||
size_t surfaceSize;
|
||||
|
||||
@@ -313,7 +313,6 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
EXPECT_EQ(CacheSettings::l3CacheOff, csr.latestSentStatelessMocsConfig);
|
||||
EXPECT_FALSE(csr.disableL3Cache);
|
||||
|
||||
void *ptr2 = (void *)0x1040;
|
||||
|
||||
@@ -328,7 +327,6 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenRead
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CacheSettings::l3CacheOn, csr.latestSentStatelessMocsConfig);
|
||||
EXPECT_FALSE(csr.disableL3Cache);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
|
||||
|
||||
@@ -154,36 +154,6 @@ TEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenGetCSIsCalledThe
|
||||
EXPECT_EQ(GraphicsAllocation::AllocationType::COMMAND_BUFFER, commandStreamAllocation->getAllocationType());
|
||||
}
|
||||
|
||||
// Allocates graphics memory for host pointer 0xF000 with size 0x2000, passes it
// to makeResidentHostPtrAllocation(), and expects csr.disableL3Cache to stay
// false before freeing the allocation.
// NOTE(review): the hunk header (-154,36 +154,6) suggests this whole test is
// removed by the commit - confirm against the repository.
HWTEST_F(CommandStreamReceiverTest, givenPtrAndSizeThatMeetL3CriteriaWhenMakeResidentHostPtrThenCsrEnableL3) {
|
||||
void *hostPtr = reinterpret_cast<void *>(0xF000);
|
||||
auto size = 0x2000u;
|
||||
|
||||
auto memoryManager = commandStreamReceiver->getMemoryManager();
|
||||
GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{false, size}, hostPtr);
|
||||
ASSERT_NE(nullptr, graphicsAllocation);
|
||||
commandStreamReceiver->makeResidentHostPtrAllocation(graphicsAllocation);
|
||||
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
EXPECT_FALSE(csr.disableL3Cache);
|
||||
memoryManager->freeGraphicsMemory(graphicsAllocation);
|
||||
}
|
||||
|
||||
// Counterpart of the 0xF000/0x2000 case: uses host pointer 0xF001 with size
// 0x2001, passes the allocation to makeResidentHostPtrAllocation(), and expects
// csr.disableL3Cache to become true before freeing the allocation.
// NOTE(review): the hunk header (-154,36 +154,6) suggests this whole test is
// removed by the commit - confirm against the repository.
HWTEST_F(CommandStreamReceiverTest, givenPtrAndSizeThatDoNotMeetL3CriteriaWhenMakeResidentHostPtrThenCsrDisableL3) {
|
||||
void *hostPtr = reinterpret_cast<void *>(0xF001);
|
||||
auto size = 0x2001u;
|
||||
|
||||
auto memoryManager = commandStreamReceiver->getMemoryManager();
|
||||
GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{false, size}, hostPtr);
|
||||
ASSERT_NE(nullptr, graphicsAllocation);
|
||||
commandStreamReceiver->makeResidentHostPtrAllocation(graphicsAllocation);
|
||||
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
EXPECT_TRUE(csr.disableL3Cache);
|
||||
memoryManager->freeGraphicsMemory(graphicsAllocation);
|
||||
}
|
||||
|
||||
TEST_F(CommandStreamReceiverTest, memoryManagerHasAccessToCSR) {
|
||||
auto *memoryManager = commandStreamReceiver->getMemoryManager();
|
||||
EXPECT_EQ(commandStreamReceiver, memoryManager->getDefaultCommandStreamReceiver(0));
|
||||
@@ -206,7 +176,6 @@ HWTEST_F(CommandStreamReceiverTest, whenStoreAllocationThenStoredAllocationHasTa
|
||||
// Checks the state of the device's ULT command stream receiver before any
// submission is made in this test: latestSentStatelessMocsConfig is expected to
// equal CacheSettings::unknownMocs.
// NOTE(review): the hunk header (-206,7 +176,6) indicates one line is removed
// here; presumably it is the disableL3Cache expectation below - confirm.
HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenCheckedForInitialStatusOfStatelessMocsIndexThenUnknownMocsIsReturend) {
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
EXPECT_EQ(CacheSettings::unknownMocs, csr.latestSentStatelessMocsConfig);
|
||||
EXPECT_FALSE(csr.disableL3Cache);
|
||||
}
|
||||
|
||||
TEST_F(CommandStreamReceiverTest, makeResidentPushesAllocationToMemoryManagerResidencyList) {
|
||||
|
||||
@@ -35,7 +35,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired;
|
||||
using BaseClass::CommandStreamReceiver::cleanupResources;
|
||||
using BaseClass::CommandStreamReceiver::commandStream;
|
||||
using BaseClass::CommandStreamReceiver::disableL3Cache;
|
||||
using BaseClass::CommandStreamReceiver::dispatchMode;
|
||||
using BaseClass::CommandStreamReceiver::executionEnvironment;
|
||||
using BaseClass::CommandStreamReceiver::experimentalCmdBuffer;
|
||||
|
||||
Reference in New Issue
Block a user