Set Uncached MOCS for List only given stateless uncached args

Signed-off-by: Spruit, Neil R <neil.r.spruit@intel.com>
This commit is contained in:
Spruit, Neil R
2021-11-17 23:20:00 +00:00
committed by Compute-Runtime-Automation
parent a2401299a0
commit 6d8502847e
13 changed files with 21 additions and 21 deletions

View File

@@ -250,7 +250,7 @@ struct CommandList : _ze_command_list_handle_t {
bool isFlushTaskSubmissionEnabled = false;
bool isSyncModeQueue = false;
bool commandListSLMEnabled = false;
bool requiresUncachedMOCS = false;
bool requiresQueueUncachedMocs = false;
protected:
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);

View File

@@ -109,7 +109,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
this->requiresUncachedMOCS = this->containsStatelessUncachedResource;
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
uint32_t partitionCount = 0;
NEO::Device *neoDevice = device->getNEODevice();

View File

@@ -225,6 +225,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
uint32_t partitionCount = 0;

View File

@@ -35,7 +35,7 @@ struct CommandQueueHw : public CommandQueueImp {
void dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) override;
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream);
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed);
size_t estimateStateBaseAddressCmdSize();
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream);

View File

@@ -82,7 +82,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
auto anyCommandListWithCooperativeKernels = false;
auto anyCommandListWithoutCooperativeKernels = false;
cachedMOCSAllowed = true;
bool cachedMOCSAllowed = true;
for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
@@ -100,7 +100,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
anyCommandListWithoutCooperativeKernels = true;
}
// If the Command List has commands that require uncached MOCS, then any changes to the commands in the queue require the uncached MOCS
if (commandList->requiresUncachedMOCS && cachedMOCSAllowed == true) {
if (commandList->requiresQueueUncachedMocs && cachedMOCSAllowed == true) {
cachedMOCSAllowed = false;
}
}
@@ -314,7 +314,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
if (gsbaStateDirty) {
auto indirectHeap = CommandList::fromHandle(phCommandLists[0])->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
programStateBaseAddress(scratchSpaceController->calculateNewGSH(), indirectHeap->getGraphicsAllocation()->isAllocatedInLocalMemoryPool(), child);
programStateBaseAddress(scratchSpaceController->calculateNewGSH(), indirectHeap->getGraphicsAllocation()->isAllocatedInLocalMemoryPool(), child, cachedMOCSAllowed);
}
if (initialPreemptionMode) {

View File

@@ -30,7 +30,7 @@
namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream) {
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;

View File

@@ -81,7 +81,6 @@ struct CommandQueueImp : public CommandQueue {
ze_command_queue_mode_t getSynchronousMode() const;
virtual void dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) = 0;
virtual bool getPreemptionCmdProgramming() = 0;
bool cachedMOCSAllowed = true;
protected:
MOCKABLE_VIRTUAL int submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,

View File

@@ -21,7 +21,7 @@
namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream) {
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
if (NEO::ApiSpecificConfig::getBindlessConfiguration()) {

View File

@@ -74,7 +74,7 @@ struct KernelHw : public KernelImp {
}
if (l3Enabled == false) {
this->kernelRequiresUncachedMocsCount++;
this->kernelRequiresQueueUncachedMocsCount++;
}
NEO::Device *neoDevice = module->getDevice()->getNEODevice();

View File

@@ -119,6 +119,7 @@ struct KernelImp : Kernel {
// Returns the value currently stored in requiredWorkgroupOrder.
uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; }
// Returns the kernelRequiresGenerationOfLocalIdsByRuntime flag unchanged.
bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; }
// Returns true when kernelRequiresUncachedMocsCount is greater than zero.
bool getKernelRequiresUncachedMocs() { return (kernelRequiresUncachedMocsCount > 0); }
// Returns true when kernelRequiresQueueUncachedMocsCount is greater than zero.
// NOTE(review): presumably this is the queue-level counterpart of
// getKernelRequiresUncachedMocs(); confirm where each counter is updated.
bool getKernelRequiresQueueUncachedMocs() { return (kernelRequiresQueueUncachedMocsCount > 0); }
// Stores val in isArgUncached at position index.
// The write uses unchecked operator[]; index is assumed to be within the
// current size of isArgUncached — TODO confirm against callers.
void setKernelArgUncached(uint32_t index, bool val) { isArgUncached[index] = val; }
uint32_t *getGlobalOffsets() override {
@@ -202,6 +203,7 @@ struct KernelImp : Kernel {
bool kernelRequiresGenerationOfLocalIdsByRuntime = true;
uint32_t kernelRequiresUncachedMocsCount = false;
uint32_t kernelRequiresQueueUncachedMocsCount = false;
std::vector<bool> isArgUncached;
uint32_t globalOffsets[3] = {};

View File

@@ -444,7 +444,7 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize
.Times(1); // instruction heap
}
commandQueue->programStateBaseAddress(0u, true, child);
commandQueue->programStateBaseAddress(0u, true, child, true);
if (isaInLocalMemory) {
EXPECT_CALL(*memoryManager, getInternalHeapBaseAddress(rootDeviceIndex, false))
@@ -460,7 +460,7 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize
.Times(2);
}
commandQueue->programStateBaseAddress(0u, false, child);
commandQueue->programStateBaseAddress(0u, false, child, true);
commandQueue->destroy();
}
@@ -477,7 +477,7 @@ HWTEST2_F(CommandQueueProgramSBATest,
uint32_t alignedSize = 4096u;
NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
commandQueue->programStateBaseAddress(0u, true, child);
commandQueue->programStateBaseAddress(0u, true, child, true);
auto pSbaCmd = static_cast<STATE_BASE_ADDRESS *>(commandQueue->commandStream->getSpace(sizeof(STATE_BASE_ADDRESS)));
uint32_t statelessMocsIndex = pSbaCmd->getStatelessDataPortAccessMemoryObjectControlState();
@@ -510,7 +510,7 @@ HWTEST2_F(CommandQueueProgramSBATest,
uint32_t alignedSize = 4096u;
NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
commandQueue->programStateBaseAddress(0u, true, child);
commandQueue->programStateBaseAddress(0u, true, child, true);
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
@@ -549,7 +549,7 @@ HWTEST2_F(CommandQueueProgramSBATest,
uint32_t alignedSize = 4096u;
NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
commandQueue->programStateBaseAddress(0u, true, child);
commandQueue->programStateBaseAddress(0u, true, child, true);
auto usedSpaceAfter = commandQueue->commandStream->getUsed();

View File

@@ -91,7 +91,7 @@ struct MultiDeviceCommandQueueExecuteCommandLists : public Test<MultiDeviceFixtu
ze_command_list_handle_t commandLists[numCommandLists];
};
HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncachedMOCSThenCachedMOCSAllowedIsFalse) {
HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncachedMOCSThenSuccessisReturned) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
using PARSE = typename FamilyType::PARSE;
@@ -109,12 +109,10 @@ HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncach
auto commandList1 = whitebox_cast(CommandList::fromHandle(commandLists[0]));
auto commandList2 = whitebox_cast(CommandList::fromHandle(commandLists[1]));
commandList1->requiresUncachedMOCS = true;
commandList2->requiresUncachedMOCS = true;
commandList1->requiresQueueUncachedMocs = true;
commandList2->requiresQueueUncachedMocs = true;
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_EQ(commandQueue->cachedMOCSAllowed, false);
commandQueue->destroy();
}

View File

@@ -1529,7 +1529,7 @@ HWTEST2_F(KernelImpL3CachingTests, GivenKernelImpWhenSetSurfaceStateWithUnaligne
memset(expectedSsInHeap.ssPtr, 0, size);
mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
EXPECT_EQ(mockKernel.getKernelRequiresUncachedMocs(), true);
EXPECT_EQ(mockKernel.getKernelRequiresQueueUncachedMocs(), true);
}
struct MyMockKernel : public Mock<Kernel> {