mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-07 12:42:54 +08:00
Set Uncached MOCS for List only given stateless uncached args
Signed-off-by: Spruit, Neil R <neil.r.spruit@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a2401299a0
commit
6d8502847e
@@ -250,7 +250,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
bool isFlushTaskSubmissionEnabled = false;
|
||||
bool isSyncModeQueue = false;
|
||||
bool commandListSLMEnabled = false;
|
||||
bool requiresUncachedMOCS = false;
|
||||
bool requiresQueueUncachedMocs = false;
|
||||
|
||||
protected:
|
||||
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
|
||||
|
||||
@@ -109,7 +109,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
|
||||
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
|
||||
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
|
||||
this->requiresUncachedMOCS = this->containsStatelessUncachedResource;
|
||||
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
|
||||
uint32_t partitionCount = 0;
|
||||
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
|
||||
@@ -225,6 +225,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
|
||||
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
|
||||
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
|
||||
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
|
||||
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
|
||||
|
||||
uint32_t partitionCount = 0;
|
||||
|
||||
@@ -35,7 +35,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
|
||||
void dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) override;
|
||||
|
||||
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream);
|
||||
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed);
|
||||
size_t estimateStateBaseAddressCmdSize();
|
||||
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream);
|
||||
|
||||
|
||||
@@ -82,7 +82,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
auto anyCommandListWithCooperativeKernels = false;
|
||||
auto anyCommandListWithoutCooperativeKernels = false;
|
||||
|
||||
cachedMOCSAllowed = true;
|
||||
bool cachedMOCSAllowed = true;
|
||||
|
||||
for (auto i = 0u; i < numCommandLists; i++) {
|
||||
auto commandList = CommandList::fromHandle(phCommandLists[i]);
|
||||
@@ -100,7 +100,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
anyCommandListWithoutCooperativeKernels = true;
|
||||
}
|
||||
// If the Command List has commands that require uncached MOCS, then any changes to the commands in the queue require the uncached MOCS
|
||||
if (commandList->requiresUncachedMOCS && cachedMOCSAllowed == true) {
|
||||
if (commandList->requiresQueueUncachedMocs && cachedMOCSAllowed == true) {
|
||||
cachedMOCSAllowed = false;
|
||||
}
|
||||
}
|
||||
@@ -314,7 +314,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
|
||||
if (gsbaStateDirty) {
|
||||
auto indirectHeap = CommandList::fromHandle(phCommandLists[0])->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
|
||||
programStateBaseAddress(scratchSpaceController->calculateNewGSH(), indirectHeap->getGraphicsAllocation()->isAllocatedInLocalMemoryPool(), child);
|
||||
programStateBaseAddress(scratchSpaceController->calculateNewGSH(), indirectHeap->getGraphicsAllocation()->isAllocatedInLocalMemoryPool(), child, cachedMOCSAllowed);
|
||||
}
|
||||
|
||||
if (initialPreemptionMode) {
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream) {
|
||||
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
|
||||
|
||||
|
||||
@@ -81,7 +81,6 @@ struct CommandQueueImp : public CommandQueue {
|
||||
ze_command_queue_mode_t getSynchronousMode() const;
|
||||
virtual void dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) = 0;
|
||||
virtual bool getPreemptionCmdProgramming() = 0;
|
||||
bool cachedMOCSAllowed = true;
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL int submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream) {
|
||||
void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
|
||||
if (NEO::ApiSpecificConfig::getBindlessConfiguration()) {
|
||||
|
||||
@@ -74,7 +74,7 @@ struct KernelHw : public KernelImp {
|
||||
}
|
||||
|
||||
if (l3Enabled == false) {
|
||||
this->kernelRequiresUncachedMocsCount++;
|
||||
this->kernelRequiresQueueUncachedMocsCount++;
|
||||
}
|
||||
|
||||
NEO::Device *neoDevice = module->getDevice()->getNEODevice();
|
||||
|
||||
@@ -119,6 +119,7 @@ struct KernelImp : Kernel {
|
||||
uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; }
|
||||
bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; }
|
||||
bool getKernelRequiresUncachedMocs() { return (kernelRequiresUncachedMocsCount > 0); }
|
||||
bool getKernelRequiresQueueUncachedMocs() { return (kernelRequiresQueueUncachedMocsCount > 0); }
|
||||
void setKernelArgUncached(uint32_t index, bool val) { isArgUncached[index] = val; }
|
||||
|
||||
uint32_t *getGlobalOffsets() override {
|
||||
@@ -202,6 +203,7 @@ struct KernelImp : Kernel {
|
||||
|
||||
bool kernelRequiresGenerationOfLocalIdsByRuntime = true;
|
||||
uint32_t kernelRequiresUncachedMocsCount = false;
|
||||
uint32_t kernelRequiresQueueUncachedMocsCount = false;
|
||||
std::vector<bool> isArgUncached;
|
||||
|
||||
uint32_t globalOffsets[3] = {};
|
||||
|
||||
@@ -444,7 +444,7 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize
|
||||
.Times(1); // instruction heap
|
||||
}
|
||||
|
||||
commandQueue->programStateBaseAddress(0u, true, child);
|
||||
commandQueue->programStateBaseAddress(0u, true, child, true);
|
||||
|
||||
if (isaInLocalMemory) {
|
||||
EXPECT_CALL(*memoryManager, getInternalHeapBaseAddress(rootDeviceIndex, false))
|
||||
@@ -460,7 +460,7 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize
|
||||
.Times(2);
|
||||
}
|
||||
|
||||
commandQueue->programStateBaseAddress(0u, false, child);
|
||||
commandQueue->programStateBaseAddress(0u, false, child, true);
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
@@ -477,7 +477,7 @@ HWTEST2_F(CommandQueueProgramSBATest,
|
||||
uint32_t alignedSize = 4096u;
|
||||
NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
|
||||
|
||||
commandQueue->programStateBaseAddress(0u, true, child);
|
||||
commandQueue->programStateBaseAddress(0u, true, child, true);
|
||||
auto pSbaCmd = static_cast<STATE_BASE_ADDRESS *>(commandQueue->commandStream->getSpace(sizeof(STATE_BASE_ADDRESS)));
|
||||
uint32_t statelessMocsIndex = pSbaCmd->getStatelessDataPortAccessMemoryObjectControlState();
|
||||
|
||||
@@ -510,7 +510,7 @@ HWTEST2_F(CommandQueueProgramSBATest,
|
||||
uint32_t alignedSize = 4096u;
|
||||
NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
|
||||
|
||||
commandQueue->programStateBaseAddress(0u, true, child);
|
||||
commandQueue->programStateBaseAddress(0u, true, child, true);
|
||||
|
||||
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
|
||||
|
||||
@@ -549,7 +549,7 @@ HWTEST2_F(CommandQueueProgramSBATest,
|
||||
uint32_t alignedSize = 4096u;
|
||||
NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
|
||||
|
||||
commandQueue->programStateBaseAddress(0u, true, child);
|
||||
commandQueue->programStateBaseAddress(0u, true, child, true);
|
||||
|
||||
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
|
||||
|
||||
|
||||
@@ -91,7 +91,7 @@ struct MultiDeviceCommandQueueExecuteCommandLists : public Test<MultiDeviceFixtu
|
||||
ze_command_list_handle_t commandLists[numCommandLists];
|
||||
};
|
||||
|
||||
HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncachedMOCSThenCachedMOCSAllowedIsFalse) {
|
||||
HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncachedMOCSThenSuccessisReturned) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
using PARSE = typename FamilyType::PARSE;
|
||||
@@ -109,12 +109,10 @@ HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncach
|
||||
|
||||
auto commandList1 = whitebox_cast(CommandList::fromHandle(commandLists[0]));
|
||||
auto commandList2 = whitebox_cast(CommandList::fromHandle(commandLists[1]));
|
||||
commandList1->requiresUncachedMOCS = true;
|
||||
commandList2->requiresUncachedMOCS = true;
|
||||
commandList1->requiresQueueUncachedMocs = true;
|
||||
commandList2->requiresQueueUncachedMocs = true;
|
||||
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ASSERT_EQ(commandQueue->cachedMOCSAllowed, false);
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
|
||||
@@ -1529,7 +1529,7 @@ HWTEST2_F(KernelImpL3CachingTests, GivenKernelImpWhenSetSurfaceStateWithUnaligne
|
||||
|
||||
memset(expectedSsInHeap.ssPtr, 0, size);
|
||||
mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
|
||||
EXPECT_EQ(mockKernel.getKernelRequiresUncachedMocs(), true);
|
||||
EXPECT_EQ(mockKernel.getKernelRequiresQueueUncachedMocs(), true);
|
||||
}
|
||||
|
||||
struct MyMockKernel : public Mock<Kernel> {
|
||||
|
||||
Reference in New Issue
Block a user