mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
performance: Allow indirect allocs as pack on OpenCL
Related-To: NEO-11228 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ece79ba238
commit
e35b951a00
@@ -326,14 +326,9 @@ NEO::WaitStatus CommandQueueImp::CommandBufferManager::switchBuffers(NEO::Comman
|
||||
void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) {
|
||||
NEO::Device *neoDevice = this->device->getNEODevice();
|
||||
auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager();
|
||||
auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex());
|
||||
if (NEO::debugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) {
|
||||
submitAsPack = !!NEO::debugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
|
||||
}
|
||||
auto submittedAsPack = svmAllocsManager->submitIndirectAllocationsAsPack(*(this->csr));
|
||||
|
||||
if (submitAsPack) {
|
||||
svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u);
|
||||
} else {
|
||||
if (!submittedAsPack) {
|
||||
lockForIndirect = this->device->getDriverHandle()->getSvmAllocsManager()->obtainOwnership();
|
||||
NEO::ResidencyContainer residencyAllocations;
|
||||
svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
|
||||
|
||||
@@ -1410,7 +1410,11 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
||||
if (getHasIndirectAccess() && (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
|
||||
unifiedMemoryControls.indirectSharedAllocationsAllowed)) {
|
||||
this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask());
|
||||
auto svmAllocsManager = this->getContext().getSVMAllocsManager();
|
||||
auto submittedAsPack = svmAllocsManager->submitIndirectAllocationsAsPack(commandStreamReceiver);
|
||||
if (!submittedAsPack) {
|
||||
svmAllocsManager->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1414,7 +1414,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenItUsesIndirectUnifiedMemoryDeviceAl
|
||||
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResidentIsCalledThenOnlyHostAllocationsAreMadeResident) {
|
||||
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResidentIsCalledThenAllAllocationsAreMadeResident) {
|
||||
MockKernelWithInternals mockKernel(*this->pClDevice);
|
||||
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
@@ -1430,14 +1430,13 @@ HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResident
|
||||
mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, true);
|
||||
|
||||
mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
|
||||
EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size());
|
||||
EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedHostMemoryAllocation));
|
||||
EXPECT_EQ(2u, commandStreamReceiver.getResidencyAllocations().size());
|
||||
|
||||
svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation);
|
||||
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeResidentIsCalledThenOnlySharedAllocationsAreMadeResident) {
|
||||
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeResidentIsCalledThenAllSharedAllocationsAreMadeResident) {
|
||||
MockKernelWithInternals mockKernel(*this->pClDevice);
|
||||
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
@@ -1452,8 +1451,7 @@ HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeReside
|
||||
mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, true);
|
||||
|
||||
mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
|
||||
EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size());
|
||||
EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedSharedMemoryAllocation));
|
||||
EXPECT_EQ(2u, commandStreamReceiver.getResidencyAllocations().size());
|
||||
|
||||
svmAllocationsManager->freeSVMAlloc(unifiedSharedMemoryAllocation);
|
||||
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
|
||||
@@ -2465,6 +2463,74 @@ HWTEST_F(KernelResidencyTest, WhenMakingArgsResidentThenImageFromImageCheckIsCor
|
||||
EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.samplerCacheFlushRequired);
|
||||
}
|
||||
|
||||
// Exercises Kernel::makeResident for a kernel with indirect device access enabled:
// the first call is expected to mark the device USM allocation as objectAlwaysResident,
// and a second call is expected to leave a manually reset residency state untouched.
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenIndirectAllocationsArePacked) {
    auto pKernelInfo = std::make_unique<MockKernelInfo>();
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;

    MockProgram program(toClDeviceVector(*pClDevice));
    MockContext ctx;
    program.setContext(&ctx);

    auto kernel = std::make_unique<MockKernel>(&program, *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel->initialize());
    kernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, true);

    auto &csr = pDevice->getGpgpuCommandStreamReceiver();
    const auto contextId = csr.getOsContext().getContextId();

    // Create one device USM allocation through the context's SVM manager.
    auto svmAllocationsManager = ctx.getSVMAllocsManager();
    auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, ctx.getRootDeviceIndices(), ctx.getDeviceBitfields());
    deviceProperties.device = pDevice;
    auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties);
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
    auto graphicsAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation();

    // First makeResident: the indirect allocation must end up always-resident.
    kernel->makeResident(csr);
    EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, graphicsAllocation->getResidencyTaskCount(contextId));

    // Manually drop the allocation back to the non-resident state.
    graphicsAllocation->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, contextId);

    // Second makeResident: the residency state is expected to stay as it was just set,
    // i.e. the allocation is not made resident a second time.
    kernel->makeResident(csr);
    EXPECT_EQ(GraphicsAllocation::objectNotResident, graphicsAllocation->getResidencyTaskCount(contextId));

    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}
|
||||
|
||||
// With MakeIndirectAllocationsResidentAsPack forced to 0, every Kernel::makeResident call
// is expected to make the indirect device USM allocation resident again: the residency
// task count reads 1 after each call, even after it has been reset in between.
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledAndPackingIsDisabledThenIndirectAllocationsAreNotPacked) {
    DebugManagerStateRestore dbgStateRestore;
    debugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0);

    auto pKernelInfo = std::make_unique<MockKernelInfo>();
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;

    MockProgram program(toClDeviceVector(*pClDevice));
    MockContext ctx;
    program.setContext(&ctx);

    auto kernel = std::make_unique<MockKernel>(&program, *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel->initialize());
    kernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, true);

    auto &csr = pDevice->getGpgpuCommandStreamReceiver();
    const auto contextId = csr.getOsContext().getContextId();

    // Create one device USM allocation through the context's SVM manager.
    auto svmAllocationsManager = ctx.getSVMAllocsManager();
    auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, ctx.getRootDeviceIndices(), ctx.getDeviceBitfields());
    deviceProperties.device = pDevice;
    auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties);
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
    auto graphicsAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation();

    // First makeResident: allocation is tracked with a regular task count.
    kernel->makeResident(csr);
    EXPECT_EQ(1u, graphicsAllocation->getResidencyTaskCount(contextId));

    // Manually drop the allocation back to the non-resident state.
    graphicsAllocation->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, contextId);

    // Second makeResident: without packing the allocation must be made resident again.
    kernel->makeResident(csr);
    EXPECT_EQ(1u, graphicsAllocation->getResidencyTaskCount(contextId));

    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}
|
||||
|
||||
struct KernelExecutionEnvironmentTest : public Test<ClDeviceFixture> {
|
||||
void SetUp() override {
|
||||
ClDeviceFixture::setUp();
|
||||
|
||||
@@ -909,4 +909,15 @@ void SVMAllocsManager::makeResidentForAllocationsWithId(uint32_t allocationId, C
|
||||
}
|
||||
}
|
||||
|
||||
// Decides whether indirect allocations are submitted as a pack for the given CSR and,
// when they are, makes them resident immediately.
//
// The starting decision comes from the memory manager for the CSR's root device index.
// A MakeIndirectAllocationsResidentAsPack debug flag value other than -1 overrides it
// (0 disables packing, any other value enables it).
//
// Returns true when the pack path was taken; in that case makeIndirectAllocationsResident
// has been called with the CSR's current task count plus one. Returns false otherwise,
// with no residency call made here.
bool SVMAllocsManager::submitIndirectAllocationsAsPack(CommandStreamReceiver &csr) {
    auto usePack = memoryManager->allowIndirectAllocationsAsPack(csr.getRootDeviceIndex());

    const auto packOverride = debugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
    if (packOverride != -1) {
        usePack = (packOverride != 0);
    }

    if (!usePack) {
        return usePack;
    }

    makeIndirectAllocationsResident(csr, csr.peekTaskCount() + 1u);
    return usePack;
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -238,6 +238,8 @@ class SVMAllocsManager {
|
||||
|
||||
void initUsmAllocationsCaches(Device &device);
|
||||
|
||||
bool submitIndirectAllocationsAsPack(CommandStreamReceiver &csr);
|
||||
|
||||
protected:
|
||||
void *createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties,
|
||||
const RootDeviceIndicesContainer &rootDeviceIndices,
|
||||
|
||||
@@ -464,7 +464,7 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentT
|
||||
|
||||
EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size());
|
||||
|
||||
svmManager->makeIndirectAllocationsResident(*csr, 1u);
|
||||
EXPECT_TRUE(svmManager->submitIndirectAllocationsAsPack(*csr));
|
||||
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
|
||||
EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(csr->getOsContext().getContextId()));
|
||||
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->peekEvictable());
|
||||
@@ -477,6 +477,39 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentT
|
||||
svmManager->freeSVMAlloc(ptr);
|
||||
}
|
||||
|
||||
// With MakeIndirectAllocationsResidentAsPack forced to 0, submitIndirectAllocationsAsPack
// must return false, leave the shared USM allocation non-resident, and record nothing
// for the CSR in indirectAllocationsResidency.
TEST_F(SVMLocalMemoryAllocatorTest, whenSubmitIndirectAllocationsAsPackCalledButAllocationsAsPackNotAllowedThenDontMakeResident) {
    DebugManagerStateRestore restore;
    debugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0);

    auto deviceFactory = std::make_unique<UltDeviceFactory>(1, 2);
    auto device = deviceFactory->rootDevices[0];
    auto memoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
    auto svmManager = std::make_unique<MockSVMAllocsManager>(memoryManager, false);

    auto csr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
    csr->setupContext(*device->getDefaultEngine().osContext);
    const auto contextId = csr->getOsContext().getContextId();

    // Dummy command queue handle; only its address is passed to the allocation call.
    void *cmdQ = reinterpret_cast<void *>(0x12345);
    memoryManager->pageFaultManager.reset(new MockPageFaultManager());

    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::sharedUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
    auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ);
    ASSERT_NE(nullptr, ptr);

    auto graphicsAllocation = svmManager->getSVMAlloc(ptr);
    auto defaultGpuAllocation = graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation();

    // Nothing is resident or tracked before the call.
    EXPECT_FALSE(defaultGpuAllocation->isResident(contextId));
    EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size());

    EXPECT_FALSE(svmManager->submitIndirectAllocationsAsPack(*csr));

    // The call must not have changed residency or the tracking map.
    EXPECT_FALSE(defaultGpuAllocation->isResident(contextId));
    EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size());
    EXPECT_EQ(svmManager->indirectAllocationsResidency.find(csr.get()), svmManager->indirectAllocationsResidency.end());

    svmManager->freeSVMAlloc(ptr);
}
|
||||
|
||||
TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentThenSubsequentCallsDoNotCallResidency) {
|
||||
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
|
||||
auto device = deviceFactory->rootDevices[0];
|
||||
|
||||
Reference in New Issue
Block a user