mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
performance: Allow indirect allocs as pack on OpenCL
Related-To: NEO-11228 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ece79ba238
commit
e35b951a00
@@ -326,14 +326,9 @@ NEO::WaitStatus CommandQueueImp::CommandBufferManager::switchBuffers(NEO::Comman
|
|||||||
void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) {
|
void CommandQueueImp::handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) {
|
||||||
NEO::Device *neoDevice = this->device->getNEODevice();
|
NEO::Device *neoDevice = this->device->getNEODevice();
|
||||||
auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager();
|
auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager();
|
||||||
auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex());
|
auto submittedAsPack = svmAllocsManager->submitIndirectAllocationsAsPack(*(this->csr));
|
||||||
if (NEO::debugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) {
|
|
||||||
submitAsPack = !!NEO::debugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (submitAsPack) {
|
if (!submittedAsPack) {
|
||||||
svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u);
|
|
||||||
} else {
|
|
||||||
lockForIndirect = this->device->getDriverHandle()->getSvmAllocsManager()->obtainOwnership();
|
lockForIndirect = this->device->getDriverHandle()->getSvmAllocsManager()->obtainOwnership();
|
||||||
NEO::ResidencyContainer residencyAllocations;
|
NEO::ResidencyContainer residencyAllocations;
|
||||||
svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
|
svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
|
||||||
|
|||||||
@@ -1410,7 +1410,11 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
|||||||
if (getHasIndirectAccess() && (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
if (getHasIndirectAccess() && (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
|
||||||
unifiedMemoryControls.indirectHostAllocationsAllowed ||
|
unifiedMemoryControls.indirectHostAllocationsAllowed ||
|
||||||
unifiedMemoryControls.indirectSharedAllocationsAllowed)) {
|
unifiedMemoryControls.indirectSharedAllocationsAllowed)) {
|
||||||
this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask());
|
auto svmAllocsManager = this->getContext().getSVMAllocsManager();
|
||||||
|
auto submittedAsPack = svmAllocsManager->submitIndirectAllocationsAsPack(commandStreamReceiver);
|
||||||
|
if (!submittedAsPack) {
|
||||||
|
svmAllocsManager->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1414,7 +1414,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenItUsesIndirectUnifiedMemoryDeviceAl
|
|||||||
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
|
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResidentIsCalledThenOnlyHostAllocationsAreMadeResident) {
|
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResidentIsCalledThenAllAllocationsAreMadeResident) {
|
||||||
MockKernelWithInternals mockKernel(*this->pClDevice);
|
MockKernelWithInternals mockKernel(*this->pClDevice);
|
||||||
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
|
||||||
@@ -1430,14 +1430,13 @@ HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResident
|
|||||||
mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, true);
|
mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, true);
|
||||||
|
|
||||||
mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
|
mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
|
||||||
EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size());
|
EXPECT_EQ(2u, commandStreamReceiver.getResidencyAllocations().size());
|
||||||
EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedHostMemoryAllocation));
|
|
||||||
|
|
||||||
svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation);
|
svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation);
|
||||||
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
|
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeResidentIsCalledThenOnlySharedAllocationsAreMadeResident) {
|
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeResidentIsCalledThenAllSharedAllocationsAreMadeResident) {
|
||||||
MockKernelWithInternals mockKernel(*this->pClDevice);
|
MockKernelWithInternals mockKernel(*this->pClDevice);
|
||||||
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
|
||||||
@@ -1452,8 +1451,7 @@ HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeReside
|
|||||||
mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, true);
|
mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, true);
|
||||||
|
|
||||||
mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
|
mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
|
||||||
EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size());
|
EXPECT_EQ(2u, commandStreamReceiver.getResidencyAllocations().size());
|
||||||
EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedSharedMemoryAllocation));
|
|
||||||
|
|
||||||
svmAllocationsManager->freeSVMAlloc(unifiedSharedMemoryAllocation);
|
svmAllocationsManager->freeSVMAlloc(unifiedSharedMemoryAllocation);
|
||||||
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
|
svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
|
||||||
@@ -2465,6 +2463,74 @@ HWTEST_F(KernelResidencyTest, WhenMakingArgsResidentThenImageFromImageCheckIsCor
|
|||||||
EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.samplerCacheFlushRequired);
|
EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.samplerCacheFlushRequired);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks Kernel::makeResident with indirect device access enabled and no debug-flag override:
// the first call leaves the USM device allocation marked objectAlwaysResident, and a second
// call does not touch the allocation's residency task count again.
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenIndirectAllocationsArePacked) {
|
||||||
|
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||||
|
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||||
|
|
||||||
|
// Build a kernel whose program is bound to a standalone MockContext
MockProgram program(toClDeviceVector(*pClDevice));
|
||||||
|
MockContext ctx;
|
||||||
|
program.setContext(&ctx);
|
||||||
|
std::unique_ptr<MockKernel> kernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
|
||||||
|
ASSERT_EQ(CL_SUCCESS, kernel->initialize());
|
||||||
|
// Opt the kernel in to indirect access of device USM allocations
kernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, true);
|
||||||
|
|
||||||
|
auto &csr = pDevice->getGpgpuCommandStreamReceiver();
|
||||||
|
auto svmAllocationsManager = ctx.getSVMAllocsManager();
|
||||||
|
// Create one 4096-byte device USM allocation and fetch its default graphics allocation
auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, ctx.getRootDeviceIndices(), ctx.getDeviceBitfields());
|
||||||
|
deviceProperties.device = pDevice;
|
||||||
|
auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties);
|
||||||
|
auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
|
||||||
|
auto graphicsAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation();
|
||||||
|
|
||||||
|
// First makeResident: the indirect allocation must end up marked objectAlwaysResident
// for this CSR's OS context
|
||||||
|
kernel->makeResident(csr);
|
||||||
|
EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, graphicsAllocation->getResidencyTaskCount(csr.getOsContext().getContextId()));
|
||||||
|
|
||||||
|
// Manually reset the residency task count to objectNotResident so a later update would be visible
|
||||||
|
graphicsAllocation->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, csr.getOsContext().getContextId());
|
||||||
|
|
||||||
|
// Second makeResident: the task count must stay at the forced objectNotResident value,
// i.e. the allocation is not made resident a second time
|
||||||
|
kernel->makeResident(csr);
|
||||||
|
EXPECT_EQ(GraphicsAllocation::objectNotResident, graphicsAllocation->getResidencyTaskCount(csr.getOsContext().getContextId()));
|
||||||
|
|
||||||
|
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks Kernel::makeResident with the MakeIndirectAllocationsResidentAsPack debug flag set to 0:
// every call is expected to set the USM device allocation's residency task count to 1,
// including a call made after the count was manually reset.
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledAndPackingIsDisabledThenIndirectAllocationsAreNotPacked) {
|
||||||
|
// Restores debug flags when the test scope ends
DebugManagerStateRestore dbgStateRestore;
|
||||||
|
// Explicitly disable pack submission for this test
debugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0);
|
||||||
|
|
||||||
|
auto pKernelInfo = std::make_unique<MockKernelInfo>();
|
||||||
|
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||||
|
|
||||||
|
// Build a kernel whose program is bound to a standalone MockContext
MockProgram program(toClDeviceVector(*pClDevice));
|
||||||
|
MockContext ctx;
|
||||||
|
program.setContext(&ctx);
|
||||||
|
std::unique_ptr<MockKernel> kernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
|
||||||
|
ASSERT_EQ(CL_SUCCESS, kernel->initialize());
|
||||||
|
// Opt the kernel in to indirect access of device USM allocations
kernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, true);
|
||||||
|
|
||||||
|
auto &csr = pDevice->getGpgpuCommandStreamReceiver();
|
||||||
|
auto svmAllocationsManager = ctx.getSVMAllocsManager();
|
||||||
|
// Create one 4096-byte device USM allocation and fetch its default graphics allocation
auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, ctx.getRootDeviceIndices(), ctx.getDeviceBitfields());
|
||||||
|
deviceProperties.device = pDevice;
|
||||||
|
auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties);
|
||||||
|
auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
|
||||||
|
auto graphicsAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation();
|
||||||
|
|
||||||
|
// First makeResident: the allocation is expected to carry residency task count 1
kernel->makeResident(csr);
|
||||||
|
EXPECT_EQ(1u, graphicsAllocation->getResidencyTaskCount(csr.getOsContext().getContextId()));
|
||||||
|
|
||||||
|
// Manually reset the residency task count to objectNotResident so a later update would be visible
|
||||||
|
graphicsAllocation->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, csr.getOsContext().getContextId());
|
||||||
|
|
||||||
|
// Second makeResident: the task count must be back at 1, i.e. the allocation was processed again
|
||||||
|
kernel->makeResident(csr);
|
||||||
|
EXPECT_EQ(1u, graphicsAllocation->getResidencyTaskCount(csr.getOsContext().getContextId()));
|
||||||
|
|
||||||
|
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
|
||||||
|
}
|
||||||
|
|
||||||
struct KernelExecutionEnvironmentTest : public Test<ClDeviceFixture> {
|
struct KernelExecutionEnvironmentTest : public Test<ClDeviceFixture> {
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
ClDeviceFixture::setUp();
|
ClDeviceFixture::setUp();
|
||||||
|
|||||||
@@ -909,4 +909,15 @@ void SVMAllocsManager::makeResidentForAllocationsWithId(uint32_t allocationId, C
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decides whether indirect allocations should be submitted as a pack for the given CSR and,
// if so, submits them.
//
// The decision comes from memoryManager->allowIndirectAllocationsAsPack() for the CSR's root
// device index, unless the MakeIndirectAllocationsResidentAsPack debug flag is set to a value
// other than -1, in which case the flag (converted to bool) wins.
//
// @param csr command stream receiver whose root device index and task count are used
// @return true when pack submission was selected (makeIndirectAllocationsResident was called),
//         false when the caller has to handle residency itself
bool SVMAllocsManager::submitIndirectAllocationsAsPack(CommandStreamReceiver &csr) {
|
||||||
|
auto submitAsPack = memoryManager->allowIndirectAllocationsAsPack(csr.getRootDeviceIndex());
|
||||||
|
// -1 means "flag not set"; any other value overrides the memory manager's answer
if (debugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) {
|
||||||
|
submitAsPack = !!NEO::debugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (submitAsPack) {
|
||||||
|
// Pass the CSR's current task count plus one as the task count for the pack
makeIndirectAllocationsResident(csr, csr.peekTaskCount() + 1u);
|
||||||
|
}
|
||||||
|
return submitAsPack;
|
||||||
|
}
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -238,6 +238,8 @@ class SVMAllocsManager {
|
|||||||
|
|
||||||
void initUsmAllocationsCaches(Device &device);
|
void initUsmAllocationsCaches(Device &device);
|
||||||
|
|
||||||
|
bool submitIndirectAllocationsAsPack(CommandStreamReceiver &csr);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void *createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties,
|
void *createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties,
|
||||||
const RootDeviceIndicesContainer &rootDeviceIndices,
|
const RootDeviceIndicesContainer &rootDeviceIndices,
|
||||||
|
|||||||
@@ -464,7 +464,7 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentT
|
|||||||
|
|
||||||
EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size());
|
EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size());
|
||||||
|
|
||||||
svmManager->makeIndirectAllocationsResident(*csr, 1u);
|
EXPECT_TRUE(svmManager->submitIndirectAllocationsAsPack(*csr));
|
||||||
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
|
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
|
||||||
EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(csr->getOsContext().getContextId()));
|
EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(csr->getOsContext().getContextId()));
|
||||||
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->peekEvictable());
|
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->peekEvictable());
|
||||||
@@ -477,6 +477,39 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentT
|
|||||||
svmManager->freeSVMAlloc(ptr);
|
svmManager->freeSVMAlloc(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that with the MakeIndirectAllocationsResidentAsPack debug flag set to 0,
// submitIndirectAllocationsAsPack() returns false, leaves the shared USM allocation
// non-resident, and records nothing in indirectAllocationsResidency for the CSR.
TEST_F(SVMLocalMemoryAllocatorTest, whenSubmitIndirectAllocationsAsPackCalledButAllocationsAsPackNotAllowedThenDontMakeResident) {
|
||||||
|
// Restores debug flags when the test scope ends
DebugManagerStateRestore restore;
|
||||||
|
// Explicitly disable pack submission for this test
debugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0);
|
||||||
|
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
|
||||||
|
auto device = deviceFactory->rootDevices[0];
|
||||||
|
auto memoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
|
||||||
|
auto svmManager = std::make_unique<MockSVMAllocsManager>(memoryManager, false);
|
||||||
|
// Standalone mock CSR set up with the device's default engine OS context
auto csr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
|
||||||
|
csr->setupContext(*device->getDefaultEngine().osContext);
|
||||||
|
|
||||||
|
// Placeholder command-queue pointer; its address is passed to the shared allocation call below
void *cmdQ = reinterpret_cast<void *>(0x12345);
|
||||||
|
auto mockPageFaultManager = new MockPageFaultManager();
|
||||||
|
// Hand the mock over to the memory manager's pageFaultManager via reset()
memoryManager->pageFaultManager.reset(mockPageFaultManager);
|
||||||
|
|
||||||
|
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::sharedUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
|
||||||
|
|
||||||
|
auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ);
|
||||||
|
|
||||||
|
ASSERT_NE(nullptr, ptr);
|
||||||
|
auto graphicsAllocation = svmManager->getSVMAlloc(ptr);
|
||||||
|
|
||||||
|
// Precondition: allocation is not resident and no per-CSR residency entry exists yet
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
|
||||||
|
EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size());
|
||||||
|
|
||||||
|
// With the flag at 0 the call must report that nothing was submitted as a pack
EXPECT_FALSE(svmManager->submitIndirectAllocationsAsPack(*csr));
|
||||||
|
|
||||||
|
// Postcondition: residency state and the residency map are unchanged
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(csr->getOsContext().getContextId()));
|
||||||
|
EXPECT_EQ(0u, svmManager->indirectAllocationsResidency.size());
|
||||||
|
EXPECT_EQ(svmManager->indirectAllocationsResidency.find(csr.get()), svmManager->indirectAllocationsResidency.end());
|
||||||
|
|
||||||
|
svmManager->freeSVMAlloc(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentThenSubsequentCallsDoNotCallResidency) {
|
TEST_F(SVMLocalMemoryAllocatorTest, givenInternalAllocationWhenItIsMadeResidentThenSubsequentCallsDoNotCallResidency) {
|
||||||
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
|
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
|
||||||
auto device = deviceFactory->rootDevices[0];
|
auto device = deviceFactory->rootDevices[0];
|
||||||
|
|||||||
Reference in New Issue
Block a user