Reuse kernel allocation

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-02-24 14:12:06 +00:00
committed by Compute-Runtime-Automation
parent 77407e6bb1
commit 090bfb9642
8 changed files with 127 additions and 4 deletions

View File

@@ -307,7 +307,21 @@ void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) {
}
}
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->kernelAllocation);
if (executionEnvironment.memoryManager->isKernelBinaryReuseEnabled()) {
auto lock = executionEnvironment.memoryManager->lockKernelAllocationMap();
auto kernelName = kernelInfo->kernelDescriptor.kernelMetadata.kernelName;
auto &storedBinaries = executionEnvironment.memoryManager->getKernelAllocationMap();
auto kernelAllocations = storedBinaries.find(kernelName);
if (kernelAllocations != storedBinaries.end()) {
kernelAllocations->second.reuseCounter--;
if (kernelAllocations->second.reuseCounter == 0) {
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelAllocations->second.kernelAllocation);
storedBinaries.erase(kernelAllocations);
}
}
} else {
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->kernelAllocation);
}
}
delete kernelInfo;
}

View File

@@ -100,6 +100,34 @@ TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationAndCannotAllocateM
EXPECT_FALSE(retVal);
}
// With ReuseKernelBinaries enabled, two KernelInfo objects whose kernel names match
// (both names are left empty here) must share one entry in the kernel allocation map
// and therefore one graphics allocation.
TEST(KernelInfoTest, givenReuseKernelBinariesWhenCreateKernelAllocationThenReuseAllocationFromMap) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.ReuseKernelBinaries.set(1);

    auto factory = UltDeviceFactory{1, 0};
    auto device = factory.rootDevices[0];
    const size_t heapSize = 0x40;
    // Zero-initialized so that copying the heap into the allocation never reads indeterminate bytes.
    char heap[heapSize] = {};

    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = heapSize;
    kernelInfo.heapInfo.pKernelHeap = &heap;

    KernelInfo kernelInfo2;
    kernelInfo2.heapInfo.KernelHeapSize = heapSize;
    kernelInfo2.heapInfo.pKernelHeap = &heap;

    EXPECT_EQ(0u, device->getMemoryManager()->getKernelAllocationMap().size());

    // First request creates the allocation and registers it in the map.
    auto retVal = kernelInfo.createKernelAllocation(*device, true);
    EXPECT_EQ(1u, device->getMemoryManager()->getKernelAllocationMap().size());
    EXPECT_TRUE(retVal);

    // Second request for the same kernel name must not add another map entry.
    retVal = kernelInfo2.createKernelAllocation(*device, true);
    EXPECT_EQ(1u, device->getMemoryManager()->getKernelAllocationMap().size());
    EXPECT_TRUE(retVal);

    // The actual reuse: both KernelInfo objects point at the very same allocation.
    ASSERT_NE(nullptr, kernelInfo.kernelAllocation);
    EXPECT_EQ(kernelInfo.kernelAllocation, kernelInfo2.kernelAllocation);

    // The allocation is shared, so it is released exactly once.
    device->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo.kernelAllocation);
}
using KernelInfoMultiRootDeviceTests = MultiRootDeviceFixture;
TEST_F(KernelInfoMultiRootDeviceTests, WhenCreatingKernelAllocationThenItHasCorrectRootDeviceIndex) {

View File

@@ -620,6 +620,41 @@ TEST_F(ProgramFromBinaryTest, givenProgramWhenCleanKernelInfoIsCalledThenKernelA
EXPECT_EQ(0u, pProgram->getNumKernels());
}
// With ReuseKernelBinaries enabled, cleaning the kernel infos of a program must only
// decrement the reuse counter of a shared kernel allocation while other users remain.
TEST_F(ProgramFromBinaryTest, givenReuseKernelBinariesWhenCleanCurrentKernelInfoThenDecreaseAllocationReuseCounter) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.ReuseKernelBinaries.set(1);

    pProgram->build(pProgram->getDevices(), nullptr, true);
    auto &kernelAllocMap = pProgram->peekExecutionEnvironment().memoryManager->getKernelAllocationMap();
    // Copied rather than referenced: cleanCurrentKernelInfo() deletes the KernelInfo that owns the name.
    auto kernelName = pProgram->buildInfos[0].kernelInfoArray[0]->kernelDescriptor.kernelMetadata.kernelName;
    auto kernelAllocations = kernelAllocMap.find(kernelName);
    // Stop here if the build did not register the kernel; dereferencing end() would be undefined behaviour.
    ASSERT_NE(kernelAllocMap.end(), kernelAllocations);
    // Simulate a second user so the entry survives the clean-up below.
    kernelAllocations->second.reuseCounter = 2u;

    EXPECT_EQ(1u, pProgram->getNumKernels());
    for (auto i = 0u; i < pProgram->buildInfos.size(); i++) {
        pProgram->cleanCurrentKernelInfo(i);
    }
    EXPECT_EQ(0u, pProgram->getNumKernels());
    EXPECT_EQ(1u, kernelAllocations->second.reuseCounter);

    // The counter never reached zero, so the allocation is still alive and must be released by the test.
    pProgram->peekExecutionEnvironment().memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelAllocations->second.kernelAllocation);
}
// With ReuseKernelBinaries enabled and no additional users of the kernel binary,
// cleaning the kernel infos must leave the kernel allocation map empty.
TEST_F(ProgramFromBinaryTest, givenReuseKernelBinariesWhenCleanCurrentKernelInfoAndCounterEqualsZeroThenFreeAllocation) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.ReuseKernelBinaries.set(1);

    pProgram->build(pProgram->getDevices(), nullptr, true);
    auto &storedKernelAllocations = pProgram->peekExecutionEnvironment().memoryManager->getKernelAllocationMap();
    EXPECT_EQ(1u, pProgram->getNumKernels());

    // Clean every build info in index order.
    auto buildInfoIndex = 0u;
    while (buildInfoIndex < pProgram->buildInfos.size()) {
        pProgram->cleanCurrentKernelInfo(buildInfoIndex);
        buildInfoIndex++;
    }

    EXPECT_EQ(0u, pProgram->getNumKernels());
    EXPECT_EQ(0u, storedKernelAllocations.size());
}
HWTEST_F(ProgramFromBinaryTest, givenProgramWhenCleanCurrentKernelInfoIsCalledButGpuIsNotYetDoneThenKernelAllocationIsPutOnDeferredFreeListAndCsrRegistersCacheFlush) {
auto &csr = pDevice->getGpgpuCommandStreamReceiver();
EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());

View File

@@ -373,6 +373,7 @@ ForceExtendedBufferSize = -1
ForceExtendedUSMBufferSize = -1
MakeIndirectAllocationsResidentAsPack = -1
MakeEachAllocationResident = -1
ReuseKernelBinaries = -1
EnableChipsetUniqueUUID = -1
ForceSimdMessageSizeInWalker = -1
UseNewQueryTopoIoctl = 1

View File

@@ -256,6 +256,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideUseKmdWaitFunction, -1, "-1: default (L0
DECLARE_DEBUG_VARIABLE(int32_t, ResolveDependenciesViaPipeControls, -1, "-1: default , 0: disabled, 1: enabled. If enabled, instead of programming semaphores, dependencies are resolved using task levels")
DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver handles all indirect allocations as one pack instead of making them resident individually.")
DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush")
/* Read by MemoryManager::isKernelBinaryReuseEnabled(); there the default (-1) yields the same result as 0, i.e. reuse stays off. */
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
/*DIRECT SUBMISSION FLAGS*/
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")

View File

@@ -236,6 +236,26 @@ class MemoryManager {
return false;
}
// Tells whether kernel binary reuse is turned on.
// The decision comes only from the ReuseKernelBinaries debug flag:
// -1 (the default) and 0 give false, every other value gives true.
bool isKernelBinaryReuseEnabled() {
    const auto reuseFlag = DebugManager.flags.ReuseKernelBinaries.get();
    if (reuseFlag == -1) {
        // Flag not overridden: reuse stays off.
        return false;
    }
    return reuseFlag != 0;
}
struct KernelAllocationInfo {
KernelAllocationInfo(GraphicsAllocation *allocation, uint32_t reuseCounter) : kernelAllocation(allocation), reuseCounter(reuseCounter) {}
GraphicsAllocation *kernelAllocation;
uint32_t reuseCounter;
};
std::unordered_map<std::string, KernelAllocationInfo> &getKernelAllocationMap() { return this->kernelAllocationMap; };
std::unique_lock<std::mutex> lockKernelAllocationMap() { return std::unique_lock<std::mutex>(this->kernelAllocationMutex); };
protected:
bool getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo);
static void overrideAllocationData(AllocationData &allocationData, const AllocationProperties &properties);
@@ -298,6 +318,8 @@ class MemoryManager {
AlignmentSelector alignmentSelector = {};
std::unique_ptr<std::once_flag[]> checkIsaPlacementOnceFlags;
std::vector<bool> isaInLocalMemory;
// Kernel name -> shared kernel allocation and its reuse counter; populated only when kernel binary reuse is enabled.
std::unordered_map<std::string, KernelAllocationInfo> kernelAllocationMap;
// Taken through lockKernelAllocationMap() by the code that looks up, inserts into or erases from kernelAllocationMap.
std::mutex kernelAllocationMutex;
};
std::unique_ptr<DeferredDeleter> createDeferredDeleter();

View File

@@ -115,11 +115,33 @@ int32_t KernelInfo::getArgNumByName(const char *name) const {
return -1;
}
bool KernelInfo::createKernelAllocation(const Device &device, bool internalIsa) {
bool KernelInfo::createKernelAllocation(Device &device, bool internalIsa) {
UNRECOVERABLE_IF(kernelAllocation);
auto kernelIsaSize = heapInfo.KernelHeapSize;
const auto allocType = internalIsa ? AllocationType::KERNEL_ISA_INTERNAL : AllocationType::KERNEL_ISA;
kernelAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), kernelIsaSize, allocType, device.getDeviceBitfield()});
if (device.getMemoryManager()->isKernelBinaryReuseEnabled()) {
auto lock = device.getMemoryManager()->lockKernelAllocationMap();
auto kernelName = this->kernelDescriptor.kernelMetadata.kernelName;
auto &storedAllocations = device.getMemoryManager()->getKernelAllocationMap();
auto kernelAllocations = storedAllocations.find(kernelName);
if (kernelAllocations != storedAllocations.end()) {
kernelAllocation = kernelAllocations->second.kernelAllocation;
kernelAllocations->second.reuseCounter++;
auto &hwInfo = device.getHardwareInfo();
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
return MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *kernelAllocation),
device, kernelAllocation, 0, heapInfo.pKernelHeap,
static_cast<size_t>(kernelIsaSize));
} else {
kernelAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), kernelIsaSize, allocType, device.getDeviceBitfield()});
storedAllocations.insert(std::make_pair(kernelName, MemoryManager::KernelAllocationInfo(kernelAllocation, 1u)));
}
} else {
kernelAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), kernelIsaSize, allocType, device.getDeviceBitfield()});
}
if (!kernelAllocation) {
return false;
}

View File

@@ -109,7 +109,7 @@ struct KernelInfo {
uint32_t getConstantBufferSize() const;
int32_t getArgNumByName(const char *name) const;
bool createKernelAllocation(const Device &device, bool internalIsa);
bool createKernelAllocation(Device &device, bool internalIsa);
void apply(const DeviceInfoKernelPayloadConstants &constants);
HeapInfo heapInfo = {};