mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 16:24:18 +08:00
Reuse kernel allocation
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
77407e6bb1
commit
090bfb9642
@@ -307,7 +307,21 @@ void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) {
|
||||
}
|
||||
}
|
||||
|
||||
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->kernelAllocation);
|
||||
if (executionEnvironment.memoryManager->isKernelBinaryReuseEnabled()) {
|
||||
auto lock = executionEnvironment.memoryManager->lockKernelAllocationMap();
|
||||
auto kernelName = kernelInfo->kernelDescriptor.kernelMetadata.kernelName;
|
||||
auto &storedBinaries = executionEnvironment.memoryManager->getKernelAllocationMap();
|
||||
auto kernelAllocations = storedBinaries.find(kernelName);
|
||||
if (kernelAllocations != storedBinaries.end()) {
|
||||
kernelAllocations->second.reuseCounter--;
|
||||
if (kernelAllocations->second.reuseCounter == 0) {
|
||||
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelAllocations->second.kernelAllocation);
|
||||
storedBinaries.erase(kernelAllocations);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->kernelAllocation);
|
||||
}
|
||||
}
|
||||
delete kernelInfo;
|
||||
}
|
||||
|
||||
@@ -100,6 +100,34 @@ TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationAndCannotAllocateM
|
||||
EXPECT_FALSE(retVal);
|
||||
}
|
||||
|
||||
// With ReuseKernelBinaries enabled, creates kernel allocations for two KernelInfo
// objects that both keep their default kernel name and checks that the second one
// is served from the memory manager's kernel allocation map: the map keeps a single
// entry and both KernelInfo objects end up pointing at the same allocation.
TEST(KernelInfoTest, givenReuseKernelBinariesWhenCreateKernelAllocationThenReuseAllocationFromMap) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.ReuseKernelBinaries.set(1);

    auto factory = UltDeviceFactory{1, 0};
    auto device = factory.rootDevices[0];
    const size_t heapSize = 0x40;
    // Zero-initialized so the kernel heap transfer never reads indeterminate bytes.
    char heap[heapSize] = {};
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = heapSize;
    kernelInfo.heapInfo.pKernelHeap = &heap;
    KernelInfo kernelInfo2;
    kernelInfo2.heapInfo.KernelHeapSize = heapSize;
    kernelInfo2.heapInfo.pKernelHeap = &heap;

    EXPECT_EQ(0u, device->getMemoryManager()->getKernelAllocationMap().size());

    // First creation is expected to register a new entry in the map.
    auto retVal = kernelInfo.createKernelAllocation(*device, true);
    EXPECT_EQ(1u, device->getMemoryManager()->getKernelAllocationMap().size());
    EXPECT_TRUE(retVal);
    EXPECT_NE(nullptr, kernelInfo.kernelAllocation);

    // Second creation must not add an entry and must hand back the stored allocation.
    retVal = kernelInfo2.createKernelAllocation(*device, true);
    EXPECT_EQ(1u, device->getMemoryManager()->getKernelAllocationMap().size());
    EXPECT_TRUE(retVal);
    EXPECT_EQ(kernelInfo.kernelAllocation, kernelInfo2.kernelAllocation);

    // Both KernelInfo objects share one allocation, so it is released exactly once.
    device->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo.kernelAllocation);
}
|
||||
|
||||
using KernelInfoMultiRootDeviceTests = MultiRootDeviceFixture;
|
||||
|
||||
TEST_F(KernelInfoMultiRootDeviceTests, WhenCreatingKernelAllocationThenItHasCorrectRootDeviceIndex) {
|
||||
|
||||
@@ -620,6 +620,41 @@ TEST_F(ProgramFromBinaryTest, givenProgramWhenCleanKernelInfoIsCalledThenKernelA
|
||||
EXPECT_EQ(0u, pProgram->getNumKernels());
|
||||
}
|
||||
|
||||
// With ReuseKernelBinaries enabled, builds the program, forces the stored reuse
// counter of its kernel to 2 and checks that cleaning the kernel info decrements
// the counter to 1 while keeping the map entry.
TEST_F(ProgramFromBinaryTest, givenReuseKernelBinariesWhenCleanCurrentKernelInfoThenDecreaseAllocationReuseCounter) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.ReuseKernelBinaries.set(1);

    pProgram->build(pProgram->getDevices(), nullptr, true);
    auto &kernelAllocMap = pProgram->peekExecutionEnvironment().memoryManager->getKernelAllocationMap();
    auto kernelName = pProgram->buildInfos[0].kernelInfoArray[0]->kernelDescriptor.kernelMetadata.kernelName;
    auto kernelAllocations = kernelAllocMap.find(kernelName);
    // Stop here if the build did not register the kernel; dereferencing end() below would be undefined behavior.
    ASSERT_TRUE(kernelAllocations != kernelAllocMap.end());
    // Pretend a second user shares the allocation so a single cleanup must not free it.
    kernelAllocations->second.reuseCounter = 2u;

    EXPECT_EQ(1u, pProgram->getNumKernels());
    for (auto i = 0u; i < pProgram->buildInfos.size(); i++) {
        pProgram->cleanCurrentKernelInfo(i);
    }
    EXPECT_EQ(0u, pProgram->getNumKernels());
    EXPECT_EQ(1u, kernelAllocations->second.reuseCounter);

    // The counter never reached zero, so the test releases the allocation itself.
    pProgram->peekExecutionEnvironment().memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelAllocations->second.kernelAllocation);
}
|
||||
|
||||
// With ReuseKernelBinaries enabled and a freshly built program, cleaning the kernel
// info of every build info is expected to leave the kernel allocation map empty.
TEST_F(ProgramFromBinaryTest, givenReuseKernelBinariesWhenCleanCurrentKernelInfoAndCounterEqualsZeroThenFreeAllocation) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.ReuseKernelBinaries.set(1);

    pProgram->build(pProgram->getDevices(), nullptr, true);
    auto &storedKernelAllocations = pProgram->peekExecutionEnvironment().memoryManager->getKernelAllocationMap();

    EXPECT_EQ(1u, pProgram->getNumKernels());

    // Walk every build info; the index is passed as the root device index.
    for (auto rootDeviceIndex = 0u; rootDeviceIndex < pProgram->buildInfos.size(); ++rootDeviceIndex) {
        pProgram->cleanCurrentKernelInfo(rootDeviceIndex);
    }

    EXPECT_EQ(0u, pProgram->getNumKernels());
    EXPECT_EQ(0u, storedKernelAllocations.size());
}
|
||||
|
||||
HWTEST_F(ProgramFromBinaryTest, givenProgramWhenCleanCurrentKernelInfoIsCalledButGpuIsNotYetDoneThenKernelAllocationIsPutOnDeferredFreeListAndCsrRegistersCacheFlush) {
|
||||
auto &csr = pDevice->getGpgpuCommandStreamReceiver();
|
||||
EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
|
||||
|
||||
@@ -373,6 +373,7 @@ ForceExtendedBufferSize = -1
|
||||
ForceExtendedUSMBufferSize = -1
|
||||
MakeIndirectAllocationsResidentAsPack = -1
|
||||
MakeEachAllocationResident = -1
|
||||
ReuseKernelBinaries = -1
|
||||
EnableChipsetUniqueUUID = -1
|
||||
ForceSimdMessageSizeInWalker = -1
|
||||
UseNewQueryTopoIoctl = 1
|
||||
|
||||
@@ -256,6 +256,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideUseKmdWaitFunction, -1, "-1: default (L0
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ResolveDependenciesViaPipeControls, -1, "-1: default , 0: disabled, 1: enabled. If enabled, instead of programming semaphores, dependencies are resolved using task levels")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver handles all indirect allocations as one pack instead of making them resident individually.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
|
||||
|
||||
/*DIRECT SUBMISSION FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
|
||||
|
||||
@@ -236,6 +236,26 @@ class MemoryManager {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isKernelBinaryReuseEnabled() {
|
||||
auto reuseBinaries = false;
|
||||
|
||||
if (DebugManager.flags.ReuseKernelBinaries.get() != -1) {
|
||||
reuseBinaries = DebugManager.flags.ReuseKernelBinaries.get();
|
||||
}
|
||||
|
||||
return reuseBinaries;
|
||||
}
|
||||
|
||||
struct KernelAllocationInfo {
|
||||
KernelAllocationInfo(GraphicsAllocation *allocation, uint32_t reuseCounter) : kernelAllocation(allocation), reuseCounter(reuseCounter) {}
|
||||
|
||||
GraphicsAllocation *kernelAllocation;
|
||||
uint32_t reuseCounter;
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, KernelAllocationInfo> &getKernelAllocationMap() { return this->kernelAllocationMap; };
|
||||
std::unique_lock<std::mutex> lockKernelAllocationMap() { return std::unique_lock<std::mutex>(this->kernelAllocationMutex); };
|
||||
|
||||
protected:
|
||||
bool getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo);
|
||||
static void overrideAllocationData(AllocationData &allocationData, const AllocationProperties &properties);
|
||||
@@ -298,6 +318,8 @@ class MemoryManager {
|
||||
AlignmentSelector alignmentSelector = {};
|
||||
std::unique_ptr<std::once_flag[]> checkIsaPlacementOnceFlags;
|
||||
std::vector<bool> isaInLocalMemory;
|
||||
std::unordered_map<std::string, KernelAllocationInfo> kernelAllocationMap;
|
||||
std::mutex kernelAllocationMutex;
|
||||
};
|
||||
|
||||
std::unique_ptr<DeferredDeleter> createDeferredDeleter();
|
||||
|
||||
@@ -115,11 +115,33 @@ int32_t KernelInfo::getArgNumByName(const char *name) const {
|
||||
return -1;
|
||||
}
|
||||
|
||||
bool KernelInfo::createKernelAllocation(const Device &device, bool internalIsa) {
|
||||
bool KernelInfo::createKernelAllocation(Device &device, bool internalIsa) {
|
||||
UNRECOVERABLE_IF(kernelAllocation);
|
||||
auto kernelIsaSize = heapInfo.KernelHeapSize;
|
||||
const auto allocType = internalIsa ? AllocationType::KERNEL_ISA_INTERNAL : AllocationType::KERNEL_ISA;
|
||||
kernelAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), kernelIsaSize, allocType, device.getDeviceBitfield()});
|
||||
|
||||
if (device.getMemoryManager()->isKernelBinaryReuseEnabled()) {
|
||||
auto lock = device.getMemoryManager()->lockKernelAllocationMap();
|
||||
auto kernelName = this->kernelDescriptor.kernelMetadata.kernelName;
|
||||
auto &storedAllocations = device.getMemoryManager()->getKernelAllocationMap();
|
||||
auto kernelAllocations = storedAllocations.find(kernelName);
|
||||
if (kernelAllocations != storedAllocations.end()) {
|
||||
kernelAllocation = kernelAllocations->second.kernelAllocation;
|
||||
kernelAllocations->second.reuseCounter++;
|
||||
auto &hwInfo = device.getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
return MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *kernelAllocation),
|
||||
device, kernelAllocation, 0, heapInfo.pKernelHeap,
|
||||
static_cast<size_t>(kernelIsaSize));
|
||||
} else {
|
||||
kernelAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), kernelIsaSize, allocType, device.getDeviceBitfield()});
|
||||
storedAllocations.insert(std::make_pair(kernelName, MemoryManager::KernelAllocationInfo(kernelAllocation, 1u)));
|
||||
}
|
||||
} else {
|
||||
kernelAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), kernelIsaSize, allocType, device.getDeviceBitfield()});
|
||||
}
|
||||
|
||||
if (!kernelAllocation) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -109,7 +109,7 @@ struct KernelInfo {
|
||||
uint32_t getConstantBufferSize() const;
|
||||
int32_t getArgNumByName(const char *name) const;
|
||||
|
||||
bool createKernelAllocation(const Device &device, bool internalIsa);
|
||||
bool createKernelAllocation(Device &device, bool internalIsa);
|
||||
void apply(const DeviceInfoKernelPayloadConstants &constants);
|
||||
|
||||
HeapInfo heapInfo = {};
|
||||
|
||||
Reference in New Issue
Block a user