mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 23:56:39 +08:00
performance: share inter-module ISA allocations
Related-To: NEO-10258 Currently each module has its own GA for kernel ISAs. This change allows new modules to reuse an existing allocation. Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
349a86a8dc
commit
10ed479b16
@@ -502,7 +502,7 @@ void ModuleTranslationUnit::processDebugData() {
|
||||
|
||||
ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
|
||||
: device(device), translationUnit(std::make_unique<ModuleTranslationUnit>(device)),
|
||||
moduleBuildLog(moduleBuildLog), kernelsIsaParentRegion(nullptr), type(type) {
|
||||
moduleBuildLog(moduleBuildLog), type(type) {
|
||||
auto &gfxCoreHelper = device->getGfxCoreHelper();
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
this->isaAllocationPageSize = gfxCoreHelper.useSystemMemoryPlacementForISA(hwInfo) ? MemoryConstants::pageSize : MemoryConstants::pageSize64k;
|
||||
@@ -516,9 +516,9 @@ ModuleImp::~ModuleImp() {
|
||||
}
|
||||
}
|
||||
this->kernelImmDatas.clear();
|
||||
if (this->kernelsIsaParentRegion) {
|
||||
DEBUG_BREAK_IF(this->device->getNEODevice()->getMemoryManager() == nullptr);
|
||||
this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(this->kernelsIsaParentRegion.release());
|
||||
if (this->sharedIsaAllocation) {
|
||||
auto neoDevice = this->device->getNEODevice();
|
||||
neoDevice->getIsaPoolAllocator().freeSharedIsaAllocation(this->sharedIsaAllocation.release());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -566,7 +566,7 @@ ze_result_t ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neo
|
||||
linkageSuccessful &= populateHostGlobalSymbolsMap(this->translationUnit->programInfo.globalsDeviceToHostNameMap);
|
||||
this->updateBuildLog(neoDevice);
|
||||
|
||||
if ((this->isFullyLinked && this->type == ModuleType::user) || (this->kernelsIsaParentRegion && this->type == ModuleType::builtin)) {
|
||||
if ((this->isFullyLinked && this->type == ModuleType::user) || (this->sharedIsaAllocation && this->type == ModuleType::builtin)) {
|
||||
this->transferIsaSegmentsToAllocation(neoDevice, nullptr);
|
||||
|
||||
if (device->getL0Debugger()) {
|
||||
@@ -585,29 +585,31 @@ void ModuleImp::transferIsaSegmentsToAllocation(NEO::Device *neoDevice, const NE
|
||||
const auto &productHelper = neoDevice->getProductHelper();
|
||||
auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment();
|
||||
|
||||
if (this->kernelsIsaParentRegion && this->kernelImmDatas.size()) {
|
||||
if (this->sharedIsaAllocation && this->kernelImmDatas.size()) {
|
||||
if (this->kernelImmDatas[0]->isIsaCopiedToAllocation()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto isaBufferSize = this->kernelsIsaParentRegion->getUnderlyingBufferSize();
|
||||
const auto isaBufferSize = this->sharedIsaAllocation->getSize();
|
||||
DEBUG_BREAK_IF(isaBufferSize == 0);
|
||||
auto isaBuffer = std::vector<std::byte>(isaBufferSize);
|
||||
std::memset(isaBuffer.data(), 0x0, isaBufferSize);
|
||||
|
||||
auto moduleOffset = sharedIsaAllocation->getOffset();
|
||||
for (auto &kernelImmData : this->kernelImmDatas) {
|
||||
DEBUG_BREAK_IF(kernelImmData->isIsaCopiedToAllocation());
|
||||
kernelImmData->getIsaGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
|
||||
kernelImmData->getIsaGraphicsAllocation()->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
|
||||
|
||||
auto [kernelHeapPtr, kernelHeapSize] = this->getKernelHeapPointerAndSize(kernelImmData, isaSegmentsForPatching);
|
||||
auto offset = kernelImmData->getIsaOffsetInParentAllocation();
|
||||
memcpy_s(isaBuffer.data() + offset, isaBufferSize - offset, kernelHeapPtr, kernelHeapSize);
|
||||
auto isaOffset = kernelImmData->getIsaOffsetInParentAllocation() - moduleOffset;
|
||||
memcpy_s(isaBuffer.data() + isaOffset, isaBufferSize - isaOffset, kernelHeapPtr, kernelHeapSize);
|
||||
}
|
||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *this->kernelsIsaParentRegion),
|
||||
auto moduleAllocation = this->sharedIsaAllocation->getGraphicsAllocation();
|
||||
auto lock = this->sharedIsaAllocation->obtainSharedAllocationLock();
|
||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *moduleAllocation),
|
||||
*neoDevice,
|
||||
this->kernelsIsaParentRegion.get(),
|
||||
0u,
|
||||
moduleAllocation,
|
||||
moduleOffset,
|
||||
isaBuffer.data(),
|
||||
isaBuffer.size());
|
||||
for (auto &kernelImmData : kernelImmDatas) {
|
||||
@@ -804,16 +806,17 @@ ze_result_t ModuleImp::setIsaGraphicsAllocations() {
|
||||
|
||||
bool debuggerDisabled = (this->device->getL0Debugger() == nullptr);
|
||||
if (debuggerDisabled && kernelsIsaTotalSize <= isaAllocationPageSize) {
|
||||
if (auto allocation = this->allocateKernelsIsaMemory(kernelsIsaTotalSize); allocation == nullptr) {
|
||||
auto neoDevice = this->device->getNEODevice();
|
||||
auto &isaAllocator = neoDevice->getIsaPoolAllocator();
|
||||
auto crossModuleAllocation = isaAllocator.requestGraphicsAllocationForIsa(this->type == ModuleType::builtin, kernelsIsaTotalSize);
|
||||
if (crossModuleAllocation == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
} else {
|
||||
this->kernelsIsaParentRegion.reset(allocation);
|
||||
}
|
||||
|
||||
this->sharedIsaAllocation.reset(crossModuleAllocation);
|
||||
for (auto i = 0lu; i < kernelsCount; i++) {
|
||||
auto [isaOffset, isaSize] = kernelsChunks[i];
|
||||
this->kernelImmDatas[i]->setIsaParentAllocation(this->kernelsIsaParentRegion.get());
|
||||
this->kernelImmDatas[i]->setIsaSubAllocationOffset(isaOffset);
|
||||
this->kernelImmDatas[i]->setIsaParentAllocation(this->sharedIsaAllocation->getGraphicsAllocation());
|
||||
this->kernelImmDatas[i]->setIsaSubAllocationOffset(this->sharedIsaAllocation->getOffset() + isaOffset);
|
||||
this->kernelImmDatas[i]->setIsaSubAllocationSize(isaSize);
|
||||
}
|
||||
} else {
|
||||
@@ -1707,4 +1710,11 @@ bool moveBuildOption(std::string &dstOptionsSet, std::string &srcOptionSet, NEO:
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the graphics allocation obtained from this module's sharedIsaAllocation,
// or nullptr when the module holds no sharedIsaAllocation.
NEO::GraphicsAllocation *ModuleImp::getKernelsIsaParentAllocation() const {
|
||||
if (!sharedIsaAllocation) {
|
||||
return nullptr;
|
||||
}
|
||||
return sharedIsaAllocation->getGraphicsAllocation();
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
|
||||
namespace NEO {
|
||||
struct KernelDescriptor;
|
||||
class SharedIsaAllocation;
|
||||
|
||||
namespace Zebin::Debug {
|
||||
struct Segments;
|
||||
@@ -125,7 +126,7 @@ struct ModuleImp : public Module {
|
||||
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override;
|
||||
|
||||
const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; }
|
||||
NEO::GraphicsAllocation *getKernelsIsaParentAllocation() const { return kernelsIsaParentRegion.get(); }
|
||||
NEO::GraphicsAllocation *getKernelsIsaParentAllocation() const;
|
||||
|
||||
uint32_t getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const override;
|
||||
|
||||
@@ -189,7 +190,7 @@ struct ModuleImp : public Module {
|
||||
std::unique_ptr<ModuleTranslationUnit> translationUnit;
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr;
|
||||
std::unique_ptr<NEO::GraphicsAllocation> kernelsIsaParentRegion;
|
||||
std::unique_ptr<NEO::SharedIsaAllocation> sharedIsaAllocation;
|
||||
std::vector<std::shared_ptr<Kernel>> printfKernelContainer;
|
||||
std::vector<std::unique_ptr<KernelImmutableData>> kernelImmDatas;
|
||||
NEO::Linker::RelocatedSymbolsMap symbols;
|
||||
|
||||
@@ -192,9 +192,14 @@ struct ModuleKernelIsaAllocationsFixture : public ModuleFixture {
|
||||
}
|
||||
|
||||
void givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned() {
|
||||
mockModule->allocateKernelsIsaMemoryCallBase = false;
|
||||
// Fill current pool so next request will try to allocate
|
||||
auto alloc = device->getNEODevice()->getIsaPoolAllocator().requestGraphicsAllocationForIsa(false, MemoryConstants::pageSize2M * 2);
|
||||
|
||||
auto memoryManager = reinterpret_cast<MockMemoryManager *>(device->getNEODevice()->getMemoryManager());
|
||||
memoryManager->failInDevicePoolWithError = true;
|
||||
auto result = module->initialize(&this->moduleDesc, device->getNEODevice());
|
||||
EXPECT_EQ(result, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
device->getNEODevice()->getIsaPoolAllocator().freeSharedIsaAllocation(alloc);
|
||||
}
|
||||
|
||||
void givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned() {
|
||||
@@ -4713,15 +4718,17 @@ TEST_F(ModuleKernelImmDatasTest, givenDeviceOOMWhenMemoryManagerFailsToAllocateM
|
||||
moduleDesc.inputSize = src.size();
|
||||
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
module.reset(nullptr);
|
||||
auto module = std::make_unique<Module>(device, moduleBuildLog, ModuleType::user);
|
||||
ASSERT_NE(nullptr, module.get());
|
||||
// Fill current pool so next request will try to allocate
|
||||
auto alloc = device->getNEODevice()->getIsaPoolAllocator().requestGraphicsAllocationForIsa(false, MemoryConstants::pageSize2M * 2);
|
||||
auto mockMemoryManager = static_cast<NEO::MockMemoryManager *>(neoDevice->getMemoryManager());
|
||||
mockMemoryManager->isMockHostMemoryManager = true;
|
||||
mockMemoryManager->forceFailureInPrimaryAllocation = true;
|
||||
|
||||
auto module = std::make_unique<Module>(device, moduleBuildLog, ModuleType::user);
|
||||
ASSERT_NE(nullptr, module.get());
|
||||
|
||||
auto result = module->initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
device->getNEODevice()->getIsaPoolAllocator().freeSharedIsaAllocation(alloc);
|
||||
};
|
||||
|
||||
using MultiTileModuleTest = Test<MultiTileModuleFixture>;
|
||||
|
||||
@@ -451,5 +451,36 @@ TEST_F(ModuleTests, givenFP64EmulationEnabledWhenCreatingModuleThenEnableFP64Gen
|
||||
EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, BuildOptions::enableFP64GenEmu));
|
||||
};
|
||||
|
||||
// Creates numModules modules from the same module descriptor and checks, for each module,
// that its kernel immutable data sit at consecutive offsets in the parent allocation and
// that the module reports the same parent ISA allocation as the first module created.
TEST_F(ModuleTests, whenMultipleModulesCreatedThenModulesShareIsaAllocation) {
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.EnableLocalMemory.set(1);
|
||||
uint8_t binary[10]; // contents are left uninitialized; only the pointer and size are passed on
|
||||
ze_module_desc_t moduleDesc = {};
|
||||
moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
|
||||
moduleDesc.pInputModule = binary;
|
||||
moduleDesc.inputSize = 10;
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
NEO::GraphicsAllocation *allocation; // assigned on the first iteration (i == 0) before any comparison reads it
|
||||
std::vector<std::unique_ptr<L0::ModuleImp>> modules;
|
||||
constexpr size_t numModules = 10;
|
||||
for (auto i = 0u; i < numModules; i++) {
|
||||
modules.emplace_back(new L0::ModuleImp(device, moduleBuildLog, ModuleType::user));
|
||||
modules[i]->initialize(&moduleDesc, device->getNEODevice()); // return value is not checked here
|
||||
if (i == 0) {
|
||||
allocation = modules[i]->getKernelsIsaParentAllocation();
|
||||
}
|
||||
auto &vec = modules[i]->getKernelImmutableDataVector();
|
||||
auto offsetForImmData = vec[0]->getIsaOffsetInParentAllocation(); // NOTE(review): assumes vec is non-empty — confirm the fixture always yields at least one kernel
|
||||
for (auto &immData : vec) {
|
||||
EXPECT_EQ(offsetForImmData, immData->getIsaOffsetInParentAllocation());
|
||||
offsetForImmData += immData->getIsaSubAllocationSize(); // next kernel is expected directly after this one
|
||||
}
|
||||
// Verify that every module reports the same parent allocation as the first module
|
||||
if (i != 0) {
|
||||
EXPECT_EQ(allocation, modules[i]->getKernelsIsaParentAllocation());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user