performance: share inter-module ISA allocations

Related-To: NEO-10258

Currently each module has its own graphics allocation (GA)
for kernel ISAs. This change allows new modules to
reuse an existing allocation.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2024-04-29 17:12:50 +00:00
committed by Compute-Runtime-Automation
parent 349a86a8dc
commit 10ed479b16
14 changed files with 414 additions and 35 deletions

View File

@@ -502,7 +502,7 @@ void ModuleTranslationUnit::processDebugData() {
ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
: device(device), translationUnit(std::make_unique<ModuleTranslationUnit>(device)),
moduleBuildLog(moduleBuildLog), kernelsIsaParentRegion(nullptr), type(type) {
moduleBuildLog(moduleBuildLog), type(type) {
auto &gfxCoreHelper = device->getGfxCoreHelper();
auto &hwInfo = device->getHwInfo();
this->isaAllocationPageSize = gfxCoreHelper.useSystemMemoryPlacementForISA(hwInfo) ? MemoryConstants::pageSize : MemoryConstants::pageSize64k;
@@ -516,9 +516,9 @@ ModuleImp::~ModuleImp() {
}
}
this->kernelImmDatas.clear();
if (this->kernelsIsaParentRegion) {
DEBUG_BREAK_IF(this->device->getNEODevice()->getMemoryManager() == nullptr);
this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(this->kernelsIsaParentRegion.release());
if (this->sharedIsaAllocation) {
auto neoDevice = this->device->getNEODevice();
neoDevice->getIsaPoolAllocator().freeSharedIsaAllocation(this->sharedIsaAllocation.release());
}
}
@@ -566,7 +566,7 @@ ze_result_t ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neo
linkageSuccessful &= populateHostGlobalSymbolsMap(this->translationUnit->programInfo.globalsDeviceToHostNameMap);
this->updateBuildLog(neoDevice);
if ((this->isFullyLinked && this->type == ModuleType::user) || (this->kernelsIsaParentRegion && this->type == ModuleType::builtin)) {
if ((this->isFullyLinked && this->type == ModuleType::user) || (this->sharedIsaAllocation && this->type == ModuleType::builtin)) {
this->transferIsaSegmentsToAllocation(neoDevice, nullptr);
if (device->getL0Debugger()) {
@@ -585,29 +585,31 @@ void ModuleImp::transferIsaSegmentsToAllocation(NEO::Device *neoDevice, const NE
const auto &productHelper = neoDevice->getProductHelper();
auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment();
if (this->kernelsIsaParentRegion && this->kernelImmDatas.size()) {
if (this->sharedIsaAllocation && this->kernelImmDatas.size()) {
if (this->kernelImmDatas[0]->isIsaCopiedToAllocation()) {
return;
}
const auto isaBufferSize = this->kernelsIsaParentRegion->getUnderlyingBufferSize();
const auto isaBufferSize = this->sharedIsaAllocation->getSize();
DEBUG_BREAK_IF(isaBufferSize == 0);
auto isaBuffer = std::vector<std::byte>(isaBufferSize);
std::memset(isaBuffer.data(), 0x0, isaBufferSize);
auto moduleOffset = sharedIsaAllocation->getOffset();
for (auto &kernelImmData : this->kernelImmDatas) {
DEBUG_BREAK_IF(kernelImmData->isIsaCopiedToAllocation());
kernelImmData->getIsaGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
kernelImmData->getIsaGraphicsAllocation()->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
auto [kernelHeapPtr, kernelHeapSize] = this->getKernelHeapPointerAndSize(kernelImmData, isaSegmentsForPatching);
auto offset = kernelImmData->getIsaOffsetInParentAllocation();
memcpy_s(isaBuffer.data() + offset, isaBufferSize - offset, kernelHeapPtr, kernelHeapSize);
auto isaOffset = kernelImmData->getIsaOffsetInParentAllocation() - moduleOffset;
memcpy_s(isaBuffer.data() + isaOffset, isaBufferSize - isaOffset, kernelHeapPtr, kernelHeapSize);
}
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *this->kernelsIsaParentRegion),
auto moduleAllocation = this->sharedIsaAllocation->getGraphicsAllocation();
auto lock = this->sharedIsaAllocation->obtainSharedAllocationLock();
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *moduleAllocation),
*neoDevice,
this->kernelsIsaParentRegion.get(),
0u,
moduleAllocation,
moduleOffset,
isaBuffer.data(),
isaBuffer.size());
for (auto &kernelImmData : kernelImmDatas) {
@@ -804,16 +806,17 @@ ze_result_t ModuleImp::setIsaGraphicsAllocations() {
bool debuggerDisabled = (this->device->getL0Debugger() == nullptr);
if (debuggerDisabled && kernelsIsaTotalSize <= isaAllocationPageSize) {
if (auto allocation = this->allocateKernelsIsaMemory(kernelsIsaTotalSize); allocation == nullptr) {
auto neoDevice = this->device->getNEODevice();
auto &isaAllocator = neoDevice->getIsaPoolAllocator();
auto crossModuleAllocation = isaAllocator.requestGraphicsAllocationForIsa(this->type == ModuleType::builtin, kernelsIsaTotalSize);
if (crossModuleAllocation == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
} else {
this->kernelsIsaParentRegion.reset(allocation);
}
this->sharedIsaAllocation.reset(crossModuleAllocation);
for (auto i = 0lu; i < kernelsCount; i++) {
auto [isaOffset, isaSize] = kernelsChunks[i];
this->kernelImmDatas[i]->setIsaParentAllocation(this->kernelsIsaParentRegion.get());
this->kernelImmDatas[i]->setIsaSubAllocationOffset(isaOffset);
this->kernelImmDatas[i]->setIsaParentAllocation(this->sharedIsaAllocation->getGraphicsAllocation());
this->kernelImmDatas[i]->setIsaSubAllocationOffset(this->sharedIsaAllocation->getOffset() + isaOffset);
this->kernelImmDatas[i]->setIsaSubAllocationSize(isaSize);
}
} else {
@@ -1707,4 +1710,11 @@ bool moveBuildOption(std::string &dstOptionsSet, std::string &srcOptionSet, NEO:
return true;
}
// Returns the graphics allocation exposed by this module's shared ISA
// allocation, or nullptr when no shared ISA allocation has been set.
NEO::GraphicsAllocation *ModuleImp::getKernelsIsaParentAllocation() const {
    return sharedIsaAllocation ? sharedIsaAllocation->getGraphicsAllocation() : nullptr;
}
} // namespace L0

View File

@@ -23,6 +23,7 @@
namespace NEO {
struct KernelDescriptor;
class SharedIsaAllocation;
namespace Zebin::Debug {
struct Segments;
@@ -125,7 +126,7 @@ struct ModuleImp : public Module {
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override;
const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; }
NEO::GraphicsAllocation *getKernelsIsaParentAllocation() const { return kernelsIsaParentRegion.get(); }
NEO::GraphicsAllocation *getKernelsIsaParentAllocation() const;
uint32_t getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const override;
@@ -189,7 +190,7 @@ struct ModuleImp : public Module {
std::unique_ptr<ModuleTranslationUnit> translationUnit;
ModuleBuildLog *moduleBuildLog = nullptr;
NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr;
std::unique_ptr<NEO::GraphicsAllocation> kernelsIsaParentRegion;
std::unique_ptr<NEO::SharedIsaAllocation> sharedIsaAllocation;
std::vector<std::shared_ptr<Kernel>> printfKernelContainer;
std::vector<std::unique_ptr<KernelImmutableData>> kernelImmDatas;
NEO::Linker::RelocatedSymbolsMap symbols;