Initialize kernel immutable data when kernel is created
This is done instead of initializing it when the associated module is created, to avoid allocating memory for kernels that are never created or used. Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
parent
1d7e91de24
commit
a6ac10088c
|
@ -30,10 +30,11 @@ struct Device;
|
|||
struct Module;
|
||||
|
||||
struct KernelImmutableData {
|
||||
KernelImmutableData(L0::Device *l0device = nullptr);
|
||||
KernelImmutableData() = default;
|
||||
KernelImmutableData(L0::Device *l0device, NEO::KernelInfo *ki);
|
||||
virtual ~KernelImmutableData();
|
||||
|
||||
void initialize(NEO::KernelInfo *kernelInfo, Device *device,
|
||||
void initialize(Device *device,
|
||||
uint32_t computeUnitsUsedForSratch,
|
||||
NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel);
|
||||
|
||||
|
@ -65,6 +66,7 @@ struct KernelImmutableData {
|
|||
|
||||
protected:
|
||||
Device *device = nullptr;
|
||||
NEO::KernelInfo *kernelInfo = nullptr;
|
||||
NEO::KernelDescriptor *kernelDescriptor = nullptr;
|
||||
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
|
||||
std::unique_ptr<NEO::GraphicsAllocation> privateMemoryGraphicsAllocation = nullptr;
|
||||
|
|
|
@ -64,7 +64,10 @@ inline SamplerPatchValues getAddrMode(ze_sampler_address_mode_t addressingMode)
|
|||
return SamplerPatchValues::AddressNone;
|
||||
}
|
||||
|
||||
KernelImmutableData::KernelImmutableData(L0::Device *l0device) : device(l0device) {}
|
||||
KernelImmutableData::KernelImmutableData(L0::Device *l0device, NEO::KernelInfo *ki) : device(l0device), kernelInfo(ki) {
|
||||
UNRECOVERABLE_IF(kernelInfo == nullptr);
|
||||
this->kernelDescriptor = &kernelInfo->kernelDescriptor;
|
||||
}
|
||||
|
||||
KernelImmutableData::~KernelImmutableData() {
|
||||
if (nullptr != isaGraphicsAllocation) {
|
||||
|
@ -96,14 +99,10 @@ inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef
|
|||
}
|
||||
}
|
||||
|
||||
void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device,
|
||||
void KernelImmutableData::initialize(Device *device,
|
||||
uint32_t computeUnitsUsedForSratch,
|
||||
NEO::GraphicsAllocation *globalConstBuffer,
|
||||
NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel) {
|
||||
|
||||
UNRECOVERABLE_IF(kernelInfo == nullptr);
|
||||
this->kernelDescriptor = &kernelInfo->kernelDescriptor;
|
||||
|
||||
auto neoDevice = device->getNEODevice();
|
||||
auto memoryManager = device->getNEODevice()->getMemoryManager();
|
||||
|
||||
|
|
|
@ -350,10 +350,7 @@ bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice)
|
|||
|
||||
kernelImmDatas.reserve(this->translationUnit->programInfo.kernelInfos.size());
|
||||
for (auto &ki : this->translationUnit->programInfo.kernelInfos) {
|
||||
std::unique_ptr<KernelImmutableData> kernelImmData{new KernelImmutableData(this->device)};
|
||||
kernelImmData->initialize(ki, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
|
||||
this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer,
|
||||
this->type == ModuleType::Builtin);
|
||||
std::unique_ptr<KernelImmutableData> kernelImmData{new KernelImmutableData(this->device, ki)};
|
||||
kernelImmDatas.push_back(std::move(kernelImmData));
|
||||
}
|
||||
this->maxGroupSize = static_cast<uint32_t>(this->translationUnit->device->getNEODevice()->getDeviceInfo().maxWorkGroupSize);
|
||||
|
@ -364,6 +361,11 @@ bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice)
|
|||
const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *functionName) const {
|
||||
for (auto &kernelImmData : kernelImmDatas) {
|
||||
if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(functionName) == 0) {
|
||||
if (kernelImmData->getIsaGraphicsAllocation() == nullptr) {
|
||||
kernelImmData->initialize(device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
|
||||
this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer,
|
||||
this->type == ModuleType::Builtin);
|
||||
}
|
||||
return kernelImmData.get();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -262,8 +262,8 @@ HWTEST_F(KernelPropertiesTests, whenInitializingThenCalculatesProperPrivateSurfa
|
|||
kernelAttributes.perHwThreadPrivateMemorySize = 0x100;
|
||||
kernelAttributes.simdSize = 8;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
kernelImmutableData.initialize(&kernelInfo, device, computeUnitsUsedForSratch, nullptr, nullptr, false);
|
||||
KernelImmutableData kernelImmutableData(device, &kernelInfo);
|
||||
kernelImmutableData.initialize(device, computeUnitsUsedForSratch, nullptr, nullptr, false);
|
||||
|
||||
size_t expectedSize = static_cast<size_t>(kernelAttributes.perHwThreadPrivateMemorySize) * computeUnitsUsedForSratch;
|
||||
EXPECT_GE(expectedSize, kernelImmutableData.getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
|
||||
|
@ -488,12 +488,12 @@ TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllo
|
|||
kernelInfo.heapInfo.KernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
KernelImmutableData kernelImmutableData(device, &kernelInfo);
|
||||
|
||||
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver;
|
||||
auto initialTaskCount = bcsCsr->peekTaskCount();
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
kernelImmutableData.initialize(device, 0, nullptr, nullptr, false);
|
||||
|
||||
if (kernelImmutableData.getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) {
|
||||
EXPECT_EQ(initialTaskCount + 1, bcsCsr->peekTaskCount());
|
||||
|
@ -514,12 +514,12 @@ TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowed
|
|||
kernelInfo.heapInfo.KernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
KernelImmutableData kernelImmutableData(device, &kernelInfo);
|
||||
|
||||
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver;
|
||||
auto initialTaskCount = bcsCsr->peekTaskCount();
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
kernelImmutableData.initialize(device, 0, nullptr, nullptr, false);
|
||||
|
||||
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
|
||||
|
||||
|
@ -538,12 +538,12 @@ TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallo
|
|||
kernelInfo.heapInfo.KernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
KernelImmutableData kernelImmutableData(device, &kernelInfo);
|
||||
|
||||
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver;
|
||||
auto initialTaskCount = bcsCsr->peekTaskCount();
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
kernelImmutableData.initialize(device, 0, nullptr, nullptr, false);
|
||||
|
||||
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
|
||||
|
||||
|
@ -556,9 +556,9 @@ TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithInternalI
|
|||
kernelInfo.heapInfo.KernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
KernelImmutableData kernelImmutableData(device, &kernelInfo);
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, true);
|
||||
kernelImmutableData.initialize(device, 0, nullptr, nullptr, true);
|
||||
EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType());
|
||||
}
|
||||
|
||||
|
@ -568,9 +568,9 @@ TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithNonIntern
|
|||
kernelInfo.heapInfo.KernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
KernelImmutableData kernelImmutableData(device, &kernelInfo);
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
kernelImmutableData.initialize(device, 0, nullptr, nullptr, false);
|
||||
EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType());
|
||||
}
|
||||
|
||||
|
@ -580,7 +580,7 @@ TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuff
|
|||
kernelInfo.heapInfo.KernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
KernelImmutableData kernelImmutableData(device, &kernelInfo);
|
||||
|
||||
uint64_t gpuAddress = 0x1200;
|
||||
void *buffer = reinterpret_cast<void *>(gpuAddress);
|
||||
|
@ -588,7 +588,7 @@ TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuff
|
|||
NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, size);
|
||||
NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, size);
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0,
|
||||
kernelImmutableData.initialize(device, 0,
|
||||
&globalConstBuffer, &globalVarBuffer, false);
|
||||
auto &resCont = kernelImmutableData.getResidencyContainer();
|
||||
EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalVarBuffer));
|
||||
|
@ -615,9 +615,9 @@ TEST_F(KernelIsaTests, givenDebugONAndKernelDegugInfoWhenInitializingImmutableDa
|
|||
MockDebugger *debugger = new MockDebugger(neoDevice);
|
||||
|
||||
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(static_cast<NEO::Debugger *>(debugger));
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
KernelImmutableData kernelImmutableData(device, &kernelInfo);
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
kernelImmutableData.initialize(device, 0, nullptr, nullptr, false);
|
||||
EXPECT_EQ(kernelInfo.kernelDescriptor.external.debugData->vIsaSize, static_cast<uint32_t>(123));
|
||||
}
|
||||
|
||||
|
@ -640,9 +640,9 @@ TEST_F(KernelIsaTests, givenDebugONAndNoKernelDegugInfoWhenInitializingImmutable
|
|||
MockDebugger *debugger = new MockDebugger(neoDevice);
|
||||
|
||||
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(static_cast<NEO::Debugger *>(debugger));
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
KernelImmutableData kernelImmutableData(device, &kernelInfo);
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
kernelImmutableData.initialize(device, 0, nullptr, nullptr, false);
|
||||
EXPECT_EQ(kernelInfo.kernelDescriptor.external.debugData, nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -76,6 +76,56 @@ HWTEST_F(ModuleTest, givenNonZeroCountWhenGettingKernelNamesThenNamesAreReturned
|
|||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
|
||||
HWTEST_F(ModuleTest, givenCallToGetKernelImmutableDataWithValidNameThenImutableDataIsReturned) {
|
||||
uint32_t count = 1;
|
||||
const char *kernelNames = nullptr;
|
||||
auto result = module->getKernelNames(&count, &kernelNames);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1u, count);
|
||||
EXPECT_STREQ(this->kernelName.c_str(), kernelNames);
|
||||
|
||||
auto kernelImmutableData = module->getKernelImmutableData(kernelNames);
|
||||
EXPECT_NE(nullptr, kernelImmutableData);
|
||||
}
|
||||
|
||||
HWTEST_F(ModuleTest, givenCallToGetKernelImmutableDataWithInvalidNameThenNullptrisReturned) {
|
||||
uint32_t count = 1;
|
||||
const char *kernelNames = nullptr;
|
||||
auto result = module->getKernelNames(&count, &kernelNames);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1u, count);
|
||||
EXPECT_STREQ(this->kernelName.c_str(), kernelNames);
|
||||
|
||||
auto kernelImmutableData = module->getKernelImmutableData("nonexistent_kernel");
|
||||
EXPECT_EQ(nullptr, kernelImmutableData);
|
||||
}
|
||||
|
||||
HWTEST_F(ModuleTest, givenTwoCallsToGetKernelImmutableDataThenKernelImmutableDataIsInitializedOnce) {
|
||||
uint32_t count = 1;
|
||||
const char *kernelNames = nullptr;
|
||||
auto result = module->getKernelNames(&count, &kernelNames);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1u, count);
|
||||
EXPECT_STREQ(this->kernelName.c_str(), kernelNames);
|
||||
|
||||
auto beforeKernelImmutableData = module->getKernelImmutableData(kernelNames);
|
||||
EXPECT_NE(nullptr, beforeKernelImmutableData);
|
||||
|
||||
auto beforeIsaAllocation = beforeKernelImmutableData->getIsaGraphicsAllocation();
|
||||
EXPECT_NE(nullptr, beforeIsaAllocation);
|
||||
|
||||
auto kernelImmutableData = module->getKernelImmutableData(kernelNames);
|
||||
EXPECT_NE(nullptr, kernelImmutableData);
|
||||
EXPECT_EQ(beforeKernelImmutableData, kernelImmutableData);
|
||||
|
||||
auto isaAllocation = kernelImmutableData->getIsaGraphicsAllocation();
|
||||
EXPECT_NE(nullptr, isaAllocation);
|
||||
EXPECT_EQ(beforeIsaAllocation, isaAllocation);
|
||||
}
|
||||
|
||||
HWTEST_F(ModuleTest, givenUserModuleTypeWhenCreatingModuleThenCorrectTypeIsSet) {
|
||||
WhiteBox<Module> module(device, nullptr, ModuleType::User);
|
||||
EXPECT_EQ(ModuleType::User, module.type);
|
||||
|
@ -472,6 +522,11 @@ TEST_F(ModuleDynamicLinkTests, givenModuleWithUnresolvedSymbolWhenTheOtherModule
|
|||
auto kernelInfo = std::make_unique<NEO::KernelInfo>();
|
||||
kernelInfo->heapInfo.pKernelHeap = kernelHeap;
|
||||
kernelInfo->heapInfo.KernelHeapSize = MemoryConstants::pageSize;
|
||||
|
||||
auto kernelImmData = std::make_unique<WhiteBox<::L0::KernelImmutableData>>(device, kernelInfo.get());
|
||||
kernelImmData->isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
|
||||
{device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()}));
|
||||
|
||||
module0->getTranslationUnit()->programInfo.kernelInfos.push_back(kernelInfo.release());
|
||||
|
||||
auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
|
||||
|
@ -481,10 +536,6 @@ TEST_F(ModuleDynamicLinkTests, givenModuleWithUnresolvedSymbolWhenTheOtherModule
|
|||
module0->unresolvedExternalsInfo.push_back({unresolvedRelocation});
|
||||
module0->unresolvedExternalsInfo[0].instructionsSegmentId = 0u;
|
||||
|
||||
auto kernelImmData = std::make_unique<WhiteBox<::L0::KernelImmutableData>>(device);
|
||||
kernelImmData->isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
|
||||
{device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()}));
|
||||
|
||||
auto isaPtr = kernelImmData->getIsaGraphicsAllocation()->getUnderlyingBuffer();
|
||||
|
||||
module0->kernelImmDatas.push_back(std::move(kernelImmData));
|
||||
|
|
Loading…
Reference in New Issue