feature: Optimize intra-module kernel ISA allocations

Until now, `KernelImmutableData::initialize()` allocated a separate page
for each kernel's ISA. The ISA blocks are often much smaller than a 64k
page, which leads to poor memory utilization and was even observed to
cause a device OOM error when a single module has several kernels.

Improve the situation by sharing a single parent allocation (owned by
the module instance) among the kernels of any module whose kernel ISAs
fit together within a single 64k page. This improves memory utilization
at the level of a single module.

Related-To: NEO-7788
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
Maciej Bielski
2023-08-09 13:39:39 +00:00
committed by Compute-Runtime-Automation
parent 1b7e178b25
commit c348831470
22 changed files with 649 additions and 177 deletions

View File

@@ -35,9 +35,9 @@ struct KernelImmutableData {
KernelImmutableData(L0::Device *l0device = nullptr); KernelImmutableData(L0::Device *l0device = nullptr);
virtual ~KernelImmutableData(); virtual ~KernelImmutableData();
ze_result_t initialize(NEO::KernelInfo *kernelInfo, Device *device, uint32_t computeUnitsUsedForSratch, MOCKABLE_VIRTUAL ze_result_t initialize(NEO::KernelInfo *kernelInfo, Device *device, uint32_t computeUnitsUsedForSratch,
NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer, NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer,
bool internalKernel); bool internalKernel);
const std::vector<NEO::GraphicsAllocation *> &getResidencyContainer() const { const std::vector<NEO::GraphicsAllocation *> &getResidencyContainer() const {
return residencyContainer; return residencyContainer;
@@ -48,7 +48,14 @@ struct KernelImmutableData {
} }
uint32_t getIsaSize() const; uint32_t getIsaSize() const;
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); } NEO::GraphicsAllocation *getIsaGraphicsAllocation() const;
void setIsaPerKernelAllocation(NEO::GraphicsAllocation *allocation);
inline NEO::GraphicsAllocation *getIsaParentAllocation() const { return isaParentAllocation; }
inline void setIsaParentAllocation(NEO::GraphicsAllocation *allocation) { isaParentAllocation = allocation; };
inline size_t getIsaOffsetInParentAllocation() const { return isaSubAllocationOffset; }
inline void setIsaSubAllocationOffset(size_t offset) { isaSubAllocationOffset = offset; }
inline void setIsaSubAllocationSize(size_t size) { isaSubAllocationSize = size; }
inline size_t getIsaSubAllocationSize() const { return isaSubAllocationSize; }
const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); } const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); }
@@ -80,6 +87,9 @@ struct KernelImmutableData {
NEO::KernelInfo *kernelInfo = nullptr; NEO::KernelInfo *kernelInfo = nullptr;
NEO::KernelDescriptor *kernelDescriptor = nullptr; NEO::KernelDescriptor *kernelDescriptor = nullptr;
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr; std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
NEO::GraphicsAllocation *isaParentAllocation = nullptr;
size_t isaSubAllocationOffset = 0lu;
size_t isaSubAllocationSize = 0lu;
uint32_t crossThreadDataSize = 0; uint32_t crossThreadDataSize = 0;
std::unique_ptr<uint8_t[]> crossThreadDataTemplate = nullptr; std::unique_ptr<uint8_t[]> crossThreadDataTemplate = nullptr;

View File

@@ -75,20 +75,6 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
DeviceImp *deviceImp = static_cast<DeviceImp *>(device); DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
auto neoDevice = deviceImp->getActiveDevice(); auto neoDevice = deviceImp->getActiveDevice();
auto memoryManager = neoDevice->getMemoryManager();
auto kernelIsaSize = kernelInfo->heapInfo.kernelHeapSize;
UNRECOVERABLE_IF(kernelIsaSize == 0);
UNRECOVERABLE_IF(!kernelInfo->heapInfo.pKernelHeap);
const auto allocType = internalKernel ? NEO::AllocationType::KERNEL_ISA_INTERNAL : NEO::AllocationType::KERNEL_ISA;
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(
{neoDevice->getRootDeviceIndex(), kernelIsaSize, allocType, neoDevice->getDeviceBitfield()});
if (!allocation) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
isaGraphicsAllocation.reset(allocation);
if (neoDevice->getDebugger() && kernelInfo->kernelDescriptor.external.debugData.get()) { if (neoDevice->getDebugger() && kernelInfo->kernelDescriptor.external.debugData.get()) {
createRelocatedDebugData(globalConstBuffer, globalVarBuffer); createRelocatedDebugData(globalConstBuffer, globalVarBuffer);
@@ -203,16 +189,39 @@ void KernelImmutableData::createRelocatedDebugData(NEO::GraphicsAllocation *glob
} }
} }
ze_result_t KernelImp::getBaseAddress(uint64_t *baseAddress) { NEO::GraphicsAllocation *KernelImmutableData::getIsaGraphicsAllocation() const {
if (baseAddress) { if (auto allocation = this->getIsaParentAllocation(); allocation != nullptr) {
auto gmmHelper = module->getDevice()->getNEODevice()->getGmmHelper(); DEBUG_BREAK_IF(this->device->getL0Debugger() != nullptr);
*baseAddress = gmmHelper->decanonize(this->kernelImmData->getIsaGraphicsAllocation()->getGpuAddress()); DEBUG_BREAK_IF(this->isaGraphicsAllocation != nullptr);
return allocation;
} else {
DEBUG_BREAK_IF(this->isaGraphicsAllocation.get() == nullptr);
return this->isaGraphicsAllocation.get();
} }
return ZE_RESULT_SUCCESS;
} }
uint32_t KernelImmutableData::getIsaSize() const { uint32_t KernelImmutableData::getIsaSize() const {
return static_cast<uint32_t>(isaGraphicsAllocation->getUnderlyingBufferSize()); if (this->getIsaParentAllocation()) {
DEBUG_BREAK_IF(this->device->getL0Debugger() != nullptr);
DEBUG_BREAK_IF(this->isaGraphicsAllocation != nullptr);
return static_cast<uint32_t>(this->isaSubAllocationSize);
} else {
return static_cast<uint32_t>(this->isaGraphicsAllocation->getUnderlyingBufferSize());
}
}
// Hands a dedicated (per-kernel) ISA allocation over to this object, which
// then owns it through the isaGraphicsAllocation unique_ptr.
// Debug builds break if a shared parent allocation has already been assigned.
void KernelImmutableData::setIsaPerKernelAllocation(NEO::GraphicsAllocation *allocation) {
    DEBUG_BREAK_IF(isaParentAllocation != nullptr);
    isaGraphicsAllocation.reset(allocation);
}
ze_result_t KernelImp::getBaseAddress(uint64_t *baseAddress) {
if (baseAddress) {
auto gmmHelper = module->getDevice()->getNEODevice()->getGmmHelper();
*baseAddress = gmmHelper->decanonize(this->kernelImmData->getIsaGraphicsAllocation()->getGpuAddress() +
this->kernelImmData->getIsaOffsetInParentAllocation());
}
return ZE_RESULT_SUCCESS;
} }
KernelImp::KernelImp(Module *module) : module(module) {} KernelImp::KernelImp(Module *module) : module(module) {}
@@ -948,10 +957,12 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
UNRECOVERABLE_IF(!this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap); UNRECOVERABLE_IF(!this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap);
if (isaAllocation->getAllocationType() == NEO::AllocationType::KERNEL_ISA_INTERNAL) { if (isaAllocation->getAllocationType() == NEO::AllocationType::KERNEL_ISA_INTERNAL) {
isaAllocation->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
isaAllocation->setAubWritable(true, std::numeric_limits<uint32_t>::max());
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(neoDevice->getRootDeviceEnvironment(), *isaAllocation), NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(neoDevice->getRootDeviceEnvironment(), *isaAllocation),
*neoDevice, *neoDevice,
isaAllocation, isaAllocation,
0, this->kernelImmData->getIsaOffsetInParentAllocation(),
this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap, this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap,
static_cast<size_t>(this->kernelImmData->getKernelInfo()->heapInfo.kernelHeapSize)); static_cast<size_t>(this->kernelImmData->getKernelInfo()->heapInfo.kernelHeapSize));
} }
@@ -1209,6 +1220,10 @@ NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const {
return getImmutableData()->getIsaGraphicsAllocation(); return getImmutableData()->getIsaGraphicsAllocation();
} }
uint64_t KernelImp::getIsaOffsetInParentAllocation() const {
return static_cast<uint64_t>(getImmutableData()->getIsaOffsetInParentAllocation());
}
ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) { ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) {
auto &threadArbitrationPolicy = const_cast<NEO::ThreadArbitrationPolicy &>(getKernelDescriptor().kernelAttributes.threadArbitrationPolicy); auto &threadArbitrationPolicy = const_cast<NEO::ThreadArbitrationPolicy &>(getKernelDescriptor().kernelAttributes.threadArbitrationPolicy);
if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST) { if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST) {

View File

@@ -141,6 +141,7 @@ struct KernelImp : Kernel {
} }
NEO::GraphicsAllocation *getIsaAllocation() const override; NEO::GraphicsAllocation *getIsaAllocation() const override;
uint64_t getIsaOffsetInParentAllocation() const override;
uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; } uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; }
bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; } bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; }

View File

@@ -32,6 +32,7 @@
#include "shared/source/helpers/kernel_helpers.h" #include "shared/source/helpers/kernel_helpers.h"
#include "shared/source/helpers/string.h" #include "shared/source/helpers/string.h"
#include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h"
@@ -494,12 +495,19 @@ void ModuleTranslationUnit::processDebugData() {
ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type) ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
: device(device), translationUnit(std::make_unique<ModuleTranslationUnit>(device)), : device(device), translationUnit(std::make_unique<ModuleTranslationUnit>(device)),
moduleBuildLog(moduleBuildLog), type(type) { moduleBuildLog(moduleBuildLog), kernelsIsaParentRegion(nullptr), type(type) {
productFamily = device->getHwInfo().platform.eProductFamily; auto &gfxCoreHelper = device->getGfxCoreHelper();
auto &hwInfo = device->getHwInfo();
this->isaAllocationPageSize = gfxCoreHelper.useSystemMemoryPlacementForISA(hwInfo) ? MemoryConstants::pageSize : MemoryConstants::pageSize64k;
this->productFamily = hwInfo.platform.eProductFamily;
} }
ModuleImp::~ModuleImp() { ModuleImp::~ModuleImp() {
kernelImmDatas.clear(); this->kernelImmDatas.clear();
if (this->kernelsIsaParentRegion) {
DEBUG_BREAK_IF(this->device->getNEODevice()->getMemoryManager() == nullptr);
this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(this->kernelsIsaParentRegion.release());
}
} }
NEO::Zebin::Debug::Segments ModuleImp::getZebinSegments() { NEO::Zebin::Debug::Segments ModuleImp::getZebinSegments() {
@@ -554,7 +562,8 @@ ze_result_t ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neo
for (auto &ki : kernelImmDatas) { for (auto &ki : kernelImmDatas) {
if (!ki->isIsaCopiedToAllocation()) { if (!ki->isIsaCopiedToAllocation()) {
ki->getIsaGraphicsAllocation()->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
ki->getIsaGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *ki->getIsaGraphicsAllocation()), NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *ki->getIsaGraphicsAllocation()),
*neoDevice, ki->getIsaGraphicsAllocation(), 0, ki->getKernelInfo()->heapInfo.pKernelHeap, *neoDevice, ki->getIsaGraphicsAllocation(), 0, ki->getKernelInfo()->heapInfo.pKernelHeap,
static_cast<size_t>(ki->getKernelInfo()->heapInfo.kernelHeapSize)); static_cast<size_t>(ki->getKernelInfo()->heapInfo.kernelHeapSize));
@@ -687,23 +696,98 @@ inline ze_result_t ModuleImp::checkIfBuildShouldBeFailed(NEO::Device *neoDevice)
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
inline ze_result_t ModuleImp::initializeKernelImmutableDatas() { ze_result_t ModuleImp::initializeKernelImmutableDatas() {
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE; if (size_t kernelsCount = this->translationUnit->programInfo.kernelInfos.size(); kernelsCount > 0lu) {
ze_result_t result;
kernelImmDatas.reserve(this->translationUnit->programInfo.kernelInfos.size()); if (result = this->allocateKernelImmutableDatas(kernelsCount); result != ZE_RESULT_SUCCESS) {
for (auto &ki : this->translationUnit->programInfo.kernelInfos) {
std::unique_ptr<KernelImmutableData> kernelImmData{new KernelImmutableData(this->device)};
result = kernelImmData->initialize(ki, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer,
this->type == ModuleType::Builtin);
if (result != ZE_RESULT_SUCCESS) {
return result; return result;
} }
kernelImmDatas.push_back(std::move(kernelImmData)); for (size_t i = 0lu; i < kernelsCount; i++) {
result = kernelImmDatas[i]->initialize(this->translationUnit->programInfo.kernelInfos[i],
device,
device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
this->translationUnit->globalConstBuffer,
this->translationUnit->globalVarBuffer,
this->type == ModuleType::Builtin);
if (result != ZE_RESULT_SUCCESS) {
kernelImmDatas[i].reset();
return result;
}
}
} }
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
// Creates kernelsCount KernelImmutableData objects and then assigns ISA memory
// to them via setIsaGraphicsAllocations(), whose result is returned.
// When the container already holds exactly kernelsCount entries nothing is
// created or allocated and ZE_RESULT_SUCCESS is returned.
// NOTE(review): a non-empty container of a different size gets kernelsCount
// entries appended to the existing ones - confirm that case cannot occur.
ze_result_t ModuleImp::allocateKernelImmutableDatas(size_t kernelsCount) {
    auto &immDatas = this->kernelImmDatas;
    if (immDatas.size() != kernelsCount) {
        immDatas.reserve(kernelsCount);
        for (size_t remaining = kernelsCount; remaining > 0lu; --remaining) {
            immDatas.push_back(std::make_unique<KernelImmutableData>(this->device));
        }
        return this->setIsaGraphicsAllocations();
    }
    return ZE_RESULT_SUCCESS;
}
// Assigns ISA memory to every entry of kernelImmDatas.
//
// For each kernel a chunk size is computed from its ISA heap size by
// computeKernelIsaAllocationAlignedSizeWithPadding(); chunks are laid out back
// to back. When no L0 debugger is attached and the chunks together fit within
// isaAllocationPageSize, a single parent allocation is created (owned by the
// module through kernelsIsaParentRegion) and every kernel receives its offset
// and size inside it. Otherwise each kernel gets a dedicated allocation sized
// to its unpadded ISA heap size.
//
// Returns ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY as soon as an allocation fails,
// ZE_RESULT_SUCCESS otherwise (including when there are no kernels at all).
ze_result_t ModuleImp::setIsaGraphicsAllocations() {
    const size_t kernelsCount = this->kernelImmDatas.size();
    if (kernelsCount == 0lu) {
        // Nothing to place: do not request a zero-sized parent allocation.
        return ZE_RESULT_SUCCESS;
    }

    const auto &kernelInfos = this->translationUnit->programInfo.kernelInfos;

    // {offset, size} of every kernel's chunk in the would-be parent allocation.
    std::vector<std::pair<size_t, size_t>> kernelsChunks(kernelsCount);
    size_t kernelsIsaTotalSize = 0lu;
    for (size_t i = 0lu; i < kernelsCount; i++) {
        const auto kernelInfo = kernelInfos[i];
        DEBUG_BREAK_IF(kernelInfo->heapInfo.kernelHeapSize == 0lu);
        DEBUG_BREAK_IF(!kernelInfo->heapInfo.pKernelHeap);
        const auto chunkSize = this->computeKernelIsaAllocationAlignedSizeWithPadding(kernelInfo->heapInfo.kernelHeapSize);
        kernelsChunks[i] = {kernelsIsaTotalSize, chunkSize};
        kernelsIsaTotalSize += chunkSize;
    }

    const bool debuggerDisabled = (this->device->getL0Debugger() == nullptr);
    if (debuggerDisabled && kernelsIsaTotalSize <= isaAllocationPageSize) {
        // Shared path: one allocation for the whole module, kernels get sub-ranges.
        auto parentAllocation = this->allocateKernelsIsaMemory(kernelsIsaTotalSize);
        if (parentAllocation == nullptr) {
            return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
        }
        this->kernelsIsaParentRegion.reset(parentAllocation);

        for (size_t i = 0lu; i < kernelsCount; i++) {
            const auto &[isaOffset, isaSize] = kernelsChunks[i];
            this->kernelImmDatas[i]->setIsaParentAllocation(this->kernelsIsaParentRegion.get());
            this->kernelImmDatas[i]->setIsaSubAllocationOffset(isaOffset);
            this->kernelImmDatas[i]->setIsaSubAllocationSize(isaSize);
        }
        return ZE_RESULT_SUCCESS;
    }

    // Per-kernel path: each kernel owns its own allocation.
    for (size_t i = 0lu; i < kernelsCount; i++) {
        const auto kernelInfo = kernelInfos[i];
        auto allocation = this->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize);
        if (allocation == nullptr) {
            return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
        }
        this->kernelImmDatas[i]->setIsaPerKernelAllocation(allocation);
    }
    return ZE_RESULT_SUCCESS;
}
// Size to reserve for a kernel ISA of isaSize bytes: the ISA plus the padding
// reported by the gfx core helper, rounded up with alignUp() to the helper's
// kernel ISA pointer alignment.
size_t ModuleImp::computeKernelIsaAllocationAlignedSizeWithPadding(size_t isaSize) {
    auto &gfxCoreHelper = this->device->getGfxCoreHelper();
    const auto paddedSize = gfxCoreHelper.getPaddingForISAAllocation() + isaSize;
    return alignUp(paddedSize, gfxCoreHelper.getKernelIsaPointerAlignment());
}
// Requests size bytes of kernel ISA memory from the memory manager of this
// module's NEO device. Builtin modules use the KERNEL_ISA_INTERNAL allocation
// type, every other module type uses KERNEL_ISA.
// The memory manager's result is returned as is; callers in this file treat
// nullptr as an out-of-device-memory condition.
NEO::GraphicsAllocation *ModuleImp::allocateKernelsIsaMemory(size_t size) {
    auto neoDevice = this->device->getNEODevice();

    NEO::AllocationType allocType = NEO::AllocationType::KERNEL_ISA;
    if (this->type == ModuleType::Builtin) {
        allocType = NEO::AllocationType::KERNEL_ISA_INTERNAL;
    }

    auto memoryManager = neoDevice->getMemoryManager();
    return memoryManager->allocateGraphicsMemoryWithProperties(
        {neoDevice->getRootDeviceIndex(), size, allocType, neoDevice->getDeviceBitfield()});
}
void ModuleImp::createDebugZebin() { void ModuleImp::createDebugZebin() {
auto refBin = ArrayRef<const uint8_t>::fromAny(translationUnit->unpackedDeviceBinary.get(), translationUnit->unpackedDeviceBinarySize); auto refBin = ArrayRef<const uint8_t>::fromAny(translationUnit->unpackedDeviceBinary.get(), translationUnit->unpackedDeviceBinarySize);
auto segments = getZebinSegments(); auto segments = getZebinSegments();
@@ -880,7 +964,10 @@ void ModuleImp::copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSeg
auto segmentId = &kernelImmData - &this->kernelImmDatas[0]; auto segmentId = &kernelImmData - &this->kernelImmDatas[0];
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *kernelImmData->getIsaGraphicsAllocation()), NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *kernelImmData->getIsaGraphicsAllocation()),
*device->getNEODevice(), kernelImmData->getIsaGraphicsAllocation(), 0, isaSegmentsForPatching[segmentId].hostPointer, *device->getNEODevice(),
kernelImmData->getIsaGraphicsAllocation(),
kernelImmData->getIsaOffsetInParentAllocation(),
isaSegmentsForPatching[segmentId].hostPointer,
isaSegmentsForPatching[segmentId].segmentSize); isaSegmentsForPatching[segmentId].segmentSize);
kernelImmData->setIsaCopiedToAllocation(); kernelImmData->setIsaCopiedToAllocation();
@@ -925,8 +1012,9 @@ bool ModuleImp::linkBinary() {
if (linkerInput->getExportedFunctionsSegmentId() >= 0) { if (linkerInput->getExportedFunctionsSegmentId() >= 0) {
auto exportedFunctionHeapId = linkerInput->getExportedFunctionsSegmentId(); auto exportedFunctionHeapId = linkerInput->getExportedFunctionsSegmentId();
this->exportedFunctionsSurface = this->kernelImmDatas[exportedFunctionHeapId]->getIsaGraphicsAllocation(); this->exportedFunctionsSurface = this->kernelImmDatas[exportedFunctionHeapId]->getIsaGraphicsAllocation();
exportedFunctions.gpuAddress = static_cast<uintptr_t>(exportedFunctionsSurface->getGpuAddressToPatch()); auto offsetInParentAllocation = this->kernelImmDatas[exportedFunctionHeapId]->getIsaOffsetInParentAllocation();
exportedFunctions.segmentSize = exportedFunctionsSurface->getUnderlyingBufferSize(); exportedFunctions.gpuAddress = static_cast<uintptr_t>(exportedFunctionsSurface->getGpuAddressToPatch() + offsetInParentAllocation);
exportedFunctions.segmentSize = this->kernelImmDatas[exportedFunctionHeapId]->getIsaSize();
} }
Linker::KernelDescriptorsT kernelDescriptors; Linker::KernelDescriptorsT kernelDescriptors;
@@ -938,7 +1026,9 @@ bool ModuleImp::linkBinary() {
auto &kernHeapInfo = kernelInfo->heapInfo; auto &kernHeapInfo = kernelInfo->heapInfo;
const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap); const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.kernelHeapSize)); patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.kernelHeapSize));
isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch()), kernHeapInfo.kernelHeapSize}); auto isaAddressToPatch = static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch() +
kernelImmDatas.at(i)->getIsaOffsetInParentAllocation());
isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), isaAddressToPatch, kernHeapInfo.kernelHeapSize});
kernelDescriptors.push_back(&kernelInfo->kernelDescriptor); kernelDescriptors.push_back(&kernelInfo->kernelDescriptor);
} }
} }
@@ -1001,10 +1091,10 @@ ze_result_t ModuleImp::getFunctionPointer(const char *pFunctionName, void **pfnF
auto kernelImmData = this->getKernelImmutableData(pFunctionName); auto kernelImmData = this->getKernelImmutableData(pFunctionName);
if (kernelImmData != nullptr) { if (kernelImmData != nullptr) {
auto isaAllocation = kernelImmData->getIsaGraphicsAllocation(); auto isaAllocation = kernelImmData->getIsaGraphicsAllocation();
*pfnFunction = reinterpret_cast<void *>(isaAllocation->getGpuAddress()); *pfnFunction = reinterpret_cast<void *>(isaAllocation->getGpuAddress() + kernelImmData->getIsaOffsetInParentAllocation());
// Ensure that any kernel in this module which uses this kernel module function pointer has access to the memory. // Ensure that any kernel in this module which uses this kernel module function pointer has access to the memory.
for (auto &data : this->getKernelImmutableDataVector()) { for (auto &data : this->getKernelImmutableDataVector()) {
if (data.get() != kernelImmData) { if (data.get() != kernelImmData && data.get()->getIsaOffsetInParentAllocation() == 0lu) {
data.get()->getResidencyContainer().insert(data.get()->getResidencyContainer().end(), isaAllocation); data.get()->getResidencyContainer().insert(data.get()->getResidencyContainer().end(), isaAllocation);
} }
} }
@@ -1255,7 +1345,9 @@ ze_result_t ModuleImp::performDynamicLink(uint32_t numModules,
auto &kernHeapInfo = kernelInfo->heapInfo; auto &kernHeapInfo = kernelInfo->heapInfo;
const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap); const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.kernelHeapSize)); patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.kernelHeapSize));
isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch()), kernHeapInfo.kernelHeapSize}); auto isaAddressToPatch = static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch() +
kernelImmDatas.at(i)->getIsaOffsetInParentAllocation());
isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), isaAddressToPatch, kernHeapInfo.kernelHeapSize});
} }
} }
for (const auto &unresolvedExternal : moduleId->unresolvedExternalsInfo) { for (const auto &unresolvedExternal : moduleId->unresolvedExternalsInfo) {
@@ -1475,8 +1567,13 @@ void ModuleImp::notifyModuleDestroy() {
StackVec<NEO::GraphicsAllocation *, 32> ModuleImp::getModuleAllocations() { StackVec<NEO::GraphicsAllocation *, 32> ModuleImp::getModuleAllocations() {
StackVec<NEO::GraphicsAllocation *, 32> allocs; StackVec<NEO::GraphicsAllocation *, 32> allocs;
for (auto &kernImmData : kernelImmDatas) { if (auto isaParentAllocation = this->getKernelsIsaParentAllocation(); isaParentAllocation != nullptr) {
allocs.push_back(kernImmData->getIsaGraphicsAllocation()); allocs.push_back(isaParentAllocation);
} else {
// ISA allocations not optimized
for (auto &kernImmData : kernelImmDatas) {
allocs.push_back(kernImmData->getIsaGraphicsAllocation());
}
} }
if (translationUnit) { if (translationUnit) {

View File

@@ -124,6 +124,7 @@ struct ModuleImp : public Module {
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override; const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override;
const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; } const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; }
NEO::GraphicsAllocation *getKernelsIsaParentAllocation() const { return kernelsIsaParentRegion.get(); }
uint32_t getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const override; uint32_t getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const override;
@@ -155,8 +156,9 @@ struct ModuleImp : public Module {
} }
protected: protected:
ze_result_t initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice); MOCKABLE_VIRTUAL ze_result_t initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice);
ze_result_t checkIfBuildShouldBeFailed(NEO::Device *neoDevice); ze_result_t checkIfBuildShouldBeFailed(NEO::Device *neoDevice);
ze_result_t allocateKernelImmutableDatas(size_t kernelsCount);
ze_result_t initializeKernelImmutableDatas(); ze_result_t initializeKernelImmutableDatas();
void copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSegmentsForPatching); void copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSegmentsForPatching);
void verifyDebugCapabilities(); void verifyDebugCapabilities();
@@ -167,6 +169,9 @@ struct ModuleImp : public Module {
void notifyModuleCreate(); void notifyModuleCreate();
void notifyModuleDestroy(); void notifyModuleDestroy();
bool populateHostGlobalSymbolsMap(std::unordered_map<std::string, std::string> &devToHostNameMapping); bool populateHostGlobalSymbolsMap(std::unordered_map<std::string, std::string> &devToHostNameMapping);
ze_result_t setIsaGraphicsAllocations();
MOCKABLE_VIRTUAL size_t computeKernelIsaAllocationAlignedSizeWithPadding(size_t isaSize);
MOCKABLE_VIRTUAL NEO::GraphicsAllocation *allocateKernelsIsaMemory(size_t size);
StackVec<NEO::GraphicsAllocation *, 32> getModuleAllocations(); StackVec<NEO::GraphicsAllocation *, 32> getModuleAllocations();
Device *device = nullptr; Device *device = nullptr;
@@ -174,6 +179,7 @@ struct ModuleImp : public Module {
std::unique_ptr<ModuleTranslationUnit> translationUnit; std::unique_ptr<ModuleTranslationUnit> translationUnit;
ModuleBuildLog *moduleBuildLog = nullptr; ModuleBuildLog *moduleBuildLog = nullptr;
NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr; NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr;
std::unique_ptr<NEO::GraphicsAllocation> kernelsIsaParentRegion;
std::vector<std::unique_ptr<KernelImmutableData>> kernelImmDatas; std::vector<std::unique_ptr<KernelImmutableData>> kernelImmDatas;
NEO::Linker::RelocatedSymbolsMap symbols; NEO::Linker::RelocatedSymbolsMap symbols;
@@ -198,6 +204,7 @@ struct ModuleImp : public Module {
uint32_t debugElfHandle = 0; uint32_t debugElfHandle = 0;
uint32_t profileFlags = 0; uint32_t profileFlags = 0;
uint64_t moduleLoadAddress = std::numeric_limits<uint64_t>::max(); uint64_t moduleLoadAddress = std::numeric_limits<uint64_t>::max();
size_t isaAllocationPageSize = 0;
NEO::Linker::PatchableSegments isaSegmentsForPatching; NEO::Linker::PatchableSegments isaSegmentsForPatching;
std::vector<std::vector<char>> patchedIsaTempStorage; std::vector<std::vector<char>> patchedIsaTempStorage;

View File

@@ -124,24 +124,13 @@ void ModuleImmutableDataFixture::tearDown() {
DeviceFixture::tearDown(); DeviceFixture::tearDown();
} }
ModuleFixture::ProxyModuleImp *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc, void ModuleFixture::setUp(bool skipCreatingModules) {
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
auto module = new ProxyModuleImp(device, moduleBuildLog, type);
*result = module->initialize(desc, device->getNEODevice());
if (*result != ZE_RESULT_SUCCESS) {
module->destroy();
return nullptr;
}
return module;
}
void ModuleFixture::setUp() {
DebugManager.flags.FailBuildProgramWithStatefulAccess.set(0); DebugManager.flags.FailBuildProgramWithStatefulAccess.set(0);
DeviceFixture::setUp(); DeviceFixture::setUp();
createModuleFromMockBinary(); if (skipCreatingModules == false) {
createModuleFromMockBinary();
}
} }
void ModuleFixture::createModuleFromMockBinary(ModuleType type) { void ModuleFixture::createModuleFromMockBinary(ModuleType type) {
@@ -155,7 +144,13 @@ void ModuleFixture::createModuleFromMockBinary(ModuleType type) {
ModuleBuildLog *moduleBuildLog = nullptr; ModuleBuildLog *moduleBuildLog = nullptr;
ze_result_t result = ZE_RESULT_SUCCESS; ze_result_t result = ZE_RESULT_SUCCESS;
module.reset(ProxyModuleImp::create(device, &moduleDesc, moduleBuildLog, type, &result)); if (!module) {
module.reset(new WhiteBox<::L0::Module>{device, moduleBuildLog, type});
}
result = module->initialize(&moduleDesc, device->getNEODevice());
if (result != ZE_RESULT_SUCCESS) {
module->destroy();
}
} }
void ModuleFixture::createKernel() { void ModuleFixture::createKernel() {

View File

@@ -123,19 +123,8 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
struct ModuleFixture : public DeviceFixture { struct ModuleFixture : public DeviceFixture {
struct ProxyModuleImp : public WhiteBox<::L0::Module> { void setUp(bool skipCreatingModules);
using BaseClass = WhiteBox<::L0::Module>; void setUp() { this->setUp(false); }
using BaseClass::BaseClass;
std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmDatas() {
return kernelImmDatas;
}
static ModuleFixture::ProxyModuleImp *create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
};
void setUp();
void createModuleFromMockBinary(ModuleType type = ModuleType::User); void createModuleFromMockBinary(ModuleType type = ModuleType::User);
@@ -147,7 +136,7 @@ struct ModuleFixture : public DeviceFixture {
const std::string kernelName = "test"; const std::string kernelName = "test";
const uint32_t numKernelArguments = 6; const uint32_t numKernelArguments = 6;
std::unique_ptr<ProxyModuleImp> module; std::unique_ptr<WhiteBox<::L0::Module>> module;
std::unique_ptr<WhiteBox<::L0::KernelImp>> kernel; std::unique_ptr<WhiteBox<::L0::KernelImp>> kernel;
std::unique_ptr<ZebinTestData::ZebinWithL0TestCommonModule> zebinData; std::unique_ptr<ZebinTestData::ZebinWithL0TestCommonModule> zebinData;
DebugManagerStateRestore restore; DebugManagerStateRestore restore;

View File

@@ -34,6 +34,7 @@ set(L0_MOCKS_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_module.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_module.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_module.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_sampler.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_sampler.h
) )

View File

@@ -0,0 +1,26 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
#include "level_zero/core/source/device/device.h"
namespace L0 {
namespace ult {
// Runs the base-class translation unit initialization first, then replaces the
// translation unit's global constant / global variable buffers with the mock
// ones whenever those have been set. The base-class result is returned unchanged.
ze_result_t WhiteBox<::L0::Module>::initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice) {
    const auto result = BaseClass::initializeTranslationUnit(desc, neoDevice);

    if (auto constBuffer = this->mockGlobalConstBuffer) {
        this->translationUnit->globalConstBuffer = constBuffer;
    }
    if (auto varBuffer = this->mockGlobalVarBuffer) {
        this->translationUnit->globalVarBuffer = varBuffer;
    }

    return result;
}
} // namespace ult
} // namespace L0

View File

@@ -13,6 +13,7 @@
#include "shared/test/common/test_macros/mock_method_macros.h" #include "shared/test/common/test_macros/mock_method_macros.h"
#include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/source/module/module_imp.h"
#include "level_zero/core/test/unit_tests/mock.h"
#include "level_zero/core/test/unit_tests/white_box.h" #include "level_zero/core/test/unit_tests/white_box.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
@@ -48,6 +49,7 @@ constexpr inline MockModuleTranslationUnit *toMockPtr(L0::ModuleTranslationUnit
template <> template <>
struct WhiteBox<::L0::Module> : public ::L0::ModuleImp { struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
using BaseClass = ::L0::ModuleImp; using BaseClass = ::L0::ModuleImp;
using BaseClass::allocateKernelsIsaMemory;
using BaseClass::allocatePrivateMemoryPerDispatch; using BaseClass::allocatePrivateMemoryPerDispatch;
using BaseClass::BaseClass; using BaseClass::BaseClass;
using BaseClass::builtFromSPIRv; using BaseClass::builtFromSPIRv;
@@ -61,6 +63,7 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
using BaseClass::isFunctionSymbolExportEnabled; using BaseClass::isFunctionSymbolExportEnabled;
using BaseClass::isGlobalSymbolExportEnabled; using BaseClass::isGlobalSymbolExportEnabled;
using BaseClass::kernelImmDatas; using BaseClass::kernelImmDatas;
using BaseClass::setIsaGraphicsAllocations;
using BaseClass::symbols; using BaseClass::symbols;
using BaseClass::translationUnit; using BaseClass::translationUnit;
using BaseClass::type; using BaseClass::type;
@@ -70,6 +73,11 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
: ::L0::ModuleImp{device, moduleBuildLog, type} { : ::L0::ModuleImp{device, moduleBuildLog, type} {
this->translationUnit.reset(new MockModuleTranslationUnit{device}); this->translationUnit.reset(new MockModuleTranslationUnit{device});
} }
ze_result_t initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice) override;
NEO::GraphicsAllocation *mockGlobalVarBuffer = nullptr;
NEO::GraphicsAllocation *mockGlobalConstBuffer = nullptr;
}; };
using Module = WhiteBox<::L0::Module>; using Module = WhiteBox<::L0::Module>;
@@ -91,15 +99,23 @@ struct Mock<Module> : public Module {
(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog)); (uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog));
ADDMETHOD_NOBASE(getProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_module_properties_t * pModuleProperties)); ADDMETHOD_NOBASE(getProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_module_properties_t * pModuleProperties));
ADDMETHOD_NOBASE(getGlobalPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pGlobalName, size_t *pSize, void **pPtr)); ADDMETHOD_NOBASE(getGlobalPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pGlobalName, size_t *pSize, void **pPtr));
ADDMETHOD(allocateKernelsIsaMemory, NEO::GraphicsAllocation *, true, nullptr, (size_t isaSize), (isaSize));
ADDMETHOD(computeKernelIsaAllocationAlignedSizeWithPadding, size_t, true, 0ul, (size_t isaSize), (isaSize));
}; };
struct MockModule : public L0::ModuleImp { struct MockModule : public L0::ModuleImp {
using ModuleImp::allocateKernelImmutableDatas;
using ModuleImp::allocateKernelsIsaMemory;
using ModuleImp::computeKernelIsaAllocationAlignedSizeWithPadding;
using ModuleImp::debugModuleHandle; using ModuleImp::debugModuleHandle;
using ModuleImp::getModuleAllocations; using ModuleImp::getModuleAllocations;
using ModuleImp::initializeKernelImmutableDatas;
using ModuleImp::isaAllocationPageSize;
using ModuleImp::isFunctionSymbolExportEnabled; using ModuleImp::isFunctionSymbolExportEnabled;
using ModuleImp::isGlobalSymbolExportEnabled; using ModuleImp::isGlobalSymbolExportEnabled;
using ModuleImp::kernelImmDatas; using ModuleImp::kernelImmDatas;
using ModuleImp::populateHostGlobalSymbolsMap; using ModuleImp::populateHostGlobalSymbolsMap;
using ModuleImp::setIsaGraphicsAllocations;
using ModuleImp::symbols; using ModuleImp::symbols;
using ModuleImp::translationUnit; using ModuleImp::translationUnit;
@@ -115,6 +131,8 @@ struct MockModule : public L0::ModuleImp {
return kernelImmData; return kernelImmData;
} }
std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVectorRef() { return kernelImmDatas; }
KernelImmutableData *kernelImmData = nullptr; KernelImmutableData *kernelImmData = nullptr;
}; };

View File

@@ -457,11 +457,17 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetAllEventData, IsAtLeastSkl) { HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetAllEventData, IsAtLeastSkl) {
class MockQueryKernelTimestampsKernel : public L0::KernelImp { class MockQueryKernelTimestampsKernel : public L0::KernelImp {
public: public:
MockQueryKernelTimestampsKernel(L0::Module *module) : KernelImp(module) { MockQueryKernelTimestampsKernel(MockModule *module) : KernelImp{module}, module{module} {
mockKernelImmutableData.kernelDescriptor = &mockKernelDescriptor; mockKernelImmutableData.kernelDescriptor = &mockKernelDescriptor;
size_t stubKernelHeapSize = 42;
mockKernelImmutableData.setIsaPerKernelAllocation(module->allocateKernelsIsaMemory(stubKernelHeapSize));
this->kernelImmData = &mockKernelImmutableData; this->kernelImmData = &mockKernelImmutableData;
} }
// Returns the ISA allocation held by mockKernelImmutableData to the memory
// manager of the module's device; release() gives up the holder's pointer
// before it is freed here.
~MockQueryKernelTimestampsKernel() override {
    auto *memoryManager = this->module->getDevice()->getNEODevice()->getMemoryManager();
    memoryManager->freeGraphicsMemory(mockKernelImmutableData.isaGraphicsAllocation.release());
}
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override { ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override {
if (argIndex == 0) { if (argIndex == 0) {
index0Allocation = allocation; index0Allocation = allocation;
@@ -480,12 +486,13 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetA
NEO::GraphicsAllocation *index0Allocation = nullptr; NEO::GraphicsAllocation *index0Allocation = nullptr;
KernelDescriptor mockKernelDescriptor = {}; KernelDescriptor mockKernelDescriptor = {};
WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {}; WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {};
MockModule *module = nullptr;
}; };
struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl { struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl {
MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) { MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
tmpModule = std::make_unique<MockModule>(device, nullptr, ModuleType::Builtin); tmpModule = std::make_unique<MockModule>(device, nullptr, ModuleType::Builtin);
tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>(static_cast<L0::ModuleImp *>(tmpModule.get())); tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>(tmpModule.get());
} }
MockQueryKernelTimestampsKernel *getFunction(Builtin func) override { MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
return tmpMockKernel.get(); return tmpMockKernel.get();

View File

@@ -1382,8 +1382,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichTogethe
auto overAllocMinSize = static_cast<uint32_t>(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) + margin1KB; auto overAllocMinSize = static_cast<uint32_t>(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) + margin1KB;
auto kernelNames = std::array<std::string, 2u>{"test1", "test2"}; auto kernelNames = std::array<std::string, 2u>{"test1", "test2"};
auto proxyModuleImpl = static_cast<ModuleFixture::ProxyModuleImp *>(this->module.get()); auto &kernelImmDatas = this->module->kernelImmDatas;
auto &kernelImmDatas = proxyModuleImpl->getKernelImmDatas();
for (size_t i = 0; i < kernelsNb; i++) { for (size_t i = 0; i < kernelsNb; i++) {
auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[i]->getDescriptor()); auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[i]->getDescriptor());
kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = overAllocMinSize + static_cast<uint32_t>(i * MemoryConstants::cacheLineSize); kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = overAllocMinSize + static_cast<uint32_t>(i * MemoryConstants::cacheLineSize);
@@ -1419,8 +1418,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichDontExc
auto underAllocSize = static_cast<uint32_t>(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) - margin128KB; auto underAllocSize = static_cast<uint32_t>(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) - margin128KB;
auto kernelNames = std::array<std::string, 2u>{"test1", "test2"}; auto kernelNames = std::array<std::string, 2u>{"test1", "test2"};
auto proxyModuleImpl = static_cast<ModuleFixture::ProxyModuleImp *>(this->module.get()); auto &kernelImmDatas = this->module->kernelImmDatas;
auto &kernelImmDatas = proxyModuleImpl->getKernelImmDatas();
for (size_t i = 0; i < kernelsNb; i++) { for (size_t i = 0; i < kernelsNb; i++) {
auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[i]->getDescriptor()); auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[i]->getDescriptor());
kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = underAllocSize; kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = underAllocSize;

View File

@@ -8,6 +8,7 @@
#pragma once #pragma once
#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_builtins.h"
#include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_compilers.h"
@@ -84,6 +85,12 @@ struct L0DebuggerHwFixture : public L0DebuggerFixture {
MockDebuggerL0Hw<GfxFamily> *getMockDebuggerL0Hw() { MockDebuggerL0Hw<GfxFamily> *getMockDebuggerL0Hw() {
return static_cast<MockDebuggerL0Hw<GfxFamily> *>(debuggerHw); return static_cast<MockDebuggerL0Hw<GfxFamily> *>(debuggerHw);
} }
// Requests a graphics allocation of `size` bytes from neoDevice's memory manager.
// `internal` selects KERNEL_ISA_INTERNAL as the allocation type; otherwise KERNEL_ISA is used.
// Returns whatever the memory manager returns (not checked here).
NEO::GraphicsAllocation *allocateIsaMemory(size_t size, bool internal) {
    auto isaType = NEO::AllocationType::KERNEL_ISA;
    if (internal) {
        isaType = NEO::AllocationType::KERNEL_ISA_INTERNAL;
    }

    auto *memoryManager = neoDevice->getMemoryManager();
    return memoryManager->allocateGraphicsMemoryWithProperties(
        {neoDevice->getRootDeviceIndex(), size, isaType, neoDevice->getDeviceBitfield()});
}
DebuggerL0 *debuggerHw = nullptr; DebuggerL0 *debuggerHw = nullptr;
}; };

View File

@@ -60,6 +60,7 @@ TEST_F(KernelInitializeTest, givenDebuggingEnabledWhenKernelsAreInitializedThenA
kernelInfo.heapInfo.pKernelHeap = &kernelHeap; kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device); KernelImmutableData kernelImmutableData(device);
kernelImmutableData.setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo.heapInfo.kernelHeapSize, false));
memoryOperationsHandler->makeResidentCalledCount = 0; memoryOperationsHandler->makeResidentCalledCount = 0;
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false); kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
@@ -327,12 +328,14 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinDebugDataWhenInitializingMo
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName; kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device); auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User); std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device); moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData)); moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
kernelImmutableData = ::std::make_unique<KernelImmutableData>(device); kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData)); moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
@@ -371,10 +374,12 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenDumpElfFlagAndZebinWhenInitializ
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName; kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device); auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData)); moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
kernelImmutableData = ::std::make_unique<KernelImmutableData>(device); kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData)); moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
@@ -433,6 +438,7 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinWhenModuleIsInitializedAndD
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName; kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device); auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User); std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device); moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
@@ -567,6 +573,7 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenModuleDebugHandleZeroWhenInitial
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName; kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device); auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User); std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData)); moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));

View File

@@ -1865,9 +1865,9 @@ TEST_F(KernelLocalIdsTest, WhenKernelIsCreatedThenDefaultLocalIdGenerationbyRunt
EXPECT_TRUE(kernel->requiresGenerationOfLocalIdsByRuntime()); EXPECT_TRUE(kernel->requiresGenerationOfLocalIdsByRuntime());
} }
struct KernelIsaTests : Test<ModuleFixture> { struct KernelIsaFixture : ModuleFixture {
void SetUp() override { void setUp() {
Test<ModuleFixture>::SetUp(); ModuleFixture::setUp(true);
auto &capabilityTable = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; auto &capabilityTable = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
bool createBcsEngine = !capabilityTable.blitterOperationsSupported; bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
@@ -1883,137 +1883,98 @@ struct KernelIsaTests : Test<ModuleFixture> {
} }
std::unique_ptr<OsContext> bcsOsContext; std::unique_ptr<OsContext> bcsOsContext;
uint32_t testKernelHeap = 0;
}; };
using KernelIsaTests = Test<KernelIsaFixture>;
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) { TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed)); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1)); DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1));
this->createModuleFromMockBinary(ModuleType::User);
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device);
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver; auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
auto initialTaskCount = bcsCsr->peekTaskCount(); auto initialTaskCount = bcsCsr->peekTaskCount();
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false); auto &kernelImmutableData = this->module->kernelImmDatas.back();
if (kernelImmutableData->getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) {
if (kernelImmutableData.getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) {
EXPECT_EQ(initialTaskCount + 1, bcsCsr->peekTaskCount()); EXPECT_EQ(initialTaskCount + 1, bcsCsr->peekTaskCount());
} else { } else {
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount()); EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
} }
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
} }
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) { TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed)); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1)); DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1));
this->createModuleFromMockBinary(ModuleType::User);
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device);
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver; auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
auto initialTaskCount = bcsCsr->peekTaskCount(); auto initialTaskCount = bcsCsr->peekTaskCount();
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount()); EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
} }
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) { TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed)); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1)); DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1));
this->createModuleFromMockBinary(ModuleType::User);
device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false; device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device);
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver; auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
auto initialTaskCount = bcsCsr->peekTaskCount(); auto initialTaskCount = bcsCsr->peekTaskCount();
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount()); EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
} }
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithInternalIsaThenCorrectAllocationTypeIsUsed) { TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithInternalIsaThenCorrectAllocationTypeIsUsed) {
uint32_t kernelHeap = 0; this->createModuleFromMockBinary(ModuleType::Builtin);
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device); auto &kernelImmutableData = this->module->kernelImmDatas.back();
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernelImmutableData->getIsaGraphicsAllocation()->getAllocationType());
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, true);
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType());
} }
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithNonInternalIsaThenCorrectAllocationTypeIsUsed) { TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithNonInternalIsaThenCorrectAllocationTypeIsUsed) {
uint32_t kernelHeap = 0; this->createModuleFromMockBinary(ModuleType::User);
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device); auto &kernelImmutableData = this->module->kernelImmDatas.back();
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernelImmutableData->getIsaGraphicsAllocation()->getAllocationType());
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType());
} }
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithIsaThenPaddingIsAdded) { TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithIsaThenPaddingIsAdded) {
uint32_t kernelHeap = 0; this->createModuleFromMockBinary(ModuleType::User);
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device); auto &kernelImmutableData = this->module->kernelImmDatas.back();
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false); auto kernelHeapSize = kernelImmutableData->getKernelInfo()->heapInfo.kernelHeapSize;
auto graphicsAllocation = kernelImmutableData.getIsaGraphicsAllocation(); auto &helper = device->getNEODevice()->getGfxCoreHelper();
auto &helper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
size_t isaPadding = helper.getPaddingForISAAllocation(); size_t isaPadding = helper.getPaddingForISAAllocation();
EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo.heapInfo.kernelHeapSize + isaPadding); EXPECT_EQ(kernelImmutableData->getIsaSize(), kernelHeapSize + isaPadding);
} }
TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuffersAreAddedToResidencyContainer) { TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuffersAreAddedToResidencyContainer) {
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device);
uint64_t gpuAddress = 0x1200; uint64_t gpuAddress = 0x1200;
void *buffer = reinterpret_cast<void *>(gpuAddress); void *buffer = reinterpret_cast<void *>(gpuAddress);
size_t size = 0x1100; size_t size = 0x1100;
NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, size); NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, size);
NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, size); NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, size);
kernelImmutableData.initialize(&kernelInfo, device, 0, ModuleBuildLog *moduleBuildLog = nullptr;
&globalConstBuffer, &globalVarBuffer, false); this->module.reset(new WhiteBox<::L0::Module>{this->device, moduleBuildLog, ModuleType::User});
auto &resCont = kernelImmutableData.getResidencyContainer(); this->module->mockGlobalVarBuffer = &globalVarBuffer;
EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalVarBuffer)); this->module->mockGlobalConstBuffer = &globalConstBuffer;
EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalConstBuffer));
this->createModuleFromMockBinary(ModuleType::User);
for (auto &kernelImmData : this->module->kernelImmDatas) {
auto &resCont = kernelImmData->getResidencyContainer();
EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalVarBuffer));
EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalConstBuffer));
}
this->module->translationUnit->globalConstBuffer = nullptr;
this->module->translationUnit->globalVarBuffer = nullptr;
} }
using KernelImpPatchBindlessTest = Test<ModuleFixture>; using KernelImpPatchBindlessTest = Test<ModuleFixture>;

View File

@@ -26,6 +26,7 @@
#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_elf.h" #include "shared/test/common/mocks/mock_elf.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_l0_debugger.h"
#include "shared/test/common/mocks/mock_memory_operations_handler.h" #include "shared/test/common/mocks/mock_memory_operations_handler.h"
#include "shared/test/common/mocks/mock_modules_zebin.h" #include "shared/test/common/mocks/mock_modules_zebin.h"
#include "shared/test/common/test_macros/hw_test.h" #include "shared/test/common/test_macros/hw_test.h"
@@ -104,7 +105,71 @@ HWTEST_F(ModuleTest, givenUserModuleWhenCreatedThenCorrectAllocationTypeIsUsedFo
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernel->getIsaAllocation()->getAllocationType()); EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernel->getIsaAllocation()->getAllocationType());
} }
template <bool localMemEnabled>
struct ModuleKernelIsaAllocationsFixture : public ModuleFixture {
static constexpr size_t isaAllocationPageSize = (localMemEnabled ? MemoryConstants::pageSize64k : MemoryConstants::pageSize);
using Module = WhiteBox<::L0::Module>;
void setUp() {
this->dbgRestorer = std::make_unique<DebugManagerStateRestore>();
DebugManager.flags.EnableLocalMemory.set(localMemEnabled);
ModuleFixture::setUp();
ModuleBuildLog *moduleBuildLog = nullptr;
auto type = ModuleType::User;
this->module.reset(new Mock<Module>{device, moduleBuildLog, type});
zebinData = std::make_unique<ZebinTestData::ZebinWithL0TestCommonModule>(device->getHwInfo());
const auto &src = zebinData->storage;
this->moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
this->moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.data());
this->moduleDesc.inputSize = src.size();
this->mockModule = static_cast<Mock<Module> *>(this->module.get());
}
void givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned() {
mockModule->allocateKernelsIsaMemoryCallBase = false;
auto result = module->initialize(&this->moduleDesc, device->getNEODevice());
EXPECT_EQ(result, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
void givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned() {
mockModule->allocateKernelsIsaMemoryCallBase = false;
mockModule->computeKernelIsaAllocationAlignedSizeWithPaddingCallBase = false;
mockModule->computeKernelIsaAllocationAlignedSizeWithPaddingResult = isaAllocationPageSize;
auto result = module->initialize(&this->moduleDesc, device->getNEODevice());
EXPECT_EQ(result, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
Mock<Module> *mockModule = nullptr;
ze_module_desc_t moduleDesc = {};
std::unique_ptr<DebugManagerStateRestore> dbgRestorer = nullptr;
};
// Suite instantiating ModuleKernelIsaAllocationsFixture with localMemEnabled = true.
using ModuleKernelIsaAllocationsInLocalMemoryTests = Test<ModuleKernelIsaAllocationsFixture<true>>;
// Delegates to the fixture scenario of the same name (ISA allocation mocked out, initialize() must report out-of-device-memory).
HWTEST_F(ModuleKernelIsaAllocationsInLocalMemoryTests, givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned) {
this->givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned();
}
// Delegates to the fixture scenario of the same name, run with localMemEnabled = true.
HWTEST_F(ModuleKernelIsaAllocationsInLocalMemoryTests, givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned) {
this->givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned();
}
// Suite instantiating ModuleKernelIsaAllocationsFixture with localMemEnabled = false.
using ModuleKernelIsaAllocationsInSharedMemoryTests = Test<ModuleKernelIsaAllocationsFixture<false>>;
// Delegates to the fixture scenario of the same name (ISA allocation mocked out, initialize() must report out-of-device-memory).
HWTEST_F(ModuleKernelIsaAllocationsInSharedMemoryTests, givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned) {
this->givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned();
}
// Delegates to the fixture scenario of the same name, run with localMemEnabled = false.
HWTEST_F(ModuleKernelIsaAllocationsInSharedMemoryTests, givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned) {
this->givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned();
}
HWTEST_F(ModuleTest, givenBuiltinModuleWhenCreatedThenCorrectAllocationTypeIsUsedForIsa) { HWTEST_F(ModuleTest, givenBuiltinModuleWhenCreatedThenCorrectAllocationTypeIsUsedForIsa) {
this->module.reset();
createModuleFromMockBinary(ModuleType::Builtin); createModuleFromMockBinary(ModuleType::Builtin);
createKernel(); createKernel();
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernel->getIsaAllocation()->getAllocationType()); EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernel->getIsaAllocation()->getAllocationType());
@@ -2262,16 +2327,8 @@ TEST_F(ModuleFunctionPointerTests, givenModuleWithExportedSymbolThenGetFunctionP
EXPECT_EQ(reinterpret_cast<uint64_t>(functionPointer), module0->kernelImmDatas[0]->getIsaGraphicsAllocation()->getGpuAddress()); EXPECT_EQ(reinterpret_cast<uint64_t>(functionPointer), module0->kernelImmDatas[0]->getIsaGraphicsAllocation()->getGpuAddress());
} }
class DeviceModuleSetArgBufferTest : public ModuleFixture, public ::testing::Test { class DeviceModuleSetArgBufferFixture : public ModuleFixture {
public: public:
void SetUp() override {
ModuleFixture::setUp();
}
void TearDown() override {
ModuleFixture::tearDown();
}
void createKernelAndAllocMemory(uint32_t rootDeviceIndex, void **ptr, ze_kernel_handle_t *kernelHandle) { void createKernelAndAllocMemory(uint32_t rootDeviceIndex, void **ptr, ze_kernel_handle_t *kernelHandle) {
ze_kernel_desc_t kernelDesc = {}; ze_kernel_desc_t kernelDesc = {};
kernelDesc.pKernelName = kernelName.c_str(); kernelDesc.pKernelName = kernelName.c_str();
@@ -2283,11 +2340,11 @@ class DeviceModuleSetArgBufferTest : public ModuleFixture, public ::testing::Tes
EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(ZE_RESULT_SUCCESS, res);
} }
}; };
using DeviceModuleSetArgBufferTest = Test<DeviceModuleSetArgBufferFixture>;
HWTEST_F(DeviceModuleSetArgBufferTest, HWTEST_F(DeviceModuleSetArgBufferTest,
givenValidMemoryUsedinFirstCallToSetArgBufferThenNullptrSetOnTheSecondCallThenArgBufferisUpdatedInEachCallAndSuccessIsReturned) { givenValidMemoryUsedinFirstCallToSetArgBufferThenNullptrSetOnTheSecondCallThenArgBufferisUpdatedInEachCallAndSuccessIsReturned) {
uint32_t rootDeviceIndex = 0; uint32_t rootDeviceIndex = 0;
createModuleFromMockBinary();
ze_kernel_handle_t kernelHandle; ze_kernel_handle_t kernelHandle;
void *validBufferPtr = nullptr; void *validBufferPtr = nullptr;
@@ -3474,6 +3531,227 @@ TEST_F(ModuleTest, whenContainsStatefulAccessIsCalledThenResultIsCorrect) {
} }
} }
template <bool localMemEnabled>
struct ModuleIsaAllocationsFixture : public DeviceFixture {
static constexpr size_t isaAllocationPageSize = (localMemEnabled ? MemoryConstants::pageSize64k : MemoryConstants::pageSize);
static constexpr NEO::MemoryPool isaAllocationMemoryPool = (localMemEnabled ? NEO::MemoryPool::LocalMemory : NEO::MemoryPool::System4KBPagesWith32BitGpuAddressing);
void setUp() {
this->dbgRestorer = std::make_unique<DebugManagerStateRestore>();
DebugManager.flags.EnableLocalMemory.set(localMemEnabled);
DeviceFixture::setUp();
this->neoDevice = this->device->getNEODevice();
this->isaPadding = this->neoDevice->getGfxCoreHelper().getPaddingForISAAllocation();
this->kernelStartPointerAlignment = this->neoDevice->getGfxCoreHelper().getKernelIsaPointerAlignment();
this->mockMemoryManager = static_cast<MockMemoryManager *>(this->neoDevice->getMemoryManager());
this->mockMemoryManager->localMemorySupported[this->neoDevice->getRootDeviceIndex()] = true;
this->mockModule.reset(new MockModule{this->device, nullptr, ModuleType::User});
this->mockModule->translationUnit.reset(new MockModuleTranslationUnit{this->device});
}
void tearDown() {
this->mockModule->translationUnit.reset();
this->mockModule.reset();
DeviceFixture::tearDown();
}
void prepareKernelInfoAndAddToTranslationUnit(size_t isaSize) {
auto kernelInfo = new KernelInfo{};
kernelInfo->heapInfo.pKernelHeap = reinterpret_cast<const void *>(0xdeadbeef0000);
kernelInfo->heapInfo.kernelHeapSize = static_cast<uint32_t>(isaSize);
this->mockModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
}
size_t computeKernelIsaAllocationSizeWithPadding(size_t isaSize) {
auto isaPadding = this->neoDevice->getGfxCoreHelper().getPaddingForISAAllocation();
return isaPadding + isaSize;
}
template <typename FamilyType>
void givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations() {
auto requestedSize = 0x40;
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
auto debugger = MockDebuggerL0Hw<FamilyType>::allocate(neoDevice);
this->neoDevice->getRootDeviceEnvironmentRef().debugger.reset(debugger);
this->mockModule->initializeKernelImmutableDatas();
auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVector();
EXPECT_EQ(nullptr, kernelImmDatas[0]->getIsaParentAllocation());
EXPECT_NE(nullptr, kernelImmDatas[0]->getIsaGraphicsAllocation());
EXPECT_EQ(nullptr, kernelImmDatas[1]->getIsaParentAllocation());
EXPECT_NE(nullptr, kernelImmDatas[1]->getIsaGraphicsAllocation());
}
void givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations() {
auto maxAllocationSizeInPage = alignDown(isaAllocationPageSize - this->isaPadding, this->kernelStartPointerAlignment);
this->prepareKernelInfoAndAddToTranslationUnit(maxAllocationSizeInPage);
auto tinyAllocationSize = 0x8;
this->prepareKernelInfoAndAddToTranslationUnit(tinyAllocationSize);
this->mockModule->initializeKernelImmutableDatas();
auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVector();
EXPECT_EQ(nullptr, kernelImmDatas[0]->getIsaParentAllocation());
EXPECT_NE(nullptr, kernelImmDatas[0]->getIsaGraphicsAllocation());
EXPECT_EQ(kernelImmDatas[0]->getIsaOffsetInParentAllocation(), 0lu);
EXPECT_EQ(kernelImmDatas[0]->getIsaSubAllocationSize(), 0lu);
EXPECT_EQ(nullptr, kernelImmDatas[1]->getIsaParentAllocation());
EXPECT_NE(nullptr, kernelImmDatas[1]->getIsaGraphicsAllocation());
EXPECT_EQ(kernelImmDatas[1]->getIsaOffsetInParentAllocation(), 0lu);
EXPECT_EQ(kernelImmDatas[1]->getIsaSubAllocationSize(), 0lu);
if constexpr (localMemEnabled) {
EXPECT_EQ(isaAllocationPageSize, kernelImmDatas[0]->getIsaSize());
EXPECT_EQ(isaAllocationPageSize, kernelImmDatas[1]->getIsaSize());
} else {
EXPECT_EQ(this->computeKernelIsaAllocationSizeWithPadding(maxAllocationSizeInPage), kernelImmDatas[0]->getIsaSize());
EXPECT_EQ(this->computeKernelIsaAllocationSizeWithPadding(tinyAllocationSize), kernelImmDatas[1]->getIsaSize());
}
}
struct ProxyKernelImmutableData : public KernelImmutableData {
using BaseClass = KernelImmutableData;
using BaseClass::BaseClass;
~ProxyKernelImmutableData() override { this->KernelImmutableData::~KernelImmutableData(); }
ADDMETHOD(initialize, ze_result_t, true, ZE_RESULT_ERROR_UNKNOWN,
(NEO::KernelInfo * kernelInfo, L0::Device *device, uint32_t computeUnitsUsedForScratch, NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel),
(kernelInfo, device, computeUnitsUsedForScratch, globalConstBuffer, globalVarBuffer, internalKernel));
};
void givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched() {
auto requestedSize = 0x40;
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVectorRef();
{
auto kernelsCount = 3ul;
kernelImmDatas.reserve(kernelsCount);
for (size_t i = 0lu; i < kernelsCount; i++) {
kernelImmDatas.emplace_back(new ProxyKernelImmutableData(this->device));
}
auto result = this->mockModule->setIsaGraphicsAllocations();
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
}
static_cast<ProxyKernelImmutableData *>(kernelImmDatas[2].get())->initializeCallBase = false;
auto result = this->mockModule->initializeKernelImmutableDatas();
EXPECT_EQ(result, ZE_RESULT_ERROR_UNKNOWN);
ASSERT_NE(kernelImmDatas[0].get(), nullptr);
ASSERT_NE(kernelImmDatas[1].get(), nullptr);
EXPECT_EQ(kernelImmDatas[2].get(), nullptr);
EXPECT_NE(kernelImmDatas[0]->getIsaGraphicsAllocation(), nullptr);
EXPECT_NE(kernelImmDatas[1]->getIsaGraphicsAllocation(), nullptr);
}
size_t isaPadding;
size_t kernelStartPointerAlignment;
NEO::Device *neoDevice = nullptr;
MockMemoryManager *mockMemoryManager = nullptr;
std::unique_ptr<MockModule> mockModule = nullptr;
std::unique_ptr<DebugManagerStateRestore> dbgRestorer = nullptr;
};
// Suite instantiating ModuleIsaAllocationsFixture with localMemEnabled = true.
using ModuleIsaAllocationsInLocalMemoryTest = Test<ModuleIsaAllocationsFixture<true>>;

// Two kernel ISAs that together fill exactly one ISA page are expected to be placed back to back
// in a shared parent allocation.
TEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhichFitInSinglePage64KWhenKernelImmutableDatasInitializedThenKernelIsasShareParentAllocation) {
    EXPECT_EQ(this->mockModule->isaAllocationPageSize, isaAllocationPageSize);

    auto firstIsaSize = 0x40;
    this->prepareKernelInfoAndAddToTranslationUnit(firstIsaSize);
    auto firstSubAllocationSize = this->mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(firstIsaSize);

    // the second ISA takes whatever remains of the page once its own padding is accounted for
    auto secondIsaSize = isaAllocationPageSize - firstSubAllocationSize - this->isaPadding;
    this->prepareKernelInfoAndAddToTranslationUnit(secondIsaSize);
    auto secondSubAllocationSize = this->mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(secondIsaSize);

    this->mockModule->initializeKernelImmutableDatas();

    auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVector();
    auto *firstKernel = kernelImmDatas[0].get();
    EXPECT_EQ(firstKernel->getIsaGraphicsAllocation(), firstKernel->getIsaParentAllocation());
    EXPECT_EQ(firstKernel->getIsaOffsetInParentAllocation(), 0lu);
    EXPECT_EQ(firstKernel->getIsaSubAllocationSize(), firstSubAllocationSize);

    auto *secondKernel = kernelImmDatas[1].get();
    EXPECT_EQ(secondKernel->getIsaGraphicsAllocation(), secondKernel->getIsaParentAllocation());
    EXPECT_EQ(secondKernel->getIsaOffsetInParentAllocation(), firstSubAllocationSize);
    EXPECT_EQ(secondKernel->getIsaSubAllocationSize(), secondSubAllocationSize);

    EXPECT_EQ(firstKernel->getIsaSize(), firstSubAllocationSize);
    EXPECT_EQ(firstKernel->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
    EXPECT_EQ(secondKernel->getIsaSize(), secondSubAllocationSize);
    EXPECT_EQ(secondKernel->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
}
// Delegates to the fixture's debugger-enabled scenario for the current hardware family (localMemEnabled = true).
HWTEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhichFitInSinglePage64KAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
this->givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations<FamilyType>();
}
// Delegates to the fixture's page-overflow scenario (localMemEnabled = true).
TEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhichExceedSinglePage64KWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
this->givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations();
}
// Delegates to the fixture's failed-kernel-initialization scenario (localMemEnabled = true).
TEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched) {
this->givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched();
}
// Suite instantiating ModuleIsaAllocationsFixture with localMemEnabled = false.
using ModuleIsaAllocationsInSystemMemoryTest = Test<ModuleIsaAllocationsFixture<false>>;

// Two tiny kernel ISAs in system memory: depending on how large the per-core ISA padding is, they either
// share one parent allocation or each get a separate allocation. The test derives the expected layout
// from the aligned sizes and then verifies whichever layout applies.
TEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenKernelIsaWhichCouldFitInPages4KBWhenKernelImmutableDatasInitializedThenKernelIsasCanGetSeparateAllocationsDependingOnPaddingSize) {
    EXPECT_EQ(this->mockModule->isaAllocationPageSize, isaAllocationPageSize);

    const auto requestedSize1 = 0x8;
    this->prepareKernelInfoAndAddToTranslationUnit(requestedSize1);
    auto isaAllocationAlignedSize1 = this->mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(requestedSize1);

    const auto requestedSize2 = 0x4;
    this->prepareKernelInfoAndAddToTranslationUnit(requestedSize2);
    auto isaAllocationAlignedSize2 = this->mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(requestedSize2);

    // for 4kB pages, 2x isaPaddings alone could exceed isaAllocationPageSize, which precludes page sharing
    const bool isasShouldShareSamePage = (isaAllocationAlignedSize1 + isaAllocationAlignedSize2 <= isaAllocationPageSize);

    this->mockModule->initializeKernelImmutableDatas();
    auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVector();
    if (isasShouldShareSamePage) {
        // shared layout: both kernels reference the parent allocation, the second one starts right after the first
        EXPECT_EQ(kernelImmDatas[0]->getIsaGraphicsAllocation(), kernelImmDatas[0]->getIsaParentAllocation());
        EXPECT_EQ(kernelImmDatas[0]->getIsaOffsetInParentAllocation(), 0lu);
        EXPECT_EQ(kernelImmDatas[0]->getIsaSubAllocationSize(), isaAllocationAlignedSize1);
        EXPECT_EQ(kernelImmDatas[0]->getIsaSize(), isaAllocationAlignedSize1);
        EXPECT_EQ(kernelImmDatas[1]->getIsaGraphicsAllocation(), kernelImmDatas[1]->getIsaParentAllocation());
        EXPECT_EQ(kernelImmDatas[1]->getIsaOffsetInParentAllocation(), isaAllocationAlignedSize1);
        EXPECT_EQ(kernelImmDatas[1]->getIsaSubAllocationSize(), isaAllocationAlignedSize2);
        // the second kernel's size belongs to kernelImmDatas[1], not [0]
        EXPECT_EQ(kernelImmDatas[1]->getIsaSize(), isaAllocationAlignedSize2);
    } else {
        // separate layout: no parent, zero offset/sub-allocation size, size is the padded (unaligned) ISA size
        EXPECT_EQ(nullptr, kernelImmDatas[0]->getIsaParentAllocation());
        EXPECT_NE(nullptr, kernelImmDatas[0]->getIsaGraphicsAllocation());
        EXPECT_EQ(kernelImmDatas[0]->getIsaOffsetInParentAllocation(), 0lu);
        EXPECT_EQ(kernelImmDatas[0]->getIsaSubAllocationSize(), 0lu);
        EXPECT_EQ(kernelImmDatas[0]->getIsaSize(), computeKernelIsaAllocationSizeWithPadding(requestedSize1));
        EXPECT_EQ(nullptr, kernelImmDatas[1]->getIsaParentAllocation());
        EXPECT_NE(nullptr, kernelImmDatas[1]->getIsaGraphicsAllocation());
        EXPECT_EQ(kernelImmDatas[1]->getIsaOffsetInParentAllocation(), 0lu);
        EXPECT_EQ(kernelImmDatas[1]->getIsaSubAllocationSize(), 0lu);
        EXPECT_EQ(kernelImmDatas[1]->getIsaSize(), computeKernelIsaAllocationSizeWithPadding(requestedSize2));
    }

    EXPECT_EQ(kernelImmDatas[0]->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
    EXPECT_EQ(kernelImmDatas[1]->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
}
// Delegates to the fixture's debugger-enabled scenario for the current hardware family (localMemEnabled = false).
HWTEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
this->givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations<FamilyType>();
}
// Delegates to the fixture's page-overflow scenario (localMemEnabled = false).
TEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
this->givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations();
}
// Delegates to the fixture's failed-kernel-initialization scenario (localMemEnabled = false).
TEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched) {
this->givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched();
}
using ModuleInitializeTest = Test<DeviceFixture>; using ModuleInitializeTest = Test<DeviceFixture>;
TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsReturned) { TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsReturned) {
@@ -3566,9 +3844,7 @@ TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDe
moduleDesc.inputSize = 10; moduleDesc.inputSize = 10;
ModuleBuildLog *moduleBuildLog = nullptr; ModuleBuildLog *moduleBuildLog = nullptr;
std::unique_ptr<MockModule> module = std::make_unique<MockModule>(device, auto module = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
moduleBuildLog,
ModuleType::User);
module->translationUnit = std::make_unique<MockModuleTranslationUnit>(device); module->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
module->translationUnit->globalVarBuffer = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( module->translationUnit->globalVarBuffer = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
@@ -3592,7 +3868,8 @@ TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDe
// pass kernelInfo ownership to programInfo // pass kernelInfo ownership to programInfo
module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; auto kernelImmData = std::make_unique<WhiteBox<::L0::KernelImmutableData>>(this->device);
kernelImmData->setIsaPerKernelAllocation(module->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false); kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false);
kernelImmData->createRelocatedDebugData(module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer); kernelImmData->createRelocatedDebugData(module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer);
@@ -3764,6 +4041,7 @@ TEST_F(ModuleTests, whenCopyingPatchedSegmentsThenAllocationsAreSetWritableForTb
kernelInfo->heapInfo.pKernelHeap = data; kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
pModule->kernelImmDatas.push_back(std::move(kernelImmData)); pModule->kernelImmDatas.push_back(std::move(kernelImmData));
@@ -3793,6 +4071,7 @@ TEST_F(ModuleTests, givenConstDataStringSectionWhenLinkingModuleThenSegmentIsPat
kernelInfo->heapInfo.pKernelHeap = data; kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false); kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
auto patchAddr = reinterpret_cast<uintptr_t>(ptrOffset(kernelImmData->isaGraphicsAllocation->getUnderlyingBuffer(), 0x8)); auto patchAddr = reinterpret_cast<uintptr_t>(ptrOffset(kernelImmData->isaGraphicsAllocation->getUnderlyingBuffer(), 0x8));
pModule->kernelImmDatas.push_back(std::move(kernelImmData)); pModule->kernelImmDatas.push_back(std::move(kernelImmData));
@@ -3823,6 +4102,7 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingBuiltinMo
kernelInfo->heapInfo.pKernelHeap = data; kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, true); kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, true);
kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = true; kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = true;
@@ -3868,6 +4148,7 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen
auto slmInlineSizeCopy = kernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize; auto slmInlineSizeCopy = kernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true); kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true);
pModule->kernelImmDatas.push_back(std::move(kernelImmData)); pModule->kernelImmDatas.push_back(std::move(kernelImmData));
@@ -3910,6 +4191,7 @@ TEST_F(ModuleTests, givenFullyLinkedModuleWhenCreatingKernelThenDebugMsgOnPrivat
kernelInfo->heapInfo.pKernelHeap = data; kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true); kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true);
pModule->kernelImmDatas.push_back(std::move(kernelImmData)); pModule->kernelImmDatas.push_back(std::move(kernelImmData));
@@ -3953,6 +4235,7 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingModuleThe
kernelInfo->heapInfo.pKernelHeap = data; kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false); kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = true; kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = true;
@@ -3983,6 +4266,7 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndNoDebuggerOrStackCallsWhenLink
kernelInfo->heapInfo.pKernelHeap = data; kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false); kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = false; kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = false;
@@ -4023,6 +4307,9 @@ TEST_F(ModuleTests, givenModuleWithGlobalAndConstAllocationsWhenGettingModuleAll
module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
auto isaAlloc = module->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize);
ASSERT_NE(isaAlloc, nullptr);
kernelImmData->setIsaPerKernelAllocation(isaAlloc);
kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false); kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false);
module->kernelImmDatas.push_back(std::move(kernelImmData)); module->kernelImmDatas.push_back(std::move(kernelImmData));

View File

@@ -77,7 +77,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
{ {
auto alloc = args.dispatchInterface->getIsaAllocation(); auto alloc = args.dispatchInterface->getIsaAllocation();
UNRECOVERABLE_IF(nullptr == alloc); UNRECOVERABLE_IF(nullptr == alloc);
auto offset = alloc->getGpuAddressToPatch(); auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
idd.setKernelStartPointer(offset); idd.setKernelStartPointer(offset);
idd.setKernelStartPointerHigh(0u); idd.setKernelStartPointerHigh(0u);
} }

View File

@@ -87,7 +87,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
{ {
auto alloc = args.dispatchInterface->getIsaAllocation(); auto alloc = args.dispatchInterface->getIsaAllocation();
UNRECOVERABLE_IF(nullptr == alloc); UNRECOVERABLE_IF(nullptr == alloc);
auto offset = alloc->getGpuAddressToPatch(); auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
if (!localIdsGenerationByRuntime) { if (!localIdsGenerationByRuntime) {
offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad; offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
} }

View File

@@ -53,6 +53,7 @@ class GfxCoreHelper {
static std::unique_ptr<GfxCoreHelper> create(const GFXCORE_FAMILY gfxCoreFamily); static std::unique_ptr<GfxCoreHelper> create(const GFXCORE_FAMILY gfxCoreFamily);
virtual size_t getMaxBarrierRegisterPerSlice() const = 0; virtual size_t getMaxBarrierRegisterPerSlice() const = 0;
virtual size_t getPaddingForISAAllocation() const = 0; virtual size_t getPaddingForISAAllocation() const = 0;
// Alignment of the kernel ISA start pointer for this core family; the per-family override returns KERNELSTARTPOINTER_ALIGN_SIZE.
virtual size_t getKernelIsaPointerAlignment() const = 0;
virtual uint32_t getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0; virtual uint32_t getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
virtual uint32_t getPitchAlignmentForImage(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0; virtual uint32_t getPitchAlignmentForImage(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo, const ProductHelper &productHelper) = 0; virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo, const ProductHelper &productHelper) = 0;
@@ -215,6 +216,10 @@ class GfxCoreHelperHw : public GfxCoreHelper {
size_t getPaddingForISAAllocation() const override; size_t getPaddingForISAAllocation() const override;
// Returns the kernel start pointer alignment taken from this family's interface descriptor definition.
size_t getKernelIsaPointerAlignment() const override {
    const auto startPointerAlignment = GfxFamily::cmdInitInterfaceDescriptorData.KERNELSTARTPOINTER_ALIGN_SIZE;
    return static_cast<size_t>(startPointerAlignment);
}
uint32_t getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const override; uint32_t getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
uint32_t getPitchAlignmentForImage(const RootDeviceEnvironment &rootDeviceEnvironment) const override; uint32_t getPitchAlignmentForImage(const RootDeviceEnvironment &rootDeviceEnvironment) const override;

View File

@@ -40,6 +40,7 @@ struct DispatchKernelEncoderI {
virtual uint32_t getSurfaceStateHeapDataSize() const = 0; virtual uint32_t getSurfaceStateHeapDataSize() const = 0;
virtual GraphicsAllocation *getIsaAllocation() const = 0; virtual GraphicsAllocation *getIsaAllocation() const = 0;
// Offset of the kernel's ISA inside the allocation returned by getIsaAllocation(); the dispatch encoder
// adds it to that allocation's GPU address when programming the kernel start pointer.
virtual uint64_t getIsaOffsetInParentAllocation() const = 0;
virtual const uint8_t *getDynamicStateHeapData() const = 0; virtual const uint8_t *getDynamicStateHeapData() const = 0;
virtual uint32_t getRequiredWorkgroupOrder() const = 0; virtual uint32_t getRequiredWorkgroupOrder() const = 0;

View File

@@ -997,6 +997,45 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThen
EXPECT_NE(usedAfter, usedBefore); EXPECT_NE(usedAfter, usedBefore);
} }
// Runs for the IsBeforeXeHpCore matcher: when the dispatch interface reports a non-zero ISA offset inside
// its parent allocation, the interface descriptor's kernel start pointer must equal the allocation's
// patch address plus that offset.
HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenProgrammingWalkerThenKernelStartPointerHasProperOffset, IsBeforeXeHpCore) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    auto kernelEncoder = std::make_unique<MockDispatchKernelEncoder>();
    // offset expressed in start-pointer granularity so it survives the descriptor's bit shift
    kernelEncoder->getIsaOffsetInParentAllocationResult = 8 << INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_BIT_SHIFT;

    uint32_t groupCounts[] = {2, 1, 1};
    bool requiresUncachedMocs = false;
    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, kernelEncoder.get(), groupCounts, requiresUncachedMocs);
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, dispatchArgs);

    auto interfaceDescriptor = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
    const auto expectedStartPointer = kernelEncoder->getIsaAllocation()->getGpuAddressToPatch() + kernelEncoder->getIsaOffsetInParentAllocation();
    EXPECT_EQ(interfaceDescriptor->getKernelStartPointer(), expectedStartPointer);
}
// The GfxCoreHelper getter must report the same alignment as the family's INTERFACE_DESCRIPTOR_DATA definition.
HWTEST_F(EncodeDispatchKernelTest, givenKernelStartPointerAlignmentInInterfaceDescriptorWhenHelperGetterUsedThenCorrectValueReturned) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_ALIGN_SIZE, gfxCoreHelper.getKernelIsaPointerAlignment());
}
// Runs for the IsAtLeastXeHpCore matcher: the interface descriptor embedded in the walker command found in the
// command stream must carry a kernel start pointer equal to the ISA allocation's patch address plus the
// offset reported by the dispatch interface.
HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenProgrammingWalkerThenKernelStartPointerHasProperOffset, IsAtLeastXeHpCore) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    auto kernelEncoder = std::make_unique<MockDispatchKernelEncoder>();
    // offset expressed in start-pointer granularity so it survives the descriptor's bit shift
    kernelEncoder->getIsaOffsetInParentAllocationResult = 8 << INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_BIT_SHIFT;

    uint32_t groupCounts[] = {2, 1, 1};
    bool requiresUncachedMocs = false;
    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, kernelEncoder.get(), groupCounts, requiresUncachedMocs);
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, dispatchArgs);

    // parse everything emitted so far and locate the walker command
    GenCmdList parsedCommands;
    auto *commandStream = cmdContainer->getCommandStream();
    CmdParse<FamilyType>::parseCommandBuffer(parsedCommands, ptrOffset(commandStream->getCpuBase(), 0), commandStream->getUsed());
    auto walkerIt = find<WALKER_TYPE *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(walkerIt, parsedCommands.end());

    auto walkerCmd = genCmdCast<WALKER_TYPE *>(*walkerIt);
    const auto expectedStartPointer = kernelEncoder->getIsaAllocation()->getGpuAddressToPatch() + kernelEncoder->getIsaOffsetInParentAllocation();
    EXPECT_EQ(walkerCmd->getInterfaceDescriptor().getKernelStartPointer(), expectedStartPointer);
}
HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatchingKernelThenSurfaceStateOffsetInCrossThreadDataIsNotPatched) { HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatchingKernelThenSurfaceStateOffsetInCrossThreadDataIsNotPatched) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor; using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor;

View File

@@ -55,5 +55,6 @@ struct MockDispatchKernelEncoder : public DispatchKernelEncoderI {
ADDMETHOD_CONST_NOBASE(getDynamicStateHeapData, const uint8_t *, nullptr, ()); ADDMETHOD_CONST_NOBASE(getDynamicStateHeapData, const uint8_t *, nullptr, ());
ADDMETHOD_CONST_NOBASE(requiresGenerationOfLocalIdsByRuntime, bool, true, ()); ADDMETHOD_CONST_NOBASE(requiresGenerationOfLocalIdsByRuntime, bool, true, ());
ADDMETHOD_CONST_NOBASE(getSlmPolicy, SlmPolicy, SlmPolicy::SlmPolicyNone, ()); ADDMETHOD_CONST_NOBASE(getSlmPolicy, SlmPolicy, SlmPolicy::SlmPolicyNone, ());
// Mocked ISA offset getter with a default of 0; tests override it through getIsaOffsetInParentAllocationResult.
ADDMETHOD_CONST_NOBASE(getIsaOffsetInParentAllocation, uint64_t, 0lu, ());
}; };
} // namespace NEO } // namespace NEO