Revert "feature: Optimize intra-module kernel ISA allocations"

This reverts commit c348831470.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2023-09-19 13:53:41 +02:00
committed by Compute-Runtime-Automation
parent c348831470
commit 913a926fd4
22 changed files with 176 additions and 648 deletions

View File

@@ -35,7 +35,7 @@ struct KernelImmutableData {
KernelImmutableData(L0::Device *l0device = nullptr);
virtual ~KernelImmutableData();
MOCKABLE_VIRTUAL ze_result_t initialize(NEO::KernelInfo *kernelInfo, Device *device, uint32_t computeUnitsUsedForSratch,
ze_result_t initialize(NEO::KernelInfo *kernelInfo, Device *device, uint32_t computeUnitsUsedForSratch,
NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer,
bool internalKernel);
@@ -48,14 +48,7 @@ struct KernelImmutableData {
}
uint32_t getIsaSize() const;
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const;
void setIsaPerKernelAllocation(NEO::GraphicsAllocation *allocation);
inline NEO::GraphicsAllocation *getIsaParentAllocation() const { return isaParentAllocation; }
inline void setIsaParentAllocation(NEO::GraphicsAllocation *allocation) { isaParentAllocation = allocation; };
inline size_t getIsaOffsetInParentAllocation() const { return isaSubAllocationOffset; }
inline void setIsaSubAllocationOffset(size_t offset) { isaSubAllocationOffset = offset; }
inline void setIsaSubAllocationSize(size_t size) { isaSubAllocationSize = size; }
inline size_t getIsaSubAllocationSize() const { return isaSubAllocationSize; }
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); }
const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); }
@@ -87,9 +80,6 @@ struct KernelImmutableData {
NEO::KernelInfo *kernelInfo = nullptr;
NEO::KernelDescriptor *kernelDescriptor = nullptr;
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
NEO::GraphicsAllocation *isaParentAllocation = nullptr;
size_t isaSubAllocationOffset = 0lu;
size_t isaSubAllocationSize = 0lu;
uint32_t crossThreadDataSize = 0;
std::unique_ptr<uint8_t[]> crossThreadDataTemplate = nullptr;

View File

@@ -75,6 +75,20 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
auto neoDevice = deviceImp->getActiveDevice();
auto memoryManager = neoDevice->getMemoryManager();
auto kernelIsaSize = kernelInfo->heapInfo.kernelHeapSize;
UNRECOVERABLE_IF(kernelIsaSize == 0);
UNRECOVERABLE_IF(!kernelInfo->heapInfo.pKernelHeap);
const auto allocType = internalKernel ? NEO::AllocationType::KERNEL_ISA_INTERNAL : NEO::AllocationType::KERNEL_ISA;
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(
{neoDevice->getRootDeviceIndex(), kernelIsaSize, allocType, neoDevice->getDeviceBitfield()});
if (!allocation) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
isaGraphicsAllocation.reset(allocation);
if (neoDevice->getDebugger() && kernelInfo->kernelDescriptor.external.debugData.get()) {
createRelocatedDebugData(globalConstBuffer, globalVarBuffer);
@@ -189,41 +203,18 @@ void KernelImmutableData::createRelocatedDebugData(NEO::GraphicsAllocation *glob
}
}
// Returns the allocation that holds this kernel's ISA: the parent allocation when one
// has been set, otherwise the per-kernel allocation owned by isaGraphicsAllocation.
NEO::GraphicsAllocation *KernelImmutableData::getIsaGraphicsAllocation() const {
    auto *parentAllocation = this->getIsaParentAllocation();
    if (parentAllocation == nullptr) {
        // No parent allocation: the per-kernel allocation is expected to exist.
        DEBUG_BREAK_IF(this->isaGraphicsAllocation.get() == nullptr);
        return this->isaGraphicsAllocation.get();
    }

    // With a parent allocation set, no L0 debugger and no per-kernel allocation are expected.
    DEBUG_BREAK_IF(this->device->getL0Debugger() != nullptr);
    DEBUG_BREAK_IF(this->isaGraphicsAllocation != nullptr);
    return parentAllocation;
}
// Returns the ISA size narrowed to uint32_t: the recorded sub-allocation size when a
// parent allocation is set, otherwise the underlying buffer size of the per-kernel allocation.
uint32_t KernelImmutableData::getIsaSize() const {
    if (!this->getIsaParentAllocation()) {
        return static_cast<uint32_t>(this->isaGraphicsAllocation->getUnderlyingBufferSize());
    }

    // With a parent allocation set, no L0 debugger and no per-kernel allocation are expected.
    DEBUG_BREAK_IF(this->device->getL0Debugger() != nullptr);
    DEBUG_BREAK_IF(this->isaGraphicsAllocation != nullptr);
    return static_cast<uint32_t>(this->isaSubAllocationSize);
}
// Hands `allocation` to isaGraphicsAllocation, which takes ownership of it.
// A parent allocation is not expected to be set at this point.
void KernelImmutableData::setIsaPerKernelAllocation(NEO::GraphicsAllocation *allocation) {
    DEBUG_BREAK_IF(isaParentAllocation != nullptr);
    isaGraphicsAllocation.reset(allocation);
}
ze_result_t KernelImp::getBaseAddress(uint64_t *baseAddress) {
if (baseAddress) {
auto gmmHelper = module->getDevice()->getNEODevice()->getGmmHelper();
*baseAddress = gmmHelper->decanonize(this->kernelImmData->getIsaGraphicsAllocation()->getGpuAddress() +
this->kernelImmData->getIsaOffsetInParentAllocation());
*baseAddress = gmmHelper->decanonize(this->kernelImmData->getIsaGraphicsAllocation()->getGpuAddress());
}
return ZE_RESULT_SUCCESS;
}
// Returns the underlying buffer size of the kernel's ISA allocation, narrowed to uint32_t.
uint32_t KernelImmutableData::getIsaSize() const {
    const auto underlyingSize = isaGraphicsAllocation->getUnderlyingBufferSize();
    return static_cast<uint32_t>(underlyingSize);
}
// Constructs a kernel bound to `module`; the pointer is stored in the `module` member.
KernelImp::KernelImp(Module *module) : module(module) {}
KernelImp::~KernelImp() {
@@ -957,12 +948,10 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
UNRECOVERABLE_IF(!this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap);
if (isaAllocation->getAllocationType() == NEO::AllocationType::KERNEL_ISA_INTERNAL) {
isaAllocation->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
isaAllocation->setAubWritable(true, std::numeric_limits<uint32_t>::max());
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(neoDevice->getRootDeviceEnvironment(), *isaAllocation),
*neoDevice,
isaAllocation,
this->kernelImmData->getIsaOffsetInParentAllocation(),
0,
this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap,
static_cast<size_t>(this->kernelImmData->getKernelInfo()->heapInfo.kernelHeapSize));
}
@@ -1220,10 +1209,6 @@ NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const {
return getImmutableData()->getIsaGraphicsAllocation();
}
// Returns the ISA offset reported by the kernel's immutable data, widened to uint64_t.
uint64_t KernelImp::getIsaOffsetInParentAllocation() const {
    const auto offsetInParent = getImmutableData()->getIsaOffsetInParentAllocation();
    return static_cast<uint64_t>(offsetInParent);
}
ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) {
auto &threadArbitrationPolicy = const_cast<NEO::ThreadArbitrationPolicy &>(getKernelDescriptor().kernelAttributes.threadArbitrationPolicy);
if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST) {

View File

@@ -141,7 +141,6 @@ struct KernelImp : Kernel {
}
NEO::GraphicsAllocation *getIsaAllocation() const override;
uint64_t getIsaOffsetInParentAllocation() const override;
uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; }
bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; }

View File

@@ -32,7 +32,6 @@
#include "shared/source/helpers/kernel_helpers.h"
#include "shared/source/helpers/string.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
@@ -495,19 +494,12 @@ void ModuleTranslationUnit::processDebugData() {
ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
: device(device), translationUnit(std::make_unique<ModuleTranslationUnit>(device)),
moduleBuildLog(moduleBuildLog), kernelsIsaParentRegion(nullptr), type(type) {
auto &gfxCoreHelper = device->getGfxCoreHelper();
auto &hwInfo = device->getHwInfo();
this->isaAllocationPageSize = gfxCoreHelper.useSystemMemoryPlacementForISA(hwInfo) ? MemoryConstants::pageSize : MemoryConstants::pageSize64k;
this->productFamily = hwInfo.platform.eProductFamily;
moduleBuildLog(moduleBuildLog), type(type) {
productFamily = device->getHwInfo().platform.eProductFamily;
}
ModuleImp::~ModuleImp() {
this->kernelImmDatas.clear();
if (this->kernelsIsaParentRegion) {
DEBUG_BREAK_IF(this->device->getNEODevice()->getMemoryManager() == nullptr);
this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(this->kernelsIsaParentRegion.release());
}
kernelImmDatas.clear();
}
NEO::Zebin::Debug::Segments ModuleImp::getZebinSegments() {
@@ -562,8 +554,7 @@ ze_result_t ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neo
for (auto &ki : kernelImmDatas) {
if (!ki->isIsaCopiedToAllocation()) {
ki->getIsaGraphicsAllocation()->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
ki->getIsaGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *ki->getIsaGraphicsAllocation()),
*neoDevice, ki->getIsaGraphicsAllocation(), 0, ki->getKernelInfo()->heapInfo.pKernelHeap,
static_cast<size_t>(ki->getKernelInfo()->heapInfo.kernelHeapSize));
@@ -696,98 +687,23 @@ inline ze_result_t ModuleImp::checkIfBuildShouldBeFailed(NEO::Device *neoDevice)
return ZE_RESULT_SUCCESS;
}
ze_result_t ModuleImp::initializeKernelImmutableDatas() {
if (size_t kernelsCount = this->translationUnit->programInfo.kernelInfos.size(); kernelsCount > 0lu) {
ze_result_t result;
if (result = this->allocateKernelImmutableDatas(kernelsCount); result != ZE_RESULT_SUCCESS) {
return result;
}
for (size_t i = 0lu; i < kernelsCount; i++) {
result = kernelImmDatas[i]->initialize(this->translationUnit->programInfo.kernelInfos[i],
device,
device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
this->translationUnit->globalConstBuffer,
this->translationUnit->globalVarBuffer,
inline ze_result_t ModuleImp::initializeKernelImmutableDatas() {
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
kernelImmDatas.reserve(this->translationUnit->programInfo.kernelInfos.size());
for (auto &ki : this->translationUnit->programInfo.kernelInfos) {
std::unique_ptr<KernelImmutableData> kernelImmData{new KernelImmutableData(this->device)};
result = kernelImmData->initialize(ki, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer,
this->type == ModuleType::Builtin);
if (result != ZE_RESULT_SUCCESS) {
kernelImmDatas[i].reset();
return result;
}
}
kernelImmDatas.push_back(std::move(kernelImmData));
}
return ZE_RESULT_SUCCESS;
}
// Appends `kernelsCount` freshly constructed KernelImmutableData objects to kernelImmDatas
// and then assigns their ISA allocations via setIsaGraphicsAllocations().
// Does nothing and returns ZE_RESULT_SUCCESS when kernelImmDatas already holds exactly
// `kernelsCount` entries.
ze_result_t ModuleImp::allocateKernelImmutableDatas(size_t kernelsCount) {
    auto &immDatas = this->kernelImmDatas;
    if (immDatas.size() == kernelsCount) {
        return ZE_RESULT_SUCCESS;
    }

    immDatas.reserve(kernelsCount);
    for (size_t remaining = kernelsCount; remaining > 0lu; --remaining) {
        immDatas.push_back(std::make_unique<KernelImmutableData>(this->device));
    }

    return this->setIsaGraphicsAllocations();
}
// Assigns ISA backing memory to every entry of kernelImmDatas.
//
// The aligned, padded sizes of all kernels are summed first. When no L0 debugger is
// attached and the sum does not exceed isaAllocationPageSize, a single allocation is made,
// stored in kernelsIsaParentRegion, and each kernel is given its offset and size within it.
// Otherwise each kernel receives its own allocation sized by its kernelHeapSize.
//
// Returns ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY as soon as an allocation fails,
// ZE_RESULT_SUCCESS otherwise.
ze_result_t ModuleImp::setIsaGraphicsAllocations() {
    const size_t kernelsCount = this->kernelImmDatas.size();
    const auto &kernelInfos = this->translationUnit->programInfo.kernelInfos;

    // Lay the kernels out back to back: each entry is {offset, size} of one kernel's chunk.
    std::vector<std::pair<size_t, size_t>> kernelsChunks(kernelsCount);
    size_t kernelsIsaTotalSize = 0lu;
    for (size_t i = 0lu; i < kernelsCount; i++) {
        const auto &heapInfo = kernelInfos[i]->heapInfo;
        DEBUG_BREAK_IF(heapInfo.kernelHeapSize == 0lu);
        DEBUG_BREAK_IF(!heapInfo.pKernelHeap);
        const auto chunkSize = this->computeKernelIsaAllocationAlignedSizeWithPadding(heapInfo.kernelHeapSize);
        kernelsChunks[i] = {kernelsIsaTotalSize, chunkSize};
        kernelsIsaTotalSize += chunkSize;
    }

    const bool debuggerEnabled = (this->device->getL0Debugger() != nullptr);
    const bool useSharedParentAllocation = !debuggerEnabled && kernelsIsaTotalSize <= isaAllocationPageSize;

    if (!useSharedParentAllocation) {
        // One allocation per kernel.
        for (size_t i = 0lu; i < kernelsCount; i++) {
            auto *perKernelAllocation = this->allocateKernelsIsaMemory(kernelInfos[i]->heapInfo.kernelHeapSize);
            if (perKernelAllocation == nullptr) {
                return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
            }
            this->kernelImmDatas[i]->setIsaPerKernelAllocation(perKernelAllocation);
        }
        return ZE_RESULT_SUCCESS;
    }

    // One parent allocation shared by all kernels of the module.
    auto *parentAllocation = this->allocateKernelsIsaMemory(kernelsIsaTotalSize);
    if (parentAllocation == nullptr) {
        return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
    }
    this->kernelsIsaParentRegion.reset(parentAllocation);

    for (size_t i = 0lu; i < kernelsCount; i++) {
        const auto &[isaOffset, isaSize] = kernelsChunks[i];
        auto &kernelImmData = this->kernelImmDatas[i];
        kernelImmData->setIsaParentAllocation(this->kernelsIsaParentRegion.get());
        kernelImmData->setIsaSubAllocationOffset(isaOffset);
        kernelImmData->setIsaSubAllocationSize(isaSize);
    }

    return ZE_RESULT_SUCCESS;
}
// Returns `isaSize` plus the ISA allocation padding reported by the device's GfxCoreHelper,
// aligned up to the kernel ISA pointer alignment reported by the same helper.
size_t ModuleImp::computeKernelIsaAllocationAlignedSizeWithPadding(size_t isaSize) {
    const auto padding = this->device->getGfxCoreHelper().getPaddingForISAAllocation();
    const auto startPointerAlignment = this->device->getGfxCoreHelper().getKernelIsaPointerAlignment();
    return alignUp(padding + isaSize, startPointerAlignment);
}
// Requests `size` of kernel ISA memory from the NEO device's memory manager.
// Builtin modules use AllocationType::KERNEL_ISA_INTERNAL, all others KERNEL_ISA.
// The memory manager's result is returned unchanged; callers in this file treat nullptr as failure.
NEO::GraphicsAllocation *ModuleImp::allocateKernelsIsaMemory(size_t size) {
    const bool isBuiltinModule = (this->type == ModuleType::Builtin);
    const auto allocType = isBuiltinModule ? NEO::AllocationType::KERNEL_ISA_INTERNAL
                                           : NEO::AllocationType::KERNEL_ISA;

    auto neoDevice = this->device->getNEODevice();
    auto memoryManager = neoDevice->getMemoryManager();
    return memoryManager->allocateGraphicsMemoryWithProperties(
        {neoDevice->getRootDeviceIndex(), size, allocType, neoDevice->getDeviceBitfield()});
}
void ModuleImp::createDebugZebin() {
auto refBin = ArrayRef<const uint8_t>::fromAny(translationUnit->unpackedDeviceBinary.get(), translationUnit->unpackedDeviceBinarySize);
auto segments = getZebinSegments();
@@ -964,10 +880,7 @@ void ModuleImp::copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSeg
auto segmentId = &kernelImmData - &this->kernelImmDatas[0];
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *kernelImmData->getIsaGraphicsAllocation()),
*device->getNEODevice(),
kernelImmData->getIsaGraphicsAllocation(),
kernelImmData->getIsaOffsetInParentAllocation(),
isaSegmentsForPatching[segmentId].hostPointer,
*device->getNEODevice(), kernelImmData->getIsaGraphicsAllocation(), 0, isaSegmentsForPatching[segmentId].hostPointer,
isaSegmentsForPatching[segmentId].segmentSize);
kernelImmData->setIsaCopiedToAllocation();
@@ -1012,9 +925,8 @@ bool ModuleImp::linkBinary() {
if (linkerInput->getExportedFunctionsSegmentId() >= 0) {
auto exportedFunctionHeapId = linkerInput->getExportedFunctionsSegmentId();
this->exportedFunctionsSurface = this->kernelImmDatas[exportedFunctionHeapId]->getIsaGraphicsAllocation();
auto offsetInParentAllocation = this->kernelImmDatas[exportedFunctionHeapId]->getIsaOffsetInParentAllocation();
exportedFunctions.gpuAddress = static_cast<uintptr_t>(exportedFunctionsSurface->getGpuAddressToPatch() + offsetInParentAllocation);
exportedFunctions.segmentSize = this->kernelImmDatas[exportedFunctionHeapId]->getIsaSize();
exportedFunctions.gpuAddress = static_cast<uintptr_t>(exportedFunctionsSurface->getGpuAddressToPatch());
exportedFunctions.segmentSize = exportedFunctionsSurface->getUnderlyingBufferSize();
}
Linker::KernelDescriptorsT kernelDescriptors;
@@ -1026,9 +938,7 @@ bool ModuleImp::linkBinary() {
auto &kernHeapInfo = kernelInfo->heapInfo;
const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.kernelHeapSize));
auto isaAddressToPatch = static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch() +
kernelImmDatas.at(i)->getIsaOffsetInParentAllocation());
isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), isaAddressToPatch, kernHeapInfo.kernelHeapSize});
isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch()), kernHeapInfo.kernelHeapSize});
kernelDescriptors.push_back(&kernelInfo->kernelDescriptor);
}
}
@@ -1091,10 +1001,10 @@ ze_result_t ModuleImp::getFunctionPointer(const char *pFunctionName, void **pfnF
auto kernelImmData = this->getKernelImmutableData(pFunctionName);
if (kernelImmData != nullptr) {
auto isaAllocation = kernelImmData->getIsaGraphicsAllocation();
*pfnFunction = reinterpret_cast<void *>(isaAllocation->getGpuAddress() + kernelImmData->getIsaOffsetInParentAllocation());
*pfnFunction = reinterpret_cast<void *>(isaAllocation->getGpuAddress());
// Ensure that any kernel in this module which uses this kernel module function pointer has access to the memory.
for (auto &data : this->getKernelImmutableDataVector()) {
if (data.get() != kernelImmData && data.get()->getIsaOffsetInParentAllocation() == 0lu) {
if (data.get() != kernelImmData) {
data.get()->getResidencyContainer().insert(data.get()->getResidencyContainer().end(), isaAllocation);
}
}
@@ -1345,9 +1255,7 @@ ze_result_t ModuleImp::performDynamicLink(uint32_t numModules,
auto &kernHeapInfo = kernelInfo->heapInfo;
const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.kernelHeapSize));
auto isaAddressToPatch = static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch() +
kernelImmDatas.at(i)->getIsaOffsetInParentAllocation());
isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), isaAddressToPatch, kernHeapInfo.kernelHeapSize});
isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch()), kernHeapInfo.kernelHeapSize});
}
}
for (const auto &unresolvedExternal : moduleId->unresolvedExternalsInfo) {
@@ -1567,14 +1475,9 @@ void ModuleImp::notifyModuleDestroy() {
StackVec<NEO::GraphicsAllocation *, 32> ModuleImp::getModuleAllocations() {
StackVec<NEO::GraphicsAllocation *, 32> allocs;
if (auto isaParentAllocation = this->getKernelsIsaParentAllocation(); isaParentAllocation != nullptr) {
allocs.push_back(isaParentAllocation);
} else {
// ISA allocations not optimized
for (auto &kernImmData : kernelImmDatas) {
allocs.push_back(kernImmData->getIsaGraphicsAllocation());
}
}
if (translationUnit) {
if (translationUnit->globalVarBuffer) {

View File

@@ -124,7 +124,6 @@ struct ModuleImp : public Module {
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override;
const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; }
NEO::GraphicsAllocation *getKernelsIsaParentAllocation() const { return kernelsIsaParentRegion.get(); }
uint32_t getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const override;
@@ -156,9 +155,8 @@ struct ModuleImp : public Module {
}
protected:
MOCKABLE_VIRTUAL ze_result_t initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice);
ze_result_t initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice);
ze_result_t checkIfBuildShouldBeFailed(NEO::Device *neoDevice);
ze_result_t allocateKernelImmutableDatas(size_t kernelsCount);
ze_result_t initializeKernelImmutableDatas();
void copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSegmentsForPatching);
void verifyDebugCapabilities();
@@ -169,9 +167,6 @@ struct ModuleImp : public Module {
void notifyModuleCreate();
void notifyModuleDestroy();
bool populateHostGlobalSymbolsMap(std::unordered_map<std::string, std::string> &devToHostNameMapping);
ze_result_t setIsaGraphicsAllocations();
MOCKABLE_VIRTUAL size_t computeKernelIsaAllocationAlignedSizeWithPadding(size_t isaSize);
MOCKABLE_VIRTUAL NEO::GraphicsAllocation *allocateKernelsIsaMemory(size_t size);
StackVec<NEO::GraphicsAllocation *, 32> getModuleAllocations();
Device *device = nullptr;
@@ -179,7 +174,6 @@ struct ModuleImp : public Module {
std::unique_ptr<ModuleTranslationUnit> translationUnit;
ModuleBuildLog *moduleBuildLog = nullptr;
NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr;
std::unique_ptr<NEO::GraphicsAllocation> kernelsIsaParentRegion;
std::vector<std::unique_ptr<KernelImmutableData>> kernelImmDatas;
NEO::Linker::RelocatedSymbolsMap symbols;
@@ -204,7 +198,6 @@ struct ModuleImp : public Module {
uint32_t debugElfHandle = 0;
uint32_t profileFlags = 0;
uint64_t moduleLoadAddress = std::numeric_limits<uint64_t>::max();
size_t isaAllocationPageSize = 0;
NEO::Linker::PatchableSegments isaSegmentsForPatching;
std::vector<std::vector<char>> patchedIsaTempStorage;

View File

@@ -124,13 +124,24 @@ void ModuleImmutableDataFixture::tearDown() {
DeviceFixture::tearDown();
}
void ModuleFixture::setUp(bool skipCreatingModules) {
// Creates a ProxyModuleImp and initializes it from `desc`.
// The initialization status is written to `*result`. On failure the module is destroyed
// and nullptr is returned; on success the caller receives the new module.
ModuleFixture::ProxyModuleImp *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc,
                                                                     ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
    auto *proxyModule = new ProxyModuleImp(device, moduleBuildLog, type);

    *result = proxyModule->initialize(desc, device->getNEODevice());
    if (*result == ZE_RESULT_SUCCESS) {
        return proxyModule;
    }

    proxyModule->destroy();
    return nullptr;
}
void ModuleFixture::setUp() {
DebugManager.flags.FailBuildProgramWithStatefulAccess.set(0);
DeviceFixture::setUp();
if (skipCreatingModules == false) {
createModuleFromMockBinary();
}
}
void ModuleFixture::createModuleFromMockBinary(ModuleType type) {
@@ -144,13 +155,7 @@ void ModuleFixture::createModuleFromMockBinary(ModuleType type) {
ModuleBuildLog *moduleBuildLog = nullptr;
ze_result_t result = ZE_RESULT_SUCCESS;
if (!module) {
module.reset(new WhiteBox<::L0::Module>{device, moduleBuildLog, type});
}
result = module->initialize(&moduleDesc, device->getNEODevice());
if (result != ZE_RESULT_SUCCESS) {
module->destroy();
}
module.reset(ProxyModuleImp::create(device, &moduleDesc, moduleBuildLog, type, &result));
}
void ModuleFixture::createKernel() {

View File

@@ -123,8 +123,19 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
struct ModuleFixture : public DeviceFixture {
void setUp(bool skipCreatingModules);
void setUp() { this->setUp(false); }
// Fixture-local module type derived from WhiteBox<::L0::Module>; it inherits the base
// constructors and adds an accessor plus a static factory.
struct ProxyModuleImp : public WhiteBox<::L0::Module> {
using BaseClass = WhiteBox<::L0::Module>;
using BaseClass::BaseClass;
// Returns a mutable reference to the module's kernelImmDatas vector.
std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmDatas() {
return kernelImmDatas;
}
// Factory declared here and defined out of line; takes an out-parameter `result`.
static ModuleFixture::ProxyModuleImp *create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
};
void setUp();
void createModuleFromMockBinary(ModuleType type = ModuleType::User);
@@ -136,7 +147,7 @@ struct ModuleFixture : public DeviceFixture {
const std::string kernelName = "test";
const uint32_t numKernelArguments = 6;
std::unique_ptr<WhiteBox<::L0::Module>> module;
std::unique_ptr<ProxyModuleImp> module;
std::unique_ptr<WhiteBox<::L0::KernelImp>> kernel;
std::unique_ptr<ZebinTestData::ZebinWithL0TestCommonModule> zebinData;
DebugManagerStateRestore restore;

View File

@@ -34,7 +34,6 @@ set(L0_MOCKS_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_module.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_module.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_sampler.h
)

View File

@@ -1,26 +0,0 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
#include "level_zero/core/source/device/device.h"
namespace L0 {
namespace ult {
// Runs the base-class translation unit initialization, then replaces the translation unit's
// global const / global var buffers with the mock buffers when those are set.
// The base-class result is returned unchanged.
ze_result_t WhiteBox<::L0::Module>::initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice) {
    const auto baseResult = this->BaseClass::initializeTranslationUnit(desc, neoDevice);

    if (auto *constBuffer = this->mockGlobalConstBuffer; constBuffer) {
        this->translationUnit->globalConstBuffer = constBuffer;
    }
    if (auto *varBuffer = this->mockGlobalVarBuffer; varBuffer) {
        this->translationUnit->globalVarBuffer = varBuffer;
    }

    return baseResult;
}
} // namespace ult
} // namespace L0

View File

@@ -13,7 +13,6 @@
#include "shared/test/common/test_macros/mock_method_macros.h"
#include "level_zero/core/source/module/module_imp.h"
#include "level_zero/core/test/unit_tests/mock.h"
#include "level_zero/core/test/unit_tests/white_box.h"
#include "gtest/gtest.h"
@@ -49,7 +48,6 @@ constexpr inline MockModuleTranslationUnit *toMockPtr(L0::ModuleTranslationUnit
template <>
struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
using BaseClass = ::L0::ModuleImp;
using BaseClass::allocateKernelsIsaMemory;
using BaseClass::allocatePrivateMemoryPerDispatch;
using BaseClass::BaseClass;
using BaseClass::builtFromSPIRv;
@@ -63,7 +61,6 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
using BaseClass::isFunctionSymbolExportEnabled;
using BaseClass::isGlobalSymbolExportEnabled;
using BaseClass::kernelImmDatas;
using BaseClass::setIsaGraphicsAllocations;
using BaseClass::symbols;
using BaseClass::translationUnit;
using BaseClass::type;
@@ -73,11 +70,6 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
: ::L0::ModuleImp{device, moduleBuildLog, type} {
this->translationUnit.reset(new MockModuleTranslationUnit{device});
}
ze_result_t initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice) override;
NEO::GraphicsAllocation *mockGlobalVarBuffer = nullptr;
NEO::GraphicsAllocation *mockGlobalConstBuffer = nullptr;
};
using Module = WhiteBox<::L0::Module>;
@@ -99,23 +91,15 @@ struct Mock<Module> : public Module {
(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog));
ADDMETHOD_NOBASE(getProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_module_properties_t * pModuleProperties));
ADDMETHOD_NOBASE(getGlobalPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pGlobalName, size_t *pSize, void **pPtr));
ADDMETHOD(allocateKernelsIsaMemory, NEO::GraphicsAllocation *, true, nullptr, (size_t isaSize), (isaSize));
ADDMETHOD(computeKernelIsaAllocationAlignedSizeWithPadding, size_t, true, 0ul, (size_t isaSize), (isaSize));
};
struct MockModule : public L0::ModuleImp {
using ModuleImp::allocateKernelImmutableDatas;
using ModuleImp::allocateKernelsIsaMemory;
using ModuleImp::computeKernelIsaAllocationAlignedSizeWithPadding;
using ModuleImp::debugModuleHandle;
using ModuleImp::getModuleAllocations;
using ModuleImp::initializeKernelImmutableDatas;
using ModuleImp::isaAllocationPageSize;
using ModuleImp::isFunctionSymbolExportEnabled;
using ModuleImp::isGlobalSymbolExportEnabled;
using ModuleImp::kernelImmDatas;
using ModuleImp::populateHostGlobalSymbolsMap;
using ModuleImp::setIsaGraphicsAllocations;
using ModuleImp::symbols;
using ModuleImp::translationUnit;
@@ -131,8 +115,6 @@ struct MockModule : public L0::ModuleImp {
return kernelImmData;
}
std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVectorRef() { return kernelImmDatas; }
KernelImmutableData *kernelImmData = nullptr;
};

View File

@@ -457,17 +457,11 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetAllEventData, IsAtLeastSkl) {
class MockQueryKernelTimestampsKernel : public L0::KernelImp {
public:
MockQueryKernelTimestampsKernel(MockModule *module) : KernelImp{module}, module{module} {
MockQueryKernelTimestampsKernel(L0::Module *module) : KernelImp(module) {
mockKernelImmutableData.kernelDescriptor = &mockKernelDescriptor;
size_t stubKernelHeapSize = 42;
mockKernelImmutableData.setIsaPerKernelAllocation(module->allocateKernelsIsaMemory(stubKernelHeapSize));
this->kernelImmData = &mockKernelImmutableData;
}
~MockQueryKernelTimestampsKernel() override {
this->module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(mockKernelImmutableData.isaGraphicsAllocation.release());
}
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override {
if (argIndex == 0) {
index0Allocation = allocation;
@@ -486,13 +480,12 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetA
NEO::GraphicsAllocation *index0Allocation = nullptr;
KernelDescriptor mockKernelDescriptor = {};
WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {};
MockModule *module = nullptr;
};
struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl {
MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
tmpModule = std::make_unique<MockModule>(device, nullptr, ModuleType::Builtin);
tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>(tmpModule.get());
tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>(static_cast<L0::ModuleImp *>(tmpModule.get()));
}
MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
return tmpMockKernel.get();

View File

@@ -1382,7 +1382,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichTogethe
auto overAllocMinSize = static_cast<uint32_t>(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) + margin1KB;
auto kernelNames = std::array<std::string, 2u>{"test1", "test2"};
auto &kernelImmDatas = this->module->kernelImmDatas;
auto proxyModuleImpl = static_cast<ModuleFixture::ProxyModuleImp *>(this->module.get());
auto &kernelImmDatas = proxyModuleImpl->getKernelImmDatas();
for (size_t i = 0; i < kernelsNb; i++) {
auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[i]->getDescriptor());
kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = overAllocMinSize + static_cast<uint32_t>(i * MemoryConstants::cacheLineSize);
@@ -1418,7 +1419,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichDontExc
auto underAllocSize = static_cast<uint32_t>(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) - margin128KB;
auto kernelNames = std::array<std::string, 2u>{"test1", "test2"};
auto &kernelImmDatas = this->module->kernelImmDatas;
auto proxyModuleImpl = static_cast<ModuleFixture::ProxyModuleImp *>(this->module.get());
auto &kernelImmDatas = proxyModuleImpl->getKernelImmDatas();
for (size_t i = 0; i < kernelsNb; i++) {
auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[i]->getDescriptor());
kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = underAllocSize;

View File

@@ -8,7 +8,6 @@
#pragma once
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_builtins.h"
#include "shared/test/common/mocks/mock_compilers.h"
@@ -85,12 +84,6 @@ struct L0DebuggerHwFixture : public L0DebuggerFixture {
// Returns debuggerHw cast to MockDebuggerL0Hw<GfxFamily>. The cast is a static_cast, so it
// is unchecked; NOTE(review): presumably debuggerHw is always created as that mock type — confirm.
MockDebuggerL0Hw<GfxFamily> *getMockDebuggerL0Hw() {
return static_cast<MockDebuggerL0Hw<GfxFamily> *>(debuggerHw);
}
// Test helper: requests `size` of kernel ISA memory from neoDevice's memory manager.
// `internal` selects AllocationType::KERNEL_ISA_INTERNAL instead of KERNEL_ISA.
NEO::GraphicsAllocation *allocateIsaMemory(size_t size, bool internal) {
    const auto isaAllocationType = internal ? NEO::AllocationType::KERNEL_ISA_INTERNAL
                                            : NEO::AllocationType::KERNEL_ISA;
    auto memoryManager = neoDevice->getMemoryManager();
    return memoryManager->allocateGraphicsMemoryWithProperties(
        {neoDevice->getRootDeviceIndex(), size, isaAllocationType, neoDevice->getDeviceBitfield()});
}
DebuggerL0 *debuggerHw = nullptr;
};

View File

@@ -60,7 +60,6 @@ TEST_F(KernelInitializeTest, givenDebuggingEnabledWhenKernelsAreInitializedThenA
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device);
kernelImmutableData.setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo.heapInfo.kernelHeapSize, false));
memoryOperationsHandler->makeResidentCalledCount = 0;
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
@@ -328,14 +327,12 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinDebugDataWhenInitializingMo
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
@@ -374,12 +371,10 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenDumpElfFlagAndZebinWhenInitializ
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
@@ -438,7 +433,6 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinWhenModuleIsInitializedAndD
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
@@ -573,7 +567,6 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenModuleDebugHandleZeroWhenInitial
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));

View File

@@ -1865,9 +1865,9 @@ TEST_F(KernelLocalIdsTest, WhenKernelIsCreatedThenDefaultLocalIdGenerationbyRunt
EXPECT_TRUE(kernel->requiresGenerationOfLocalIdsByRuntime());
}
struct KernelIsaFixture : ModuleFixture {
void setUp() {
ModuleFixture::setUp(true);
struct KernelIsaTests : Test<ModuleFixture> {
void SetUp() override {
Test<ModuleFixture>::SetUp();
auto &capabilityTable = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
@@ -1883,98 +1883,137 @@ struct KernelIsaFixture : ModuleFixture {
}
std::unique_ptr<OsContext> bcsOsContext;
uint32_t testKernelHeap = 0;
};
using KernelIsaTests = Test<KernelIsaFixture>;
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
DebugManagerStateRestore restore;
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1));
this->createModuleFromMockBinary(ModuleType::User);
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device);
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
auto initialTaskCount = bcsCsr->peekTaskCount();
auto &kernelImmutableData = this->module->kernelImmDatas.back();
if (kernelImmutableData->getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) {
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
if (kernelImmutableData.getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) {
EXPECT_EQ(initialTaskCount + 1, bcsCsr->peekTaskCount());
} else {
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
}
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
// With ForceLocalMemoryAccessMode set to CpuAccessAllowed, initializing kernel immutable data
// must leave the BCS command stream receiver's task count unchanged.
// NOTE(review): this listing appears to merge lines from both sides of the revert diff
// (module creation from the mock binary AND a hand-built KernelImmutableData) - reconcile before compiling.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
// Restores debug flags when the test scope ends.
DebugManagerStateRestore restore;
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed));
// NOTE(review): the literal 1 is an int, so the shift is evaluated in int - confirm the enumerator value keeps the shift count below the int width.
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1));
this->createModuleFromMockBinary(ModuleType::User);
// Minimal kernel description: a one-byte heap backed by a local variable.
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device);
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
// Snapshot taken before initialize() so any BCS submission would show up as a changed count.
auto initialTaskCount = bcsCsr->peekTaskCount();
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
// With CPU access disallowed and blitterOperationsSupported forced to false, initializing kernel
// immutable data must leave the BCS command stream receiver's task count unchanged.
// NOTE(review): this listing appears to merge lines from both sides of the revert diff
// (module creation from the mock binary AND a hand-built KernelImmutableData) - reconcile before compiling.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
// Restores debug flags when the test scope ends.
DebugManagerStateRestore restore;
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1));
this->createModuleFromMockBinary(ModuleType::User);
// Turn the blitter capability off in the mutable hardware info of root device environment 0.
device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;
// Minimal kernel description: a one-byte heap backed by a local variable.
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device);
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
// Snapshot taken before initialize() so any BCS submission would show up as a changed count.
auto initialTaskCount = bcsCsr->peekTaskCount();
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
// Expects the ISA allocation of an internal kernel to have type KERNEL_ISA_INTERNAL.
// NOTE(review): `kernelImmutableData` is declared twice below (a reference into module->kernelImmDatas
// and a local object); the listing appears to merge both sides of the revert diff - keep only one variant.
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithInternalIsaThenCorrectAllocationTypeIsUsed) {
this->createModuleFromMockBinary(ModuleType::Builtin);
// Minimal kernel description: a one-byte heap backed by a local variable.
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
auto &kernelImmutableData = this->module->kernelImmDatas.back();
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernelImmutableData->getIsaGraphicsAllocation()->getAllocationType());
KernelImmutableData kernelImmutableData(device);
// The final `true` argument is the internalKernel flag.
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, true);
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType());
}
// Expects the ISA allocation of a non-internal (user) kernel to have type KERNEL_ISA.
// NOTE(review): `kernelImmutableData` is declared twice below (a reference into module->kernelImmDatas
// and a local object); the listing appears to merge both sides of the revert diff - keep only one variant.
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithNonInternalIsaThenCorrectAllocationTypeIsUsed) {
this->createModuleFromMockBinary(ModuleType::User);
// Minimal kernel description: a one-byte heap backed by a local variable.
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
auto &kernelImmutableData = this->module->kernelImmDatas.back();
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernelImmutableData->getIsaGraphicsAllocation()->getAllocationType());
KernelImmutableData kernelImmutableData(device);
// The final `false` argument is the internalKernel flag.
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType());
}
// Expects the ISA size to equal the kernel heap size plus the padding reported by
// GfxCoreHelper::getPaddingForISAAllocation().
// NOTE(review): `kernelImmutableData` and `helper` are each declared twice below; the listing appears
// to merge both sides of the revert diff - keep only one variant before compiling.
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithIsaThenPaddingIsAdded) {
this->createModuleFromMockBinary(ModuleType::User);
// Minimal kernel description: a one-byte heap backed by a local variable.
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
auto &kernelImmutableData = this->module->kernelImmDatas.back();
auto kernelHeapSize = kernelImmutableData->getKernelInfo()->heapInfo.kernelHeapSize;
auto &helper = device->getNEODevice()->getGfxCoreHelper();
KernelImmutableData kernelImmutableData(device);
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
auto graphicsAllocation = kernelImmutableData.getIsaGraphicsAllocation();
auto &helper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
size_t isaPadding = helper.getPaddingForISAAllocation();
// Variant comparing the size reported by the immutable data.
EXPECT_EQ(kernelImmutableData->getIsaSize(), kernelHeapSize + isaPadding);
// Variant comparing the underlying buffer size of the allocation itself.
EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo.heapInfo.kernelHeapSize + isaPadding);
}
// Expects both global buffers (const and var) to appear exactly once in the residency container
// of the kernel immutable data.
// NOTE(review): the listing appears to merge both sides of the revert diff (a loop over
// module->kernelImmDatas AND a single hand-built KernelImmutableData) - reconcile before compiling.
TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuffersAreAddedToResidencyContainer) {
// Minimal kernel description: a one-byte heap backed by a local variable.
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.kernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
KernelImmutableData kernelImmutableData(device);
// Both mock allocations are built from the same fake address and size.
uint64_t gpuAddress = 0x1200;
void *buffer = reinterpret_cast<void *>(gpuAddress);
size_t size = 0x1100;
NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, size);
NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, size);
ModuleBuildLog *moduleBuildLog = nullptr;
this->module.reset(new WhiteBox<::L0::Module>{this->device, moduleBuildLog, ModuleType::User});
this->module->mockGlobalVarBuffer = &globalVarBuffer;
this->module->mockGlobalConstBuffer = &globalConstBuffer;
this->createModuleFromMockBinary(ModuleType::User);
for (auto &kernelImmData : this->module->kernelImmDatas) {
auto &resCont = kernelImmData->getResidencyContainer();
kernelImmutableData.initialize(&kernelInfo, device, 0,
&globalConstBuffer, &globalVarBuffer, false);
auto &resCont = kernelImmutableData.getResidencyContainer();
EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalVarBuffer));
EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalConstBuffer));
}
// Clear the translation unit's pointers to the stack-allocated mocks.
// NOTE(review): presumably this prevents the module from releasing them - confirm against the module teardown path.
this->module->translationUnit->globalConstBuffer = nullptr;
this->module->translationUnit->globalVarBuffer = nullptr;
}
using KernelImpPatchBindlessTest = Test<ModuleFixture>;

View File

@@ -26,7 +26,6 @@
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_elf.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_l0_debugger.h"
#include "shared/test/common/mocks/mock_memory_operations_handler.h"
#include "shared/test/common/mocks/mock_modules_zebin.h"
#include "shared/test/common/test_macros/hw_test.h"
@@ -105,71 +104,7 @@ HWTEST_F(ModuleTest, givenUserModuleWhenCreatedThenCorrectAllocationTypeIsUsedFo
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernel->getIsaAllocation()->getAllocationType());
}
template <bool localMemEnabled>
struct ModuleKernelIsaAllocationsFixture : public ModuleFixture {
static constexpr size_t isaAllocationPageSize = (localMemEnabled ? MemoryConstants::pageSize64k : MemoryConstants::pageSize);
using Module = WhiteBox<::L0::Module>;
void setUp() {
this->dbgRestorer = std::make_unique<DebugManagerStateRestore>();
DebugManager.flags.EnableLocalMemory.set(localMemEnabled);
ModuleFixture::setUp();
ModuleBuildLog *moduleBuildLog = nullptr;
auto type = ModuleType::User;
this->module.reset(new Mock<Module>{device, moduleBuildLog, type});
zebinData = std::make_unique<ZebinTestData::ZebinWithL0TestCommonModule>(device->getHwInfo());
const auto &src = zebinData->storage;
this->moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
this->moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.data());
this->moduleDesc.inputSize = src.size();
this->mockModule = static_cast<Mock<Module> *>(this->module.get());
}
void givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned() {
mockModule->allocateKernelsIsaMemoryCallBase = false;
auto result = module->initialize(&this->moduleDesc, device->getNEODevice());
EXPECT_EQ(result, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
void givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned() {
mockModule->allocateKernelsIsaMemoryCallBase = false;
mockModule->computeKernelIsaAllocationAlignedSizeWithPaddingCallBase = false;
mockModule->computeKernelIsaAllocationAlignedSizeWithPaddingResult = isaAllocationPageSize;
auto result = module->initialize(&this->moduleDesc, device->getNEODevice());
EXPECT_EQ(result, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}
Mock<Module> *mockModule = nullptr;
ze_module_desc_t moduleDesc = {};
std::unique_ptr<DebugManagerStateRestore> dbgRestorer = nullptr;
};
// Fixture instantiation with localMemEnabled = true.
using ModuleKernelIsaAllocationsInLocalMemoryTests = Test<ModuleKernelIsaAllocationsFixture<true>>;
// Delegates to the fixture scenario of the same name.
HWTEST_F(ModuleKernelIsaAllocationsInLocalMemoryTests, givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned) {
this->givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned();
}
// Delegates to the fixture scenario of the same name.
HWTEST_F(ModuleKernelIsaAllocationsInLocalMemoryTests, givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned) {
this->givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned();
}
// Fixture instantiation with localMemEnabled = false.
using ModuleKernelIsaAllocationsInSharedMemoryTests = Test<ModuleKernelIsaAllocationsFixture<false>>;
// Delegates to the fixture scenario of the same name.
HWTEST_F(ModuleKernelIsaAllocationsInSharedMemoryTests, givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned) {
this->givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned();
}
// Delegates to the fixture scenario of the same name.
HWTEST_F(ModuleKernelIsaAllocationsInSharedMemoryTests, givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned) {
this->givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned();
}
HWTEST_F(ModuleTest, givenBuiltinModuleWhenCreatedThenCorrectAllocationTypeIsUsedForIsa) {
this->module.reset();
createModuleFromMockBinary(ModuleType::Builtin);
createKernel();
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernel->getIsaAllocation()->getAllocationType());
@@ -2327,8 +2262,16 @@ TEST_F(ModuleFunctionPointerTests, givenModuleWithExportedSymbolThenGetFunctionP
EXPECT_EQ(reinterpret_cast<uint64_t>(functionPointer), module0->kernelImmDatas[0]->getIsaGraphicsAllocation()->getGpuAddress());
}
class DeviceModuleSetArgBufferFixture : public ModuleFixture {
class DeviceModuleSetArgBufferTest : public ModuleFixture, public ::testing::Test {
public:
void SetUp() override {
ModuleFixture::setUp();
}
void TearDown() override {
ModuleFixture::tearDown();
}
void createKernelAndAllocMemory(uint32_t rootDeviceIndex, void **ptr, ze_kernel_handle_t *kernelHandle) {
ze_kernel_desc_t kernelDesc = {};
kernelDesc.pKernelName = kernelName.c_str();
@@ -2340,11 +2283,11 @@ class DeviceModuleSetArgBufferFixture : public ModuleFixture {
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
}
};
using DeviceModuleSetArgBufferTest = Test<DeviceModuleSetArgBufferFixture>;
HWTEST_F(DeviceModuleSetArgBufferTest,
givenValidMemoryUsedinFirstCallToSetArgBufferThenNullptrSetOnTheSecondCallThenArgBufferisUpdatedInEachCallAndSuccessIsReturned) {
uint32_t rootDeviceIndex = 0;
createModuleFromMockBinary();
ze_kernel_handle_t kernelHandle;
void *validBufferPtr = nullptr;
@@ -3531,227 +3474,6 @@ TEST_F(ModuleTest, whenContainsStatefulAccessIsCalledThenResultIsCorrect) {
}
}
template <bool localMemEnabled>
struct ModuleIsaAllocationsFixture : public DeviceFixture {
static constexpr size_t isaAllocationPageSize = (localMemEnabled ? MemoryConstants::pageSize64k : MemoryConstants::pageSize);
static constexpr NEO::MemoryPool isaAllocationMemoryPool = (localMemEnabled ? NEO::MemoryPool::LocalMemory : NEO::MemoryPool::System4KBPagesWith32BitGpuAddressing);
void setUp() {
this->dbgRestorer = std::make_unique<DebugManagerStateRestore>();
DebugManager.flags.EnableLocalMemory.set(localMemEnabled);
DeviceFixture::setUp();
this->neoDevice = this->device->getNEODevice();
this->isaPadding = this->neoDevice->getGfxCoreHelper().getPaddingForISAAllocation();
this->kernelStartPointerAlignment = this->neoDevice->getGfxCoreHelper().getKernelIsaPointerAlignment();
this->mockMemoryManager = static_cast<MockMemoryManager *>(this->neoDevice->getMemoryManager());
this->mockMemoryManager->localMemorySupported[this->neoDevice->getRootDeviceIndex()] = true;
this->mockModule.reset(new MockModule{this->device, nullptr, ModuleType::User});
this->mockModule->translationUnit.reset(new MockModuleTranslationUnit{this->device});
}
void tearDown() {
this->mockModule->translationUnit.reset();
this->mockModule.reset();
DeviceFixture::tearDown();
}
void prepareKernelInfoAndAddToTranslationUnit(size_t isaSize) {
auto kernelInfo = new KernelInfo{};
kernelInfo->heapInfo.pKernelHeap = reinterpret_cast<const void *>(0xdeadbeef0000);
kernelInfo->heapInfo.kernelHeapSize = static_cast<uint32_t>(isaSize);
this->mockModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
}
size_t computeKernelIsaAllocationSizeWithPadding(size_t isaSize) {
auto isaPadding = this->neoDevice->getGfxCoreHelper().getPaddingForISAAllocation();
return isaPadding + isaSize;
}
template <typename FamilyType>
void givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations() {
auto requestedSize = 0x40;
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
auto debugger = MockDebuggerL0Hw<FamilyType>::allocate(neoDevice);
this->neoDevice->getRootDeviceEnvironmentRef().debugger.reset(debugger);
this->mockModule->initializeKernelImmutableDatas();
auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVector();
EXPECT_EQ(nullptr, kernelImmDatas[0]->getIsaParentAllocation());
EXPECT_NE(nullptr, kernelImmDatas[0]->getIsaGraphicsAllocation());
EXPECT_EQ(nullptr, kernelImmDatas[1]->getIsaParentAllocation());
EXPECT_NE(nullptr, kernelImmDatas[1]->getIsaGraphicsAllocation());
}
void givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations() {
auto maxAllocationSizeInPage = alignDown(isaAllocationPageSize - this->isaPadding, this->kernelStartPointerAlignment);
this->prepareKernelInfoAndAddToTranslationUnit(maxAllocationSizeInPage);
auto tinyAllocationSize = 0x8;
this->prepareKernelInfoAndAddToTranslationUnit(tinyAllocationSize);
this->mockModule->initializeKernelImmutableDatas();
auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVector();
EXPECT_EQ(nullptr, kernelImmDatas[0]->getIsaParentAllocation());
EXPECT_NE(nullptr, kernelImmDatas[0]->getIsaGraphicsAllocation());
EXPECT_EQ(kernelImmDatas[0]->getIsaOffsetInParentAllocation(), 0lu);
EXPECT_EQ(kernelImmDatas[0]->getIsaSubAllocationSize(), 0lu);
EXPECT_EQ(nullptr, kernelImmDatas[1]->getIsaParentAllocation());
EXPECT_NE(nullptr, kernelImmDatas[1]->getIsaGraphicsAllocation());
EXPECT_EQ(kernelImmDatas[1]->getIsaOffsetInParentAllocation(), 0lu);
EXPECT_EQ(kernelImmDatas[1]->getIsaSubAllocationSize(), 0lu);
if constexpr (localMemEnabled) {
EXPECT_EQ(isaAllocationPageSize, kernelImmDatas[0]->getIsaSize());
EXPECT_EQ(isaAllocationPageSize, kernelImmDatas[1]->getIsaSize());
} else {
EXPECT_EQ(this->computeKernelIsaAllocationSizeWithPadding(maxAllocationSizeInPage), kernelImmDatas[0]->getIsaSize());
EXPECT_EQ(this->computeKernelIsaAllocationSizeWithPadding(tinyAllocationSize), kernelImmDatas[1]->getIsaSize());
}
}
struct ProxyKernelImmutableData : public KernelImmutableData {
using BaseClass = KernelImmutableData;
using BaseClass::BaseClass;
~ProxyKernelImmutableData() override { this->KernelImmutableData::~KernelImmutableData(); }
ADDMETHOD(initialize, ze_result_t, true, ZE_RESULT_ERROR_UNKNOWN,
(NEO::KernelInfo * kernelInfo, L0::Device *device, uint32_t computeUnitsUsedForScratch, NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel),
(kernelInfo, device, computeUnitsUsedForScratch, globalConstBuffer, globalVarBuffer, internalKernel));
};
void givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched() {
auto requestedSize = 0x40;
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVectorRef();
{
auto kernelsCount = 3ul;
kernelImmDatas.reserve(kernelsCount);
for (size_t i = 0lu; i < kernelsCount; i++) {
kernelImmDatas.emplace_back(new ProxyKernelImmutableData(this->device));
}
auto result = this->mockModule->setIsaGraphicsAllocations();
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
}
static_cast<ProxyKernelImmutableData *>(kernelImmDatas[2].get())->initializeCallBase = false;
auto result = this->mockModule->initializeKernelImmutableDatas();
EXPECT_EQ(result, ZE_RESULT_ERROR_UNKNOWN);
ASSERT_NE(kernelImmDatas[0].get(), nullptr);
ASSERT_NE(kernelImmDatas[1].get(), nullptr);
EXPECT_EQ(kernelImmDatas[2].get(), nullptr);
EXPECT_NE(kernelImmDatas[0]->getIsaGraphicsAllocation(), nullptr);
EXPECT_NE(kernelImmDatas[1]->getIsaGraphicsAllocation(), nullptr);
}
size_t isaPadding;
size_t kernelStartPointerAlignment;
NEO::Device *neoDevice = nullptr;
MockMemoryManager *mockMemoryManager = nullptr;
std::unique_ptr<MockModule> mockModule = nullptr;
std::unique_ptr<DebugManagerStateRestore> dbgRestorer = nullptr;
};
using ModuleIsaAllocationsInLocalMemoryTest = Test<ModuleIsaAllocationsFixture<true>>;
// Two kernels whose aligned ISA sizes together fit one page are expected to be sub-allocated
// from the same parent allocation: the second starts at an offset equal to the first one's aligned size.
TEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhichFitInSinglePage64KWhenKernelImmutableDatasInitializedThenKernelIsasShareParentAllocation) {
    EXPECT_EQ(this->mockModule->isaAllocationPageSize, isaAllocationPageSize);

    // First kernel: small fixed-size ISA.
    auto firstRequestedSize = 0x40;
    this->prepareKernelInfoAndAddToTranslationUnit(firstRequestedSize);
    auto firstAlignedSize = this->mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(firstRequestedSize);

    // Second kernel: sized from what is left of the page after the first kernel and one padding.
    auto secondRequestedSize = isaAllocationPageSize - firstAlignedSize - this->isaPadding;
    this->prepareKernelInfoAndAddToTranslationUnit(secondRequestedSize);
    auto secondAlignedSize = this->mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(secondRequestedSize);

    this->mockModule->initializeKernelImmutableDatas();
    auto &immDatas = this->mockModule->getKernelImmutableDataVector();
    auto &firstKernel = immDatas[0];
    auto &secondKernel = immDatas[1];

    EXPECT_EQ(firstKernel->getIsaGraphicsAllocation(), firstKernel->getIsaParentAllocation());
    EXPECT_EQ(firstKernel->getIsaOffsetInParentAllocation(), 0lu);
    EXPECT_EQ(firstKernel->getIsaSubAllocationSize(), firstAlignedSize);
    EXPECT_EQ(secondKernel->getIsaGraphicsAllocation(), secondKernel->getIsaParentAllocation());
    EXPECT_EQ(secondKernel->getIsaOffsetInParentAllocation(), firstAlignedSize);
    EXPECT_EQ(secondKernel->getIsaSubAllocationSize(), secondAlignedSize);

    EXPECT_EQ(firstKernel->getIsaSize(), firstAlignedSize);
    EXPECT_EQ(firstKernel->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
    EXPECT_EQ(secondKernel->getIsaSize(), secondAlignedSize);
    EXPECT_EQ(secondKernel->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
}
// Runs the shared debugger-enabled scenario, forwarding the HWTEST family type.
HWTEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhichFitInSinglePage64KAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
this->givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations<FamilyType>();
}
// Runs the shared scenario in which the kernels do not fit a single page.
TEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhichExceedSinglePage64KWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
this->givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations();
}
// Runs the shared scenario in which initialization of one kernel fails.
TEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched) {
this->givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched();
}
using ModuleIsaAllocationsInSystemMemoryTest = Test<ModuleIsaAllocationsFixture<false>>;
// Two tiny kernels: depending on whether their aligned sizes (including padding) fit one page together,
// they are expected either to share a parent allocation or to each get a standalone allocation.
TEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenKernelIsaWhichCouldFitInPages4KBWhenKernelImmutableDatasInitializedThenKernelIsasCanGetSeparateAllocationsDependingOnPaddingSize) {
    EXPECT_EQ(this->mockModule->isaAllocationPageSize, isaAllocationPageSize);

    const auto requestedSize1 = 0x8;
    this->prepareKernelInfoAndAddToTranslationUnit(requestedSize1);
    auto isaAllocationAlignedSize1 = this->mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(requestedSize1);

    const auto requestedSize2 = 0x4;
    this->prepareKernelInfoAndAddToTranslationUnit(requestedSize2);
    auto isaAllocationAlignedSize2 = this->mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(requestedSize2);

    // for 4kB pages, 2x isaPaddings alone could exceed isaAllocationPageSize, which precludes page sharing
    const bool isasShouldShareSamePage = (isaAllocationAlignedSize1 + isaAllocationAlignedSize2 <= isaAllocationPageSize);

    this->mockModule->initializeKernelImmutableDatas();
    auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVector();
    if (isasShouldShareSamePage) {
        EXPECT_EQ(kernelImmDatas[0]->getIsaGraphicsAllocation(), kernelImmDatas[0]->getIsaParentAllocation());
        EXPECT_EQ(kernelImmDatas[0]->getIsaOffsetInParentAllocation(), 0lu);
        EXPECT_EQ(kernelImmDatas[0]->getIsaSize(), isaAllocationAlignedSize1);
        EXPECT_EQ(kernelImmDatas[1]->getIsaGraphicsAllocation(), kernelImmDatas[1]->getIsaParentAllocation());
        EXPECT_EQ(kernelImmDatas[1]->getIsaOffsetInParentAllocation(), isaAllocationAlignedSize1);
        EXPECT_EQ(kernelImmDatas[1]->getIsaSubAllocationSize(), isaAllocationAlignedSize2);
        // The second kernel's size is checked here; index 0 is already compared with isaAllocationAlignedSize1 above.
        EXPECT_EQ(kernelImmDatas[1]->getIsaSize(), isaAllocationAlignedSize2);
    } else {
        EXPECT_EQ(nullptr, kernelImmDatas[0]->getIsaParentAllocation());
        EXPECT_NE(nullptr, kernelImmDatas[0]->getIsaGraphicsAllocation());
        EXPECT_EQ(kernelImmDatas[0]->getIsaOffsetInParentAllocation(), 0lu);
        EXPECT_EQ(kernelImmDatas[0]->getIsaSubAllocationSize(), 0lu);
        EXPECT_EQ(kernelImmDatas[0]->getIsaSize(), computeKernelIsaAllocationSizeWithPadding(requestedSize1));
        EXPECT_EQ(nullptr, kernelImmDatas[1]->getIsaParentAllocation());
        EXPECT_NE(nullptr, kernelImmDatas[1]->getIsaGraphicsAllocation());
        EXPECT_EQ(kernelImmDatas[1]->getIsaOffsetInParentAllocation(), 0lu);
        EXPECT_EQ(kernelImmDatas[1]->getIsaSubAllocationSize(), 0lu);
        EXPECT_EQ(kernelImmDatas[1]->getIsaSize(), computeKernelIsaAllocationSizeWithPadding(requestedSize2));
    }

    // The memory pool expectation holds in both branches.
    EXPECT_EQ(kernelImmDatas[0]->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
    EXPECT_EQ(kernelImmDatas[1]->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
}
// Runs the shared debugger-enabled scenario, forwarding the HWTEST family type.
HWTEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
this->givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations<FamilyType>();
}
// Runs the shared scenario in which the kernels do not fit a single page.
TEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
this->givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations();
}
// Runs the shared scenario in which initialization of one kernel fails.
TEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched) {
this->givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched();
}
using ModuleInitializeTest = Test<DeviceFixture>;
TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsReturned) {
@@ -3844,7 +3566,9 @@ TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDe
moduleDesc.inputSize = 10;
ModuleBuildLog *moduleBuildLog = nullptr;
auto module = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
std::unique_ptr<MockModule> module = std::make_unique<MockModule>(device,
moduleBuildLog,
ModuleType::User);
module->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
module->translationUnit->globalVarBuffer = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
@@ -3868,8 +3592,7 @@ TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDe
// pass kernelInfo ownership to programInfo
module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
auto kernelImmData = std::make_unique<WhiteBox<::L0::KernelImmutableData>>(this->device);
kernelImmData->setIsaPerKernelAllocation(module->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false);
kernelImmData->createRelocatedDebugData(module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer);
@@ -4041,7 +3764,6 @@ TEST_F(ModuleTests, whenCopyingPatchedSegmentsThenAllocationsAreSetWritableForTb
kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
pModule->kernelImmDatas.push_back(std::move(kernelImmData));
@@ -4071,7 +3793,6 @@ TEST_F(ModuleTests, givenConstDataStringSectionWhenLinkingModuleThenSegmentIsPat
kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
auto patchAddr = reinterpret_cast<uintptr_t>(ptrOffset(kernelImmData->isaGraphicsAllocation->getUnderlyingBuffer(), 0x8));
pModule->kernelImmDatas.push_back(std::move(kernelImmData));
@@ -4102,7 +3823,6 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingBuiltinMo
kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, true);
kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = true;
@@ -4148,7 +3868,6 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen
auto slmInlineSizeCopy = kernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true);
pModule->kernelImmDatas.push_back(std::move(kernelImmData));
@@ -4191,7 +3910,6 @@ TEST_F(ModuleTests, givenFullyLinkedModuleWhenCreatingKernelThenDebugMsgOnPrivat
kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true);
pModule->kernelImmDatas.push_back(std::move(kernelImmData));
@@ -4235,7 +3953,6 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingModuleThe
kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = true;
@@ -4266,7 +3983,6 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndNoDebuggerOrStackCallsWhenLink
kernelInfo->heapInfo.pKernelHeap = data;
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = false;
@@ -4307,9 +4023,6 @@ TEST_F(ModuleTests, givenModuleWithGlobalAndConstAllocationsWhenGettingModuleAll
module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
auto isaAlloc = module->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize);
ASSERT_NE(isaAlloc, nullptr);
kernelImmData->setIsaPerKernelAllocation(isaAlloc);
kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false);
module->kernelImmDatas.push_back(std::move(kernelImmData));

View File

@@ -77,7 +77,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
{
auto alloc = args.dispatchInterface->getIsaAllocation();
UNRECOVERABLE_IF(nullptr == alloc);
auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
auto offset = alloc->getGpuAddressToPatch();
idd.setKernelStartPointer(offset);
idd.setKernelStartPointerHigh(0u);
}

View File

@@ -87,7 +87,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
{
auto alloc = args.dispatchInterface->getIsaAllocation();
UNRECOVERABLE_IF(nullptr == alloc);
auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
auto offset = alloc->getGpuAddressToPatch();
if (!localIdsGenerationByRuntime) {
offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
}

View File

@@ -53,7 +53,6 @@ class GfxCoreHelper {
static std::unique_ptr<GfxCoreHelper> create(const GFXCORE_FAMILY gfxCoreFamily);
virtual size_t getMaxBarrierRegisterPerSlice() const = 0;
virtual size_t getPaddingForISAAllocation() const = 0;
virtual size_t getKernelIsaPointerAlignment() const = 0;
virtual uint32_t getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
virtual uint32_t getPitchAlignmentForImage(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo, const ProductHelper &productHelper) = 0;
@@ -216,10 +215,6 @@ class GfxCoreHelperHw : public GfxCoreHelper {
size_t getPaddingForISAAllocation() const override;
// Returns KERNELSTARTPOINTER_ALIGN_SIZE, read from GfxFamily::cmdInitInterfaceDescriptorData, as a size_t.
size_t getKernelIsaPointerAlignment() const override {
    const auto isaPointerAlignment = static_cast<size_t>(GfxFamily::cmdInitInterfaceDescriptorData.KERNELSTARTPOINTER_ALIGN_SIZE);
    return isaPointerAlignment;
}
uint32_t getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
uint32_t getPitchAlignmentForImage(const RootDeviceEnvironment &rootDeviceEnvironment) const override;

View File

@@ -40,7 +40,6 @@ struct DispatchKernelEncoderI {
virtual uint32_t getSurfaceStateHeapDataSize() const = 0;
virtual GraphicsAllocation *getIsaAllocation() const = 0;
virtual uint64_t getIsaOffsetInParentAllocation() const = 0;
virtual const uint8_t *getDynamicStateHeapData() const = 0;
virtual uint32_t getRequiredWorkgroupOrder() const = 0;

View File

@@ -997,45 +997,6 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThen
EXPECT_NE(usedAfter, usedBefore);
}
// Encodes a dispatch through a mock encoder that reports a non-zero ISA offset and verifies that the
// kernel start pointer stored in the interface descriptor block equals the ISA allocation's
// patch address plus that reported offset.
HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenProgrammingWalkerThenKernelStartPointerHasProperOffset, IsBeforeXeHpCore) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    auto mockEncoder = std::make_unique<MockDispatchKernelEncoder>();
    mockEncoder->getIsaOffsetInParentAllocationResult = 8 << INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_BIT_SHIFT;

    bool requiresUncachedMocs = false;
    uint32_t dims[] = {2, 1, 1};
    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, mockEncoder.get(), dims, requiresUncachedMocs);
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);

    auto interfaceDescriptor = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
    const auto expectedStartPointer = mockEncoder->getIsaAllocation()->getGpuAddressToPatch() + mockEncoder->getIsaOffsetInParentAllocation();
    EXPECT_EQ(interfaceDescriptor->getKernelStartPointer(), expectedStartPointer);
}
// Checks that the GfxCoreHelper getter reports the same alignment as the
// KERNELSTARTPOINTER_ALIGN_SIZE constant of the family's INTERFACE_DESCRIPTOR_DATA.
HWTEST_F(EncodeDispatchKernelTest, givenKernelStartPointerAlignmentInInterfaceDescriptorWhenHelperGetterUsedThenCorrectValueReturned) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    const auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
    EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_ALIGN_SIZE, gfxCoreHelper.getKernelIsaPointerAlignment());
}
// Encodes a dispatch through a mock encoder that reports a non-zero ISA offset, parses the
// resulting command stream for the walker command, and verifies that the kernel start pointer
// in the walker's interface descriptor equals the ISA allocation's patch address plus that offset.
HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenProgrammingWalkerThenKernelStartPointerHasProperOffset, IsAtLeastXeHpCore) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    auto mockEncoder = std::make_unique<MockDispatchKernelEncoder>();
    mockEncoder->getIsaOffsetInParentAllocationResult = 8 << INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_BIT_SHIFT;

    bool requiresUncachedMocs = false;
    uint32_t dims[] = {2, 1, 1};
    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, mockEncoder.get(), dims, requiresUncachedMocs);
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);

    // Parse everything written to the command stream so far.
    GenCmdList parsedCommands;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCommands,
                                             ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0),
                                             cmdContainer->getCommandStream()->getUsed());

    auto walkerIt = find<WALKER_TYPE *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(walkerIt, parsedCommands.end());

    auto walker = genCmdCast<WALKER_TYPE *>(*walkerIt);
    const auto expectedStartPointer = mockEncoder->getIsaAllocation()->getGpuAddressToPatch() + mockEncoder->getIsaOffsetInParentAllocation();
    EXPECT_EQ(walker->getInterfaceDescriptor().getKernelStartPointer(), expectedStartPointer);
}
HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatchingKernelThenSurfaceStateOffsetInCrossThreadDataIsNotPatched) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor;

View File

@@ -55,6 +55,5 @@ struct MockDispatchKernelEncoder : public DispatchKernelEncoderI {
ADDMETHOD_CONST_NOBASE(getDynamicStateHeapData, const uint8_t *, nullptr, ());
ADDMETHOD_CONST_NOBASE(requiresGenerationOfLocalIdsByRuntime, bool, true, ());
ADDMETHOD_CONST_NOBASE(getSlmPolicy, SlmPolicy, SlmPolicy::SlmPolicyNone, ());
ADDMETHOD_CONST_NOBASE(getIsaOffsetInParentAllocation, uint64_t, 0lu, ());
};
} // namespace NEO