mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
Revert "feature: Optimize intra-module kernel ISA allocations"
This reverts commit c348831470.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c348831470
commit
913a926fd4
@@ -35,7 +35,7 @@ struct KernelImmutableData {
|
||||
KernelImmutableData(L0::Device *l0device = nullptr);
|
||||
virtual ~KernelImmutableData();
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t initialize(NEO::KernelInfo *kernelInfo, Device *device, uint32_t computeUnitsUsedForSratch,
|
||||
ze_result_t initialize(NEO::KernelInfo *kernelInfo, Device *device, uint32_t computeUnitsUsedForSratch,
|
||||
NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer,
|
||||
bool internalKernel);
|
||||
|
||||
@@ -48,14 +48,7 @@ struct KernelImmutableData {
|
||||
}
|
||||
|
||||
uint32_t getIsaSize() const;
|
||||
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const;
|
||||
void setIsaPerKernelAllocation(NEO::GraphicsAllocation *allocation);
|
||||
inline NEO::GraphicsAllocation *getIsaParentAllocation() const { return isaParentAllocation; }
|
||||
inline void setIsaParentAllocation(NEO::GraphicsAllocation *allocation) { isaParentAllocation = allocation; };
|
||||
inline size_t getIsaOffsetInParentAllocation() const { return isaSubAllocationOffset; }
|
||||
inline void setIsaSubAllocationOffset(size_t offset) { isaSubAllocationOffset = offset; }
|
||||
inline void setIsaSubAllocationSize(size_t size) { isaSubAllocationSize = size; }
|
||||
inline size_t getIsaSubAllocationSize() const { return isaSubAllocationSize; }
|
||||
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); }
|
||||
|
||||
const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); }
|
||||
|
||||
@@ -87,9 +80,6 @@ struct KernelImmutableData {
|
||||
NEO::KernelInfo *kernelInfo = nullptr;
|
||||
NEO::KernelDescriptor *kernelDescriptor = nullptr;
|
||||
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
|
||||
NEO::GraphicsAllocation *isaParentAllocation = nullptr;
|
||||
size_t isaSubAllocationOffset = 0lu;
|
||||
size_t isaSubAllocationSize = 0lu;
|
||||
|
||||
uint32_t crossThreadDataSize = 0;
|
||||
std::unique_ptr<uint8_t[]> crossThreadDataTemplate = nullptr;
|
||||
|
||||
@@ -75,6 +75,20 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
|
||||
|
||||
DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
|
||||
auto neoDevice = deviceImp->getActiveDevice();
|
||||
auto memoryManager = neoDevice->getMemoryManager();
|
||||
|
||||
auto kernelIsaSize = kernelInfo->heapInfo.kernelHeapSize;
|
||||
UNRECOVERABLE_IF(kernelIsaSize == 0);
|
||||
UNRECOVERABLE_IF(!kernelInfo->heapInfo.pKernelHeap);
|
||||
const auto allocType = internalKernel ? NEO::AllocationType::KERNEL_ISA_INTERNAL : NEO::AllocationType::KERNEL_ISA;
|
||||
|
||||
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(
|
||||
{neoDevice->getRootDeviceIndex(), kernelIsaSize, allocType, neoDevice->getDeviceBitfield()});
|
||||
if (!allocation) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
isaGraphicsAllocation.reset(allocation);
|
||||
|
||||
if (neoDevice->getDebugger() && kernelInfo->kernelDescriptor.external.debugData.get()) {
|
||||
createRelocatedDebugData(globalConstBuffer, globalVarBuffer);
|
||||
@@ -189,41 +203,18 @@ void KernelImmutableData::createRelocatedDebugData(NEO::GraphicsAllocation *glob
|
||||
}
|
||||
}
|
||||
|
||||
NEO::GraphicsAllocation *KernelImmutableData::getIsaGraphicsAllocation() const {
|
||||
if (auto allocation = this->getIsaParentAllocation(); allocation != nullptr) {
|
||||
DEBUG_BREAK_IF(this->device->getL0Debugger() != nullptr);
|
||||
DEBUG_BREAK_IF(this->isaGraphicsAllocation != nullptr);
|
||||
return allocation;
|
||||
} else {
|
||||
DEBUG_BREAK_IF(this->isaGraphicsAllocation.get() == nullptr);
|
||||
return this->isaGraphicsAllocation.get();
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t KernelImmutableData::getIsaSize() const {
|
||||
if (this->getIsaParentAllocation()) {
|
||||
DEBUG_BREAK_IF(this->device->getL0Debugger() != nullptr);
|
||||
DEBUG_BREAK_IF(this->isaGraphicsAllocation != nullptr);
|
||||
return static_cast<uint32_t>(this->isaSubAllocationSize);
|
||||
} else {
|
||||
return static_cast<uint32_t>(this->isaGraphicsAllocation->getUnderlyingBufferSize());
|
||||
}
|
||||
}
|
||||
|
||||
void KernelImmutableData::setIsaPerKernelAllocation(NEO::GraphicsAllocation *allocation) {
|
||||
DEBUG_BREAK_IF(this->isaParentAllocation != nullptr);
|
||||
this->isaGraphicsAllocation.reset(allocation);
|
||||
}
|
||||
|
||||
ze_result_t KernelImp::getBaseAddress(uint64_t *baseAddress) {
|
||||
if (baseAddress) {
|
||||
auto gmmHelper = module->getDevice()->getNEODevice()->getGmmHelper();
|
||||
*baseAddress = gmmHelper->decanonize(this->kernelImmData->getIsaGraphicsAllocation()->getGpuAddress() +
|
||||
this->kernelImmData->getIsaOffsetInParentAllocation());
|
||||
*baseAddress = gmmHelper->decanonize(this->kernelImmData->getIsaGraphicsAllocation()->getGpuAddress());
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
uint32_t KernelImmutableData::getIsaSize() const {
|
||||
return static_cast<uint32_t>(isaGraphicsAllocation->getUnderlyingBufferSize());
|
||||
}
|
||||
|
||||
KernelImp::KernelImp(Module *module) : module(module) {}
|
||||
|
||||
KernelImp::~KernelImp() {
|
||||
@@ -957,12 +948,10 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
UNRECOVERABLE_IF(!this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap);
|
||||
|
||||
if (isaAllocation->getAllocationType() == NEO::AllocationType::KERNEL_ISA_INTERNAL) {
|
||||
isaAllocation->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
|
||||
isaAllocation->setAubWritable(true, std::numeric_limits<uint32_t>::max());
|
||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(neoDevice->getRootDeviceEnvironment(), *isaAllocation),
|
||||
*neoDevice,
|
||||
isaAllocation,
|
||||
this->kernelImmData->getIsaOffsetInParentAllocation(),
|
||||
0,
|
||||
this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap,
|
||||
static_cast<size_t>(this->kernelImmData->getKernelInfo()->heapInfo.kernelHeapSize));
|
||||
}
|
||||
@@ -1220,10 +1209,6 @@ NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const {
|
||||
return getImmutableData()->getIsaGraphicsAllocation();
|
||||
}
|
||||
|
||||
uint64_t KernelImp::getIsaOffsetInParentAllocation() const {
|
||||
return static_cast<uint64_t>(getImmutableData()->getIsaOffsetInParentAllocation());
|
||||
}
|
||||
|
||||
ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) {
|
||||
auto &threadArbitrationPolicy = const_cast<NEO::ThreadArbitrationPolicy &>(getKernelDescriptor().kernelAttributes.threadArbitrationPolicy);
|
||||
if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST) {
|
||||
|
||||
@@ -141,7 +141,6 @@ struct KernelImp : Kernel {
|
||||
}
|
||||
|
||||
NEO::GraphicsAllocation *getIsaAllocation() const override;
|
||||
uint64_t getIsaOffsetInParentAllocation() const override;
|
||||
|
||||
uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; }
|
||||
bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; }
|
||||
|
||||
@@ -32,7 +32,6 @@
|
||||
#include "shared/source/helpers/kernel_helpers.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/memory_manager/memory_operations_handler.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
@@ -495,19 +494,12 @@ void ModuleTranslationUnit::processDebugData() {
|
||||
|
||||
ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
|
||||
: device(device), translationUnit(std::make_unique<ModuleTranslationUnit>(device)),
|
||||
moduleBuildLog(moduleBuildLog), kernelsIsaParentRegion(nullptr), type(type) {
|
||||
auto &gfxCoreHelper = device->getGfxCoreHelper();
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
this->isaAllocationPageSize = gfxCoreHelper.useSystemMemoryPlacementForISA(hwInfo) ? MemoryConstants::pageSize : MemoryConstants::pageSize64k;
|
||||
this->productFamily = hwInfo.platform.eProductFamily;
|
||||
moduleBuildLog(moduleBuildLog), type(type) {
|
||||
productFamily = device->getHwInfo().platform.eProductFamily;
|
||||
}
|
||||
|
||||
ModuleImp::~ModuleImp() {
|
||||
this->kernelImmDatas.clear();
|
||||
if (this->kernelsIsaParentRegion) {
|
||||
DEBUG_BREAK_IF(this->device->getNEODevice()->getMemoryManager() == nullptr);
|
||||
this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(this->kernelsIsaParentRegion.release());
|
||||
}
|
||||
kernelImmDatas.clear();
|
||||
}
|
||||
|
||||
NEO::Zebin::Debug::Segments ModuleImp::getZebinSegments() {
|
||||
@@ -562,8 +554,7 @@ ze_result_t ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neo
|
||||
for (auto &ki : kernelImmDatas) {
|
||||
|
||||
if (!ki->isIsaCopiedToAllocation()) {
|
||||
ki->getIsaGraphicsAllocation()->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
|
||||
ki->getIsaGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
|
||||
|
||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *ki->getIsaGraphicsAllocation()),
|
||||
*neoDevice, ki->getIsaGraphicsAllocation(), 0, ki->getKernelInfo()->heapInfo.pKernelHeap,
|
||||
static_cast<size_t>(ki->getKernelInfo()->heapInfo.kernelHeapSize));
|
||||
@@ -696,98 +687,23 @@ inline ze_result_t ModuleImp::checkIfBuildShouldBeFailed(NEO::Device *neoDevice)
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t ModuleImp::initializeKernelImmutableDatas() {
|
||||
if (size_t kernelsCount = this->translationUnit->programInfo.kernelInfos.size(); kernelsCount > 0lu) {
|
||||
ze_result_t result;
|
||||
if (result = this->allocateKernelImmutableDatas(kernelsCount); result != ZE_RESULT_SUCCESS) {
|
||||
return result;
|
||||
}
|
||||
for (size_t i = 0lu; i < kernelsCount; i++) {
|
||||
result = kernelImmDatas[i]->initialize(this->translationUnit->programInfo.kernelInfos[i],
|
||||
device,
|
||||
device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
|
||||
this->translationUnit->globalConstBuffer,
|
||||
this->translationUnit->globalVarBuffer,
|
||||
inline ze_result_t ModuleImp::initializeKernelImmutableDatas() {
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
|
||||
kernelImmDatas.reserve(this->translationUnit->programInfo.kernelInfos.size());
|
||||
for (auto &ki : this->translationUnit->programInfo.kernelInfos) {
|
||||
std::unique_ptr<KernelImmutableData> kernelImmData{new KernelImmutableData(this->device)};
|
||||
result = kernelImmData->initialize(ki, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
|
||||
this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer,
|
||||
this->type == ModuleType::Builtin);
|
||||
if (result != ZE_RESULT_SUCCESS) {
|
||||
kernelImmDatas[i].reset();
|
||||
return result;
|
||||
}
|
||||
}
|
||||
kernelImmDatas.push_back(std::move(kernelImmData));
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t ModuleImp::allocateKernelImmutableDatas(size_t kernelsCount) {
|
||||
if (this->kernelImmDatas.size() == kernelsCount) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
this->kernelImmDatas.reserve(kernelsCount);
|
||||
for (size_t i = 0lu; i < kernelsCount; i++) {
|
||||
this->kernelImmDatas.emplace_back(new KernelImmutableData(this->device));
|
||||
}
|
||||
return this->setIsaGraphicsAllocations();
|
||||
}
|
||||
|
||||
ze_result_t ModuleImp::setIsaGraphicsAllocations() {
|
||||
size_t kernelsCount = this->kernelImmDatas.size();
|
||||
|
||||
auto kernelsChunks = std::vector<std::pair<size_t, size_t>>(kernelsCount);
|
||||
size_t kernelsIsaTotalSize = 0lu;
|
||||
for (auto i = 0lu; i < kernelsCount; i++) {
|
||||
auto kernelInfo = this->translationUnit->programInfo.kernelInfos[i];
|
||||
DEBUG_BREAK_IF(kernelInfo->heapInfo.kernelHeapSize == 0lu);
|
||||
DEBUG_BREAK_IF(!kernelInfo->heapInfo.pKernelHeap);
|
||||
auto chunkOffset = kernelsIsaTotalSize;
|
||||
auto chunkSize = this->computeKernelIsaAllocationAlignedSizeWithPadding(kernelInfo->heapInfo.kernelHeapSize);
|
||||
kernelsIsaTotalSize += chunkSize;
|
||||
kernelsChunks[i] = {chunkOffset, chunkSize};
|
||||
}
|
||||
|
||||
bool debuggerDisabled = (this->device->getL0Debugger() == nullptr);
|
||||
if (debuggerDisabled && kernelsIsaTotalSize <= isaAllocationPageSize) {
|
||||
if (auto allocation = this->allocateKernelsIsaMemory(kernelsIsaTotalSize); allocation == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
} else {
|
||||
this->kernelsIsaParentRegion.reset(allocation);
|
||||
}
|
||||
|
||||
for (auto i = 0lu; i < kernelsCount; i++) {
|
||||
auto [isaOffset, isaSize] = kernelsChunks[i];
|
||||
this->kernelImmDatas[i]->setIsaParentAllocation(this->kernelsIsaParentRegion.get());
|
||||
this->kernelImmDatas[i]->setIsaSubAllocationOffset(isaOffset);
|
||||
this->kernelImmDatas[i]->setIsaSubAllocationSize(isaSize);
|
||||
}
|
||||
} else {
|
||||
for (auto i = 0lu; i < kernelsCount; i++) {
|
||||
auto kernelInfo = this->translationUnit->programInfo.kernelInfos[i];
|
||||
if (auto allocation = this->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize); allocation == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
} else {
|
||||
this->kernelImmDatas[i]->setIsaPerKernelAllocation(allocation);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
size_t ModuleImp::computeKernelIsaAllocationAlignedSizeWithPadding(size_t isaSize) {
|
||||
auto isaPadding = this->device->getGfxCoreHelper().getPaddingForISAAllocation();
|
||||
auto kernelStartPointerAlignment = this->device->getGfxCoreHelper().getKernelIsaPointerAlignment();
|
||||
auto isaAllocationSize = alignUp(isaPadding + isaSize, kernelStartPointerAlignment);
|
||||
return isaAllocationSize;
|
||||
}
|
||||
|
||||
NEO::GraphicsAllocation *ModuleImp::allocateKernelsIsaMemory(size_t size) {
|
||||
auto allocType = (this->type == ModuleType::Builtin ? NEO::AllocationType::KERNEL_ISA_INTERNAL : NEO::AllocationType::KERNEL_ISA);
|
||||
auto neoDevice = this->device->getNEODevice();
|
||||
return neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({neoDevice->getRootDeviceIndex(),
|
||||
size,
|
||||
allocType,
|
||||
neoDevice->getDeviceBitfield()});
|
||||
}
|
||||
|
||||
void ModuleImp::createDebugZebin() {
|
||||
auto refBin = ArrayRef<const uint8_t>::fromAny(translationUnit->unpackedDeviceBinary.get(), translationUnit->unpackedDeviceBinarySize);
|
||||
auto segments = getZebinSegments();
|
||||
@@ -964,10 +880,7 @@ void ModuleImp::copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSeg
|
||||
auto segmentId = &kernelImmData - &this->kernelImmDatas[0];
|
||||
|
||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *kernelImmData->getIsaGraphicsAllocation()),
|
||||
*device->getNEODevice(),
|
||||
kernelImmData->getIsaGraphicsAllocation(),
|
||||
kernelImmData->getIsaOffsetInParentAllocation(),
|
||||
isaSegmentsForPatching[segmentId].hostPointer,
|
||||
*device->getNEODevice(), kernelImmData->getIsaGraphicsAllocation(), 0, isaSegmentsForPatching[segmentId].hostPointer,
|
||||
isaSegmentsForPatching[segmentId].segmentSize);
|
||||
|
||||
kernelImmData->setIsaCopiedToAllocation();
|
||||
@@ -1012,9 +925,8 @@ bool ModuleImp::linkBinary() {
|
||||
if (linkerInput->getExportedFunctionsSegmentId() >= 0) {
|
||||
auto exportedFunctionHeapId = linkerInput->getExportedFunctionsSegmentId();
|
||||
this->exportedFunctionsSurface = this->kernelImmDatas[exportedFunctionHeapId]->getIsaGraphicsAllocation();
|
||||
auto offsetInParentAllocation = this->kernelImmDatas[exportedFunctionHeapId]->getIsaOffsetInParentAllocation();
|
||||
exportedFunctions.gpuAddress = static_cast<uintptr_t>(exportedFunctionsSurface->getGpuAddressToPatch() + offsetInParentAllocation);
|
||||
exportedFunctions.segmentSize = this->kernelImmDatas[exportedFunctionHeapId]->getIsaSize();
|
||||
exportedFunctions.gpuAddress = static_cast<uintptr_t>(exportedFunctionsSurface->getGpuAddressToPatch());
|
||||
exportedFunctions.segmentSize = exportedFunctionsSurface->getUnderlyingBufferSize();
|
||||
}
|
||||
|
||||
Linker::KernelDescriptorsT kernelDescriptors;
|
||||
@@ -1026,9 +938,7 @@ bool ModuleImp::linkBinary() {
|
||||
auto &kernHeapInfo = kernelInfo->heapInfo;
|
||||
const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
|
||||
patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.kernelHeapSize));
|
||||
auto isaAddressToPatch = static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch() +
|
||||
kernelImmDatas.at(i)->getIsaOffsetInParentAllocation());
|
||||
isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), isaAddressToPatch, kernHeapInfo.kernelHeapSize});
|
||||
isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch()), kernHeapInfo.kernelHeapSize});
|
||||
kernelDescriptors.push_back(&kernelInfo->kernelDescriptor);
|
||||
}
|
||||
}
|
||||
@@ -1091,10 +1001,10 @@ ze_result_t ModuleImp::getFunctionPointer(const char *pFunctionName, void **pfnF
|
||||
auto kernelImmData = this->getKernelImmutableData(pFunctionName);
|
||||
if (kernelImmData != nullptr) {
|
||||
auto isaAllocation = kernelImmData->getIsaGraphicsAllocation();
|
||||
*pfnFunction = reinterpret_cast<void *>(isaAllocation->getGpuAddress() + kernelImmData->getIsaOffsetInParentAllocation());
|
||||
*pfnFunction = reinterpret_cast<void *>(isaAllocation->getGpuAddress());
|
||||
// Ensure that any kernel in this module which uses this kernel module function pointer has access to the memory.
|
||||
for (auto &data : this->getKernelImmutableDataVector()) {
|
||||
if (data.get() != kernelImmData && data.get()->getIsaOffsetInParentAllocation() == 0lu) {
|
||||
if (data.get() != kernelImmData) {
|
||||
data.get()->getResidencyContainer().insert(data.get()->getResidencyContainer().end(), isaAllocation);
|
||||
}
|
||||
}
|
||||
@@ -1345,9 +1255,7 @@ ze_result_t ModuleImp::performDynamicLink(uint32_t numModules,
|
||||
auto &kernHeapInfo = kernelInfo->heapInfo;
|
||||
const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
|
||||
patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.kernelHeapSize));
|
||||
auto isaAddressToPatch = static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch() +
|
||||
kernelImmDatas.at(i)->getIsaOffsetInParentAllocation());
|
||||
isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), isaAddressToPatch, kernHeapInfo.kernelHeapSize});
|
||||
isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), static_cast<uintptr_t>(kernelImmDatas.at(i)->getIsaGraphicsAllocation()->getGpuAddressToPatch()), kernHeapInfo.kernelHeapSize});
|
||||
}
|
||||
}
|
||||
for (const auto &unresolvedExternal : moduleId->unresolvedExternalsInfo) {
|
||||
@@ -1567,14 +1475,9 @@ void ModuleImp::notifyModuleDestroy() {
|
||||
|
||||
StackVec<NEO::GraphicsAllocation *, 32> ModuleImp::getModuleAllocations() {
|
||||
StackVec<NEO::GraphicsAllocation *, 32> allocs;
|
||||
if (auto isaParentAllocation = this->getKernelsIsaParentAllocation(); isaParentAllocation != nullptr) {
|
||||
allocs.push_back(isaParentAllocation);
|
||||
} else {
|
||||
// ISA allocations not optimized
|
||||
for (auto &kernImmData : kernelImmDatas) {
|
||||
allocs.push_back(kernImmData->getIsaGraphicsAllocation());
|
||||
}
|
||||
}
|
||||
|
||||
if (translationUnit) {
|
||||
if (translationUnit->globalVarBuffer) {
|
||||
|
||||
@@ -124,7 +124,6 @@ struct ModuleImp : public Module {
|
||||
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override;
|
||||
|
||||
const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; }
|
||||
NEO::GraphicsAllocation *getKernelsIsaParentAllocation() const { return kernelsIsaParentRegion.get(); }
|
||||
|
||||
uint32_t getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const override;
|
||||
|
||||
@@ -156,9 +155,8 @@ struct ModuleImp : public Module {
|
||||
}
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL ze_result_t initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice);
|
||||
ze_result_t initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice);
|
||||
ze_result_t checkIfBuildShouldBeFailed(NEO::Device *neoDevice);
|
||||
ze_result_t allocateKernelImmutableDatas(size_t kernelsCount);
|
||||
ze_result_t initializeKernelImmutableDatas();
|
||||
void copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSegmentsForPatching);
|
||||
void verifyDebugCapabilities();
|
||||
@@ -169,9 +167,6 @@ struct ModuleImp : public Module {
|
||||
void notifyModuleCreate();
|
||||
void notifyModuleDestroy();
|
||||
bool populateHostGlobalSymbolsMap(std::unordered_map<std::string, std::string> &devToHostNameMapping);
|
||||
ze_result_t setIsaGraphicsAllocations();
|
||||
MOCKABLE_VIRTUAL size_t computeKernelIsaAllocationAlignedSizeWithPadding(size_t isaSize);
|
||||
MOCKABLE_VIRTUAL NEO::GraphicsAllocation *allocateKernelsIsaMemory(size_t size);
|
||||
StackVec<NEO::GraphicsAllocation *, 32> getModuleAllocations();
|
||||
|
||||
Device *device = nullptr;
|
||||
@@ -179,7 +174,6 @@ struct ModuleImp : public Module {
|
||||
std::unique_ptr<ModuleTranslationUnit> translationUnit;
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr;
|
||||
std::unique_ptr<NEO::GraphicsAllocation> kernelsIsaParentRegion;
|
||||
std::vector<std::unique_ptr<KernelImmutableData>> kernelImmDatas;
|
||||
NEO::Linker::RelocatedSymbolsMap symbols;
|
||||
|
||||
@@ -204,7 +198,6 @@ struct ModuleImp : public Module {
|
||||
uint32_t debugElfHandle = 0;
|
||||
uint32_t profileFlags = 0;
|
||||
uint64_t moduleLoadAddress = std::numeric_limits<uint64_t>::max();
|
||||
size_t isaAllocationPageSize = 0;
|
||||
|
||||
NEO::Linker::PatchableSegments isaSegmentsForPatching;
|
||||
std::vector<std::vector<char>> patchedIsaTempStorage;
|
||||
|
||||
@@ -124,13 +124,24 @@ void ModuleImmutableDataFixture::tearDown() {
|
||||
DeviceFixture::tearDown();
|
||||
}
|
||||
|
||||
void ModuleFixture::setUp(bool skipCreatingModules) {
|
||||
ModuleFixture::ProxyModuleImp *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc,
|
||||
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
|
||||
auto module = new ProxyModuleImp(device, moduleBuildLog, type);
|
||||
|
||||
*result = module->initialize(desc, device->getNEODevice());
|
||||
if (*result != ZE_RESULT_SUCCESS) {
|
||||
module->destroy();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return module;
|
||||
}
|
||||
|
||||
void ModuleFixture::setUp() {
|
||||
DebugManager.flags.FailBuildProgramWithStatefulAccess.set(0);
|
||||
|
||||
DeviceFixture::setUp();
|
||||
if (skipCreatingModules == false) {
|
||||
createModuleFromMockBinary();
|
||||
}
|
||||
}
|
||||
|
||||
void ModuleFixture::createModuleFromMockBinary(ModuleType type) {
|
||||
@@ -144,13 +155,7 @@ void ModuleFixture::createModuleFromMockBinary(ModuleType type) {
|
||||
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
if (!module) {
|
||||
module.reset(new WhiteBox<::L0::Module>{device, moduleBuildLog, type});
|
||||
}
|
||||
result = module->initialize(&moduleDesc, device->getNEODevice());
|
||||
if (result != ZE_RESULT_SUCCESS) {
|
||||
module->destroy();
|
||||
}
|
||||
module.reset(ProxyModuleImp::create(device, &moduleDesc, moduleBuildLog, type, &result));
|
||||
}
|
||||
|
||||
void ModuleFixture::createKernel() {
|
||||
|
||||
@@ -123,8 +123,19 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
||||
|
||||
struct ModuleFixture : public DeviceFixture {
|
||||
|
||||
void setUp(bool skipCreatingModules);
|
||||
void setUp() { this->setUp(false); }
|
||||
struct ProxyModuleImp : public WhiteBox<::L0::Module> {
|
||||
using BaseClass = WhiteBox<::L0::Module>;
|
||||
using BaseClass::BaseClass;
|
||||
|
||||
std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmDatas() {
|
||||
return kernelImmDatas;
|
||||
}
|
||||
|
||||
static ModuleFixture::ProxyModuleImp *create(L0::Device *device, const ze_module_desc_t *desc,
|
||||
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
|
||||
};
|
||||
|
||||
void setUp();
|
||||
|
||||
void createModuleFromMockBinary(ModuleType type = ModuleType::User);
|
||||
|
||||
@@ -136,7 +147,7 @@ struct ModuleFixture : public DeviceFixture {
|
||||
|
||||
const std::string kernelName = "test";
|
||||
const uint32_t numKernelArguments = 6;
|
||||
std::unique_ptr<WhiteBox<::L0::Module>> module;
|
||||
std::unique_ptr<ProxyModuleImp> module;
|
||||
std::unique_ptr<WhiteBox<::L0::KernelImp>> kernel;
|
||||
std::unique_ptr<ZebinTestData::ZebinWithL0TestCommonModule> zebinData;
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
@@ -34,7 +34,6 @@ set(L0_MOCKS_SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_module.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_module.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_sampler.h
|
||||
)
|
||||
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
|
||||
|
||||
#include "level_zero/core/source/device/device.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
ze_result_t WhiteBox<::L0::Module>::initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice) {
|
||||
auto result = this->BaseClass::initializeTranslationUnit(desc, neoDevice);
|
||||
if (this->mockGlobalConstBuffer) {
|
||||
this->translationUnit->globalConstBuffer = this->mockGlobalConstBuffer;
|
||||
}
|
||||
if (this->mockGlobalVarBuffer) {
|
||||
this->translationUnit->globalVarBuffer = this->mockGlobalVarBuffer;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
@@ -13,7 +13,6 @@
|
||||
#include "shared/test/common/test_macros/mock_method_macros.h"
|
||||
|
||||
#include "level_zero/core/source/module/module_imp.h"
|
||||
#include "level_zero/core/test/unit_tests/mock.h"
|
||||
#include "level_zero/core/test/unit_tests/white_box.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
@@ -49,7 +48,6 @@ constexpr inline MockModuleTranslationUnit *toMockPtr(L0::ModuleTranslationUnit
|
||||
template <>
|
||||
struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
|
||||
using BaseClass = ::L0::ModuleImp;
|
||||
using BaseClass::allocateKernelsIsaMemory;
|
||||
using BaseClass::allocatePrivateMemoryPerDispatch;
|
||||
using BaseClass::BaseClass;
|
||||
using BaseClass::builtFromSPIRv;
|
||||
@@ -63,7 +61,6 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
|
||||
using BaseClass::isFunctionSymbolExportEnabled;
|
||||
using BaseClass::isGlobalSymbolExportEnabled;
|
||||
using BaseClass::kernelImmDatas;
|
||||
using BaseClass::setIsaGraphicsAllocations;
|
||||
using BaseClass::symbols;
|
||||
using BaseClass::translationUnit;
|
||||
using BaseClass::type;
|
||||
@@ -73,11 +70,6 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
|
||||
: ::L0::ModuleImp{device, moduleBuildLog, type} {
|
||||
this->translationUnit.reset(new MockModuleTranslationUnit{device});
|
||||
}
|
||||
|
||||
ze_result_t initializeTranslationUnit(const ze_module_desc_t *desc, NEO::Device *neoDevice) override;
|
||||
|
||||
NEO::GraphicsAllocation *mockGlobalVarBuffer = nullptr;
|
||||
NEO::GraphicsAllocation *mockGlobalConstBuffer = nullptr;
|
||||
};
|
||||
|
||||
using Module = WhiteBox<::L0::Module>;
|
||||
@@ -99,23 +91,15 @@ struct Mock<Module> : public Module {
|
||||
(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog));
|
||||
ADDMETHOD_NOBASE(getProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_module_properties_t * pModuleProperties));
|
||||
ADDMETHOD_NOBASE(getGlobalPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pGlobalName, size_t *pSize, void **pPtr));
|
||||
ADDMETHOD(allocateKernelsIsaMemory, NEO::GraphicsAllocation *, true, nullptr, (size_t isaSize), (isaSize));
|
||||
ADDMETHOD(computeKernelIsaAllocationAlignedSizeWithPadding, size_t, true, 0ul, (size_t isaSize), (isaSize));
|
||||
};
|
||||
|
||||
struct MockModule : public L0::ModuleImp {
|
||||
using ModuleImp::allocateKernelImmutableDatas;
|
||||
using ModuleImp::allocateKernelsIsaMemory;
|
||||
using ModuleImp::computeKernelIsaAllocationAlignedSizeWithPadding;
|
||||
using ModuleImp::debugModuleHandle;
|
||||
using ModuleImp::getModuleAllocations;
|
||||
using ModuleImp::initializeKernelImmutableDatas;
|
||||
using ModuleImp::isaAllocationPageSize;
|
||||
using ModuleImp::isFunctionSymbolExportEnabled;
|
||||
using ModuleImp::isGlobalSymbolExportEnabled;
|
||||
using ModuleImp::kernelImmDatas;
|
||||
using ModuleImp::populateHostGlobalSymbolsMap;
|
||||
using ModuleImp::setIsaGraphicsAllocations;
|
||||
using ModuleImp::symbols;
|
||||
using ModuleImp::translationUnit;
|
||||
|
||||
@@ -131,8 +115,6 @@ struct MockModule : public L0::ModuleImp {
|
||||
return kernelImmData;
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVectorRef() { return kernelImmDatas; }
|
||||
|
||||
KernelImmutableData *kernelImmData = nullptr;
|
||||
};
|
||||
|
||||
|
||||
@@ -457,17 +457,11 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
|
||||
HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetAllEventData, IsAtLeastSkl) {
|
||||
class MockQueryKernelTimestampsKernel : public L0::KernelImp {
|
||||
public:
|
||||
MockQueryKernelTimestampsKernel(MockModule *module) : KernelImp{module}, module{module} {
|
||||
MockQueryKernelTimestampsKernel(L0::Module *module) : KernelImp(module) {
|
||||
mockKernelImmutableData.kernelDescriptor = &mockKernelDescriptor;
|
||||
size_t stubKernelHeapSize = 42;
|
||||
mockKernelImmutableData.setIsaPerKernelAllocation(module->allocateKernelsIsaMemory(stubKernelHeapSize));
|
||||
this->kernelImmData = &mockKernelImmutableData;
|
||||
}
|
||||
|
||||
~MockQueryKernelTimestampsKernel() override {
|
||||
this->module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(mockKernelImmutableData.isaGraphicsAllocation.release());
|
||||
}
|
||||
|
||||
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override {
|
||||
if (argIndex == 0) {
|
||||
index0Allocation = allocation;
|
||||
@@ -486,13 +480,12 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetA
|
||||
NEO::GraphicsAllocation *index0Allocation = nullptr;
|
||||
KernelDescriptor mockKernelDescriptor = {};
|
||||
WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {};
|
||||
MockModule *module = nullptr;
|
||||
};
|
||||
|
||||
struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl {
|
||||
MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
|
||||
tmpModule = std::make_unique<MockModule>(device, nullptr, ModuleType::Builtin);
|
||||
tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>(tmpModule.get());
|
||||
tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>(static_cast<L0::ModuleImp *>(tmpModule.get()));
|
||||
}
|
||||
MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
|
||||
return tmpMockKernel.get();
|
||||
|
||||
@@ -1382,7 +1382,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichTogethe
|
||||
auto overAllocMinSize = static_cast<uint32_t>(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) + margin1KB;
|
||||
auto kernelNames = std::array<std::string, 2u>{"test1", "test2"};
|
||||
|
||||
auto &kernelImmDatas = this->module->kernelImmDatas;
|
||||
auto proxyModuleImpl = static_cast<ModuleFixture::ProxyModuleImp *>(this->module.get());
|
||||
auto &kernelImmDatas = proxyModuleImpl->getKernelImmDatas();
|
||||
for (size_t i = 0; i < kernelsNb; i++) {
|
||||
auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[i]->getDescriptor());
|
||||
kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = overAllocMinSize + static_cast<uint32_t>(i * MemoryConstants::cacheLineSize);
|
||||
@@ -1418,7 +1419,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichDontExc
|
||||
auto underAllocSize = static_cast<uint32_t>(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) - margin128KB;
|
||||
auto kernelNames = std::array<std::string, 2u>{"test1", "test2"};
|
||||
|
||||
auto &kernelImmDatas = this->module->kernelImmDatas;
|
||||
auto proxyModuleImpl = static_cast<ModuleFixture::ProxyModuleImp *>(this->module.get());
|
||||
auto &kernelImmDatas = proxyModuleImpl->getKernelImmDatas();
|
||||
for (size_t i = 0; i < kernelsNb; i++) {
|
||||
auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[i]->getDescriptor());
|
||||
kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = underAllocSize;
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/mocks/mock_builtins.h"
|
||||
#include "shared/test/common/mocks/mock_compilers.h"
|
||||
@@ -85,12 +84,6 @@ struct L0DebuggerHwFixture : public L0DebuggerFixture {
|
||||
MockDebuggerL0Hw<GfxFamily> *getMockDebuggerL0Hw() {
|
||||
return static_cast<MockDebuggerL0Hw<GfxFamily> *>(debuggerHw);
|
||||
}
|
||||
|
||||
NEO::GraphicsAllocation *allocateIsaMemory(size_t size, bool internal) {
|
||||
auto allocType = (internal ? NEO::AllocationType::KERNEL_ISA_INTERNAL : NEO::AllocationType::KERNEL_ISA);
|
||||
return neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({neoDevice->getRootDeviceIndex(), size, allocType, neoDevice->getDeviceBitfield()});
|
||||
}
|
||||
|
||||
DebuggerL0 *debuggerHw = nullptr;
|
||||
};
|
||||
|
||||
|
||||
@@ -60,7 +60,6 @@ TEST_F(KernelInitializeTest, givenDebuggingEnabledWhenKernelsAreInitializedThenA
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
kernelImmutableData.setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo.heapInfo.kernelHeapSize, false));
|
||||
|
||||
memoryOperationsHandler->makeResidentCalledCount = 0;
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
@@ -328,14 +327,12 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinDebugDataWhenInitializingMo
|
||||
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
|
||||
|
||||
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
|
||||
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
|
||||
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
|
||||
|
||||
kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
|
||||
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
|
||||
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
|
||||
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
|
||||
|
||||
@@ -374,12 +371,10 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenDumpElfFlagAndZebinWhenInitializ
|
||||
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
|
||||
|
||||
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
|
||||
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
|
||||
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
|
||||
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
|
||||
|
||||
kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
|
||||
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
|
||||
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
|
||||
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
|
||||
|
||||
@@ -438,7 +433,6 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinWhenModuleIsInitializedAndD
|
||||
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
|
||||
|
||||
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
|
||||
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
|
||||
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
@@ -573,7 +567,6 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenModuleDebugHandleZeroWhenInitial
|
||||
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
|
||||
|
||||
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
|
||||
kernelImmutableData->setIsaPerKernelAllocation(this->allocateIsaMemory(kernelInfo->heapInfo.kernelHeapSize, false));
|
||||
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
|
||||
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
|
||||
|
||||
@@ -1865,9 +1865,9 @@ TEST_F(KernelLocalIdsTest, WhenKernelIsCreatedThenDefaultLocalIdGenerationbyRunt
|
||||
EXPECT_TRUE(kernel->requiresGenerationOfLocalIdsByRuntime());
|
||||
}
|
||||
|
||||
struct KernelIsaFixture : ModuleFixture {
|
||||
void setUp() {
|
||||
ModuleFixture::setUp(true);
|
||||
struct KernelIsaTests : Test<ModuleFixture> {
|
||||
void SetUp() override {
|
||||
Test<ModuleFixture>::SetUp();
|
||||
|
||||
auto &capabilityTable = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
|
||||
bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
|
||||
@@ -1883,98 +1883,137 @@ struct KernelIsaFixture : ModuleFixture {
|
||||
}
|
||||
|
||||
std::unique_ptr<OsContext> bcsOsContext;
|
||||
uint32_t testKernelHeap = 0;
|
||||
};
|
||||
|
||||
using KernelIsaTests = Test<KernelIsaFixture>;
|
||||
|
||||
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
|
||||
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1));
|
||||
this->createModuleFromMockBinary(ModuleType::User);
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
KernelInfo kernelInfo;
|
||||
kernelInfo.heapInfo.kernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
|
||||
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
|
||||
auto initialTaskCount = bcsCsr->peekTaskCount();
|
||||
|
||||
auto &kernelImmutableData = this->module->kernelImmDatas.back();
|
||||
if (kernelImmutableData->getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) {
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
|
||||
if (kernelImmutableData.getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) {
|
||||
EXPECT_EQ(initialTaskCount + 1, bcsCsr->peekTaskCount());
|
||||
} else {
|
||||
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
|
||||
}
|
||||
|
||||
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
|
||||
}
|
||||
|
||||
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed));
|
||||
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1));
|
||||
this->createModuleFromMockBinary(ModuleType::User);
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
KernelInfo kernelInfo;
|
||||
kernelInfo.heapInfo.kernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
|
||||
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
|
||||
auto initialTaskCount = bcsCsr->peekTaskCount();
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
|
||||
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
|
||||
|
||||
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
|
||||
}
|
||||
|
||||
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
|
||||
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::AllocationType::KERNEL_ISA) - 1));
|
||||
this->createModuleFromMockBinary(ModuleType::User);
|
||||
|
||||
device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
KernelInfo kernelInfo;
|
||||
kernelInfo.heapInfo.kernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
|
||||
auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
|
||||
auto initialTaskCount = bcsCsr->peekTaskCount();
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
|
||||
EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
|
||||
|
||||
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
|
||||
}
|
||||
|
||||
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithInternalIsaThenCorrectAllocationTypeIsUsed) {
|
||||
this->createModuleFromMockBinary(ModuleType::Builtin);
|
||||
uint32_t kernelHeap = 0;
|
||||
KernelInfo kernelInfo;
|
||||
kernelInfo.heapInfo.kernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
auto &kernelImmutableData = this->module->kernelImmDatas.back();
|
||||
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernelImmutableData->getIsaGraphicsAllocation()->getAllocationType());
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, true);
|
||||
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType());
|
||||
}
|
||||
|
||||
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithNonInternalIsaThenCorrectAllocationTypeIsUsed) {
|
||||
this->createModuleFromMockBinary(ModuleType::User);
|
||||
uint32_t kernelHeap = 0;
|
||||
KernelInfo kernelInfo;
|
||||
kernelInfo.heapInfo.kernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
auto &kernelImmutableData = this->module->kernelImmDatas.back();
|
||||
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernelImmutableData->getIsaGraphicsAllocation()->getAllocationType());
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType());
|
||||
}
|
||||
|
||||
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithIsaThenPaddingIsAdded) {
|
||||
this->createModuleFromMockBinary(ModuleType::User);
|
||||
uint32_t kernelHeap = 0;
|
||||
KernelInfo kernelInfo;
|
||||
kernelInfo.heapInfo.kernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
auto &kernelImmutableData = this->module->kernelImmDatas.back();
|
||||
auto kernelHeapSize = kernelImmutableData->getKernelInfo()->heapInfo.kernelHeapSize;
|
||||
auto &helper = device->getNEODevice()->getGfxCoreHelper();
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
auto graphicsAllocation = kernelImmutableData.getIsaGraphicsAllocation();
|
||||
auto &helper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
|
||||
size_t isaPadding = helper.getPaddingForISAAllocation();
|
||||
EXPECT_EQ(kernelImmutableData->getIsaSize(), kernelHeapSize + isaPadding);
|
||||
EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo.heapInfo.kernelHeapSize + isaPadding);
|
||||
}
|
||||
|
||||
TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuffersAreAddedToResidencyContainer) {
|
||||
uint32_t kernelHeap = 0;
|
||||
KernelInfo kernelInfo;
|
||||
kernelInfo.heapInfo.kernelHeapSize = 1;
|
||||
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
|
||||
KernelImmutableData kernelImmutableData(device);
|
||||
|
||||
uint64_t gpuAddress = 0x1200;
|
||||
void *buffer = reinterpret_cast<void *>(gpuAddress);
|
||||
size_t size = 0x1100;
|
||||
NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, size);
|
||||
NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, size);
|
||||
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
this->module.reset(new WhiteBox<::L0::Module>{this->device, moduleBuildLog, ModuleType::User});
|
||||
this->module->mockGlobalVarBuffer = &globalVarBuffer;
|
||||
this->module->mockGlobalConstBuffer = &globalConstBuffer;
|
||||
|
||||
this->createModuleFromMockBinary(ModuleType::User);
|
||||
|
||||
for (auto &kernelImmData : this->module->kernelImmDatas) {
|
||||
auto &resCont = kernelImmData->getResidencyContainer();
|
||||
kernelImmutableData.initialize(&kernelInfo, device, 0,
|
||||
&globalConstBuffer, &globalVarBuffer, false);
|
||||
auto &resCont = kernelImmutableData.getResidencyContainer();
|
||||
EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalVarBuffer));
|
||||
EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalConstBuffer));
|
||||
}
|
||||
this->module->translationUnit->globalConstBuffer = nullptr;
|
||||
this->module->translationUnit->globalVarBuffer = nullptr;
|
||||
}
|
||||
|
||||
using KernelImpPatchBindlessTest = Test<ModuleFixture>;
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/mocks/mock_elf.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
#include "shared/test/common/mocks/mock_l0_debugger.h"
|
||||
#include "shared/test/common/mocks/mock_memory_operations_handler.h"
|
||||
#include "shared/test/common/mocks/mock_modules_zebin.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
@@ -105,71 +104,7 @@ HWTEST_F(ModuleTest, givenUserModuleWhenCreatedThenCorrectAllocationTypeIsUsedFo
|
||||
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernel->getIsaAllocation()->getAllocationType());
|
||||
}
|
||||
|
||||
template <bool localMemEnabled>
|
||||
struct ModuleKernelIsaAllocationsFixture : public ModuleFixture {
|
||||
static constexpr size_t isaAllocationPageSize = (localMemEnabled ? MemoryConstants::pageSize64k : MemoryConstants::pageSize);
|
||||
using Module = WhiteBox<::L0::Module>;
|
||||
|
||||
void setUp() {
|
||||
this->dbgRestorer = std::make_unique<DebugManagerStateRestore>();
|
||||
DebugManager.flags.EnableLocalMemory.set(localMemEnabled);
|
||||
ModuleFixture::setUp();
|
||||
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
auto type = ModuleType::User;
|
||||
this->module.reset(new Mock<Module>{device, moduleBuildLog, type});
|
||||
|
||||
zebinData = std::make_unique<ZebinTestData::ZebinWithL0TestCommonModule>(device->getHwInfo());
|
||||
const auto &src = zebinData->storage;
|
||||
this->moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
|
||||
this->moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.data());
|
||||
this->moduleDesc.inputSize = src.size();
|
||||
|
||||
this->mockModule = static_cast<Mock<Module> *>(this->module.get());
|
||||
}
|
||||
|
||||
void givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned() {
|
||||
mockModule->allocateKernelsIsaMemoryCallBase = false;
|
||||
auto result = module->initialize(&this->moduleDesc, device->getNEODevice());
|
||||
EXPECT_EQ(result, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
void givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned() {
|
||||
mockModule->allocateKernelsIsaMemoryCallBase = false;
|
||||
mockModule->computeKernelIsaAllocationAlignedSizeWithPaddingCallBase = false;
|
||||
mockModule->computeKernelIsaAllocationAlignedSizeWithPaddingResult = isaAllocationPageSize;
|
||||
|
||||
auto result = module->initialize(&this->moduleDesc, device->getNEODevice());
|
||||
EXPECT_EQ(result, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
Mock<Module> *mockModule = nullptr;
|
||||
ze_module_desc_t moduleDesc = {};
|
||||
std::unique_ptr<DebugManagerStateRestore> dbgRestorer = nullptr;
|
||||
};
|
||||
|
||||
using ModuleKernelIsaAllocationsInLocalMemoryTests = Test<ModuleKernelIsaAllocationsFixture<true>>;
|
||||
|
||||
HWTEST_F(ModuleKernelIsaAllocationsInLocalMemoryTests, givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned) {
|
||||
this->givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned();
|
||||
}
|
||||
|
||||
HWTEST_F(ModuleKernelIsaAllocationsInLocalMemoryTests, givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned) {
|
||||
this->givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned();
|
||||
}
|
||||
|
||||
using ModuleKernelIsaAllocationsInSharedMemoryTests = Test<ModuleKernelIsaAllocationsFixture<false>>;
|
||||
|
||||
HWTEST_F(ModuleKernelIsaAllocationsInSharedMemoryTests, givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned) {
|
||||
this->givenIsaMemoryRegionSharedBetweenKernelsWhenGraphicsAllocationFailsThenProperErrorReturned();
|
||||
}
|
||||
|
||||
HWTEST_F(ModuleKernelIsaAllocationsInSharedMemoryTests, givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned) {
|
||||
this->givenSeparateIsaMemoryRegionPerKernelWhenGraphicsAllocationFailsThenProperErrorReturned();
|
||||
}
|
||||
|
||||
HWTEST_F(ModuleTest, givenBuiltinModuleWhenCreatedThenCorrectAllocationTypeIsUsedForIsa) {
|
||||
this->module.reset();
|
||||
createModuleFromMockBinary(ModuleType::Builtin);
|
||||
createKernel();
|
||||
EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernel->getIsaAllocation()->getAllocationType());
|
||||
@@ -2327,8 +2262,16 @@ TEST_F(ModuleFunctionPointerTests, givenModuleWithExportedSymbolThenGetFunctionP
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(functionPointer), module0->kernelImmDatas[0]->getIsaGraphicsAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
class DeviceModuleSetArgBufferFixture : public ModuleFixture {
|
||||
class DeviceModuleSetArgBufferTest : public ModuleFixture, public ::testing::Test {
|
||||
public:
|
||||
void SetUp() override {
|
||||
ModuleFixture::setUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
ModuleFixture::tearDown();
|
||||
}
|
||||
|
||||
void createKernelAndAllocMemory(uint32_t rootDeviceIndex, void **ptr, ze_kernel_handle_t *kernelHandle) {
|
||||
ze_kernel_desc_t kernelDesc = {};
|
||||
kernelDesc.pKernelName = kernelName.c_str();
|
||||
@@ -2340,11 +2283,11 @@ class DeviceModuleSetArgBufferFixture : public ModuleFixture {
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
||||
}
|
||||
};
|
||||
using DeviceModuleSetArgBufferTest = Test<DeviceModuleSetArgBufferFixture>;
|
||||
|
||||
HWTEST_F(DeviceModuleSetArgBufferTest,
|
||||
givenValidMemoryUsedinFirstCallToSetArgBufferThenNullptrSetOnTheSecondCallThenArgBufferisUpdatedInEachCallAndSuccessIsReturned) {
|
||||
uint32_t rootDeviceIndex = 0;
|
||||
createModuleFromMockBinary();
|
||||
|
||||
ze_kernel_handle_t kernelHandle;
|
||||
void *validBufferPtr = nullptr;
|
||||
@@ -3531,227 +3474,6 @@ TEST_F(ModuleTest, whenContainsStatefulAccessIsCalledThenResultIsCorrect) {
|
||||
}
|
||||
}
|
||||
|
||||
template <bool localMemEnabled>
|
||||
struct ModuleIsaAllocationsFixture : public DeviceFixture {
|
||||
static constexpr size_t isaAllocationPageSize = (localMemEnabled ? MemoryConstants::pageSize64k : MemoryConstants::pageSize);
|
||||
static constexpr NEO::MemoryPool isaAllocationMemoryPool = (localMemEnabled ? NEO::MemoryPool::LocalMemory : NEO::MemoryPool::System4KBPagesWith32BitGpuAddressing);
|
||||
|
||||
void setUp() {
|
||||
this->dbgRestorer = std::make_unique<DebugManagerStateRestore>();
|
||||
DebugManager.flags.EnableLocalMemory.set(localMemEnabled);
|
||||
|
||||
DeviceFixture::setUp();
|
||||
|
||||
this->neoDevice = this->device->getNEODevice();
|
||||
this->isaPadding = this->neoDevice->getGfxCoreHelper().getPaddingForISAAllocation();
|
||||
this->kernelStartPointerAlignment = this->neoDevice->getGfxCoreHelper().getKernelIsaPointerAlignment();
|
||||
this->mockMemoryManager = static_cast<MockMemoryManager *>(this->neoDevice->getMemoryManager());
|
||||
this->mockMemoryManager->localMemorySupported[this->neoDevice->getRootDeviceIndex()] = true;
|
||||
this->mockModule.reset(new MockModule{this->device, nullptr, ModuleType::User});
|
||||
this->mockModule->translationUnit.reset(new MockModuleTranslationUnit{this->device});
|
||||
}
|
||||
|
||||
void tearDown() {
|
||||
this->mockModule->translationUnit.reset();
|
||||
this->mockModule.reset();
|
||||
DeviceFixture::tearDown();
|
||||
}
|
||||
|
||||
void prepareKernelInfoAndAddToTranslationUnit(size_t isaSize) {
|
||||
auto kernelInfo = new KernelInfo{};
|
||||
kernelInfo->heapInfo.pKernelHeap = reinterpret_cast<const void *>(0xdeadbeef0000);
|
||||
kernelInfo->heapInfo.kernelHeapSize = static_cast<uint32_t>(isaSize);
|
||||
this->mockModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
|
||||
}
|
||||
|
||||
size_t computeKernelIsaAllocationSizeWithPadding(size_t isaSize) {
|
||||
auto isaPadding = this->neoDevice->getGfxCoreHelper().getPaddingForISAAllocation();
|
||||
return isaPadding + isaSize;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations() {
|
||||
auto requestedSize = 0x40;
|
||||
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
|
||||
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
|
||||
|
||||
auto debugger = MockDebuggerL0Hw<FamilyType>::allocate(neoDevice);
|
||||
this->neoDevice->getRootDeviceEnvironmentRef().debugger.reset(debugger);
|
||||
|
||||
this->mockModule->initializeKernelImmutableDatas();
|
||||
auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVector();
|
||||
EXPECT_EQ(nullptr, kernelImmDatas[0]->getIsaParentAllocation());
|
||||
EXPECT_NE(nullptr, kernelImmDatas[0]->getIsaGraphicsAllocation());
|
||||
EXPECT_EQ(nullptr, kernelImmDatas[1]->getIsaParentAllocation());
|
||||
EXPECT_NE(nullptr, kernelImmDatas[1]->getIsaGraphicsAllocation());
|
||||
}
|
||||
|
||||
void givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations() {
|
||||
auto maxAllocationSizeInPage = alignDown(isaAllocationPageSize - this->isaPadding, this->kernelStartPointerAlignment);
|
||||
this->prepareKernelInfoAndAddToTranslationUnit(maxAllocationSizeInPage);
|
||||
|
||||
auto tinyAllocationSize = 0x8;
|
||||
this->prepareKernelInfoAndAddToTranslationUnit(tinyAllocationSize);
|
||||
|
||||
this->mockModule->initializeKernelImmutableDatas();
|
||||
auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVector();
|
||||
EXPECT_EQ(nullptr, kernelImmDatas[0]->getIsaParentAllocation());
|
||||
EXPECT_NE(nullptr, kernelImmDatas[0]->getIsaGraphicsAllocation());
|
||||
EXPECT_EQ(kernelImmDatas[0]->getIsaOffsetInParentAllocation(), 0lu);
|
||||
EXPECT_EQ(kernelImmDatas[0]->getIsaSubAllocationSize(), 0lu);
|
||||
EXPECT_EQ(nullptr, kernelImmDatas[1]->getIsaParentAllocation());
|
||||
EXPECT_NE(nullptr, kernelImmDatas[1]->getIsaGraphicsAllocation());
|
||||
EXPECT_EQ(kernelImmDatas[1]->getIsaOffsetInParentAllocation(), 0lu);
|
||||
EXPECT_EQ(kernelImmDatas[1]->getIsaSubAllocationSize(), 0lu);
|
||||
if constexpr (localMemEnabled) {
|
||||
EXPECT_EQ(isaAllocationPageSize, kernelImmDatas[0]->getIsaSize());
|
||||
EXPECT_EQ(isaAllocationPageSize, kernelImmDatas[1]->getIsaSize());
|
||||
} else {
|
||||
EXPECT_EQ(this->computeKernelIsaAllocationSizeWithPadding(maxAllocationSizeInPage), kernelImmDatas[0]->getIsaSize());
|
||||
EXPECT_EQ(this->computeKernelIsaAllocationSizeWithPadding(tinyAllocationSize), kernelImmDatas[1]->getIsaSize());
|
||||
}
|
||||
}
|
||||
|
||||
struct ProxyKernelImmutableData : public KernelImmutableData {
|
||||
using BaseClass = KernelImmutableData;
|
||||
using BaseClass::BaseClass;
|
||||
|
||||
~ProxyKernelImmutableData() override { this->KernelImmutableData::~KernelImmutableData(); }
|
||||
|
||||
ADDMETHOD(initialize, ze_result_t, true, ZE_RESULT_ERROR_UNKNOWN,
|
||||
(NEO::KernelInfo * kernelInfo, L0::Device *device, uint32_t computeUnitsUsedForScratch, NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel),
|
||||
(kernelInfo, device, computeUnitsUsedForScratch, globalConstBuffer, globalVarBuffer, internalKernel));
|
||||
};
|
||||
|
||||
void givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched() {
|
||||
auto requestedSize = 0x40;
|
||||
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
|
||||
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
|
||||
this->prepareKernelInfoAndAddToTranslationUnit(requestedSize);
|
||||
|
||||
auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVectorRef();
|
||||
{
|
||||
auto kernelsCount = 3ul;
|
||||
kernelImmDatas.reserve(kernelsCount);
|
||||
for (size_t i = 0lu; i < kernelsCount; i++) {
|
||||
kernelImmDatas.emplace_back(new ProxyKernelImmutableData(this->device));
|
||||
}
|
||||
auto result = this->mockModule->setIsaGraphicsAllocations();
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
}
|
||||
|
||||
static_cast<ProxyKernelImmutableData *>(kernelImmDatas[2].get())->initializeCallBase = false;
|
||||
auto result = this->mockModule->initializeKernelImmutableDatas();
|
||||
EXPECT_EQ(result, ZE_RESULT_ERROR_UNKNOWN);
|
||||
ASSERT_NE(kernelImmDatas[0].get(), nullptr);
|
||||
ASSERT_NE(kernelImmDatas[1].get(), nullptr);
|
||||
EXPECT_EQ(kernelImmDatas[2].get(), nullptr);
|
||||
EXPECT_NE(kernelImmDatas[0]->getIsaGraphicsAllocation(), nullptr);
|
||||
EXPECT_NE(kernelImmDatas[1]->getIsaGraphicsAllocation(), nullptr);
|
||||
}
|
||||
|
||||
size_t isaPadding;
|
||||
size_t kernelStartPointerAlignment;
|
||||
NEO::Device *neoDevice = nullptr;
|
||||
MockMemoryManager *mockMemoryManager = nullptr;
|
||||
std::unique_ptr<MockModule> mockModule = nullptr;
|
||||
std::unique_ptr<DebugManagerStateRestore> dbgRestorer = nullptr;
|
||||
};
|
||||
using ModuleIsaAllocationsInLocalMemoryTest = Test<ModuleIsaAllocationsFixture<true>>;
|
||||
|
||||
// Registers two kernels whose padded ISA sizes are derived from the ISA allocation page size,
// initializes the kernel immutable data and verifies that both ISAs are placed back to back
// inside the same parent allocation.
TEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhichFitInSinglePage64KWhenKernelImmutableDatasInitializedThenKernelIsasShareParentAllocation) {
    EXPECT_EQ(mockModule->isaAllocationPageSize, isaAllocationPageSize);

    // First kernel: a small ISA.
    auto firstRequestedSize = 0x40;
    prepareKernelInfoAndAddToTranslationUnit(firstRequestedSize);
    auto firstAlignedSize = mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(firstRequestedSize);

    // Second kernel: sized from what is left of the page after the first ISA and one padding.
    auto secondRequestedSize = isaAllocationPageSize - firstAlignedSize - isaPadding;
    prepareKernelInfoAndAddToTranslationUnit(secondRequestedSize);
    auto secondAlignedSize = mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(secondRequestedSize);

    mockModule->initializeKernelImmutableDatas();

    auto &immDatas = mockModule->getKernelImmutableDataVector();
    const auto &firstKernel = immDatas[0];
    const auto &secondKernel = immDatas[1];

    // Each kernel reports the parent allocation as its ISA allocation, at consecutive offsets.
    EXPECT_EQ(firstKernel->getIsaGraphicsAllocation(), firstKernel->getIsaParentAllocation());
    EXPECT_EQ(firstKernel->getIsaOffsetInParentAllocation(), 0lu);
    EXPECT_EQ(firstKernel->getIsaSubAllocationSize(), firstAlignedSize);
    EXPECT_EQ(secondKernel->getIsaGraphicsAllocation(), secondKernel->getIsaParentAllocation());
    EXPECT_EQ(secondKernel->getIsaOffsetInParentAllocation(), firstAlignedSize);
    EXPECT_EQ(secondKernel->getIsaSubAllocationSize(), secondAlignedSize);

    // Reported ISA sizes and the memory pool of the backing allocation.
    EXPECT_EQ(firstKernel->getIsaSize(), firstAlignedSize);
    EXPECT_EQ(firstKernel->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
    EXPECT_EQ(secondKernel->getIsaSize(), secondAlignedSize);
    EXPECT_EQ(secondKernel->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
}
|
||||
|
||||
// Hardware-family-parameterized entry point: the scenario itself lives in the fixture helper,
// which is invoked here with the current FamilyType.
HWTEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhichFitInSinglePage64KAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
    this->givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations<FamilyType>();
}
|
||||
|
||||
// Thin entry point: delegates to the fixture helper implementing the "ISAs exceed a single page" scenario.
TEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhichExceedSinglePage64KWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
    givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations();
}
|
||||
|
||||
// Thin entry point: delegates to the fixture helper implementing the "kernel initialization fails" scenario.
TEST_F(ModuleIsaAllocationsInLocalMemoryTest, givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched) {
    givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched();
}
|
||||
|
||||
// Test suite built on ModuleIsaAllocationsFixture instantiated with `false`.
// NOTE(review): judging by the suite name, `false` presumably selects system-memory ISA
// allocations - confirm against the fixture's template parameter.
using ModuleIsaAllocationsInSystemMemoryTest = Test<ModuleIsaAllocationsFixture<false>>;
|
||||
|
||||
// Registers two tiny kernels and checks the resulting ISA layout. Whether both ISAs share one
// parent allocation or get separate allocations is derived from the padded sizes computed by
// the module, and the branch matching that expectation is verified.
TEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenKernelIsaWhichCouldFitInPages4KBWhenKernelImmutableDatasInitializedThenKernelIsasCanGetSeparateAllocationsDependingOnPaddingSize) {
    EXPECT_EQ(this->mockModule->isaAllocationPageSize, isaAllocationPageSize);

    const auto requestedSize1 = 0x8;
    this->prepareKernelInfoAndAddToTranslationUnit(requestedSize1);
    auto isaAllocationAlignedSize1 = this->mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(requestedSize1);

    const auto requestedSize2 = 0x4;
    this->prepareKernelInfoAndAddToTranslationUnit(requestedSize2);
    auto isaAllocationAlignedSize2 = this->mockModule->computeKernelIsaAllocationAlignedSizeWithPadding(requestedSize2);

    // for 4kB pages, 2x isaPaddings alone could exceed isaAllocationPageSize, which precludes page sharing
    const bool isasShouldShareSamePage = (isaAllocationAlignedSize1 + isaAllocationAlignedSize2 <= isaAllocationPageSize);

    // Initialization has to succeed, otherwise the layout checks below are meaningless.
    ASSERT_EQ(ZE_RESULT_SUCCESS, this->mockModule->initializeKernelImmutableDatas());
    auto &kernelImmDatas = this->mockModule->getKernelImmutableDataVector();
    // Both entries are indexed below; abort the test instead of reading out of bounds.
    ASSERT_GE(kernelImmDatas.size(), 2u);

    if (isasShouldShareSamePage) {
        EXPECT_EQ(kernelImmDatas[0]->getIsaGraphicsAllocation(), kernelImmDatas[0]->getIsaParentAllocation());
        EXPECT_EQ(kernelImmDatas[0]->getIsaOffsetInParentAllocation(), 0lu);
        EXPECT_EQ(kernelImmDatas[0]->getIsaSize(), isaAllocationAlignedSize1);
        EXPECT_EQ(kernelImmDatas[1]->getIsaGraphicsAllocation(), kernelImmDatas[1]->getIsaParentAllocation());
        EXPECT_EQ(kernelImmDatas[1]->getIsaOffsetInParentAllocation(), isaAllocationAlignedSize1);
        EXPECT_EQ(kernelImmDatas[1]->getIsaSubAllocationSize(), isaAllocationAlignedSize2);
        // The size of the second kernel is checked here; the previous version re-checked kernel 0
        // against the second size, which only passed when both padded sizes happened to be equal.
        EXPECT_EQ(kernelImmDatas[1]->getIsaSize(), isaAllocationAlignedSize2);
    } else {
        EXPECT_EQ(nullptr, kernelImmDatas[0]->getIsaParentAllocation());
        EXPECT_NE(nullptr, kernelImmDatas[0]->getIsaGraphicsAllocation());
        EXPECT_EQ(kernelImmDatas[0]->getIsaOffsetInParentAllocation(), 0lu);
        EXPECT_EQ(kernelImmDatas[0]->getIsaSubAllocationSize(), 0lu);
        EXPECT_EQ(kernelImmDatas[0]->getIsaSize(), computeKernelIsaAllocationSizeWithPadding(requestedSize1));
        EXPECT_EQ(nullptr, kernelImmDatas[1]->getIsaParentAllocation());
        EXPECT_NE(nullptr, kernelImmDatas[1]->getIsaGraphicsAllocation());
        EXPECT_EQ(kernelImmDatas[1]->getIsaOffsetInParentAllocation(), 0lu);
        EXPECT_EQ(kernelImmDatas[1]->getIsaSubAllocationSize(), 0lu);
        EXPECT_EQ(kernelImmDatas[1]->getIsaSize(), computeKernelIsaAllocationSizeWithPadding(requestedSize2));
    }

    // Guard the dereferences below: a missing ISA allocation should fail the test, not crash it.
    ASSERT_NE(nullptr, kernelImmDatas[0]->getIsaGraphicsAllocation());
    ASSERT_NE(nullptr, kernelImmDatas[1]->getIsaGraphicsAllocation());
    EXPECT_EQ(kernelImmDatas[0]->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
    EXPECT_EQ(kernelImmDatas[1]->getIsaGraphicsAllocation()->getMemoryPool(), isaAllocationMemoryPool);
}
|
||||
|
||||
// Hardware-family-parameterized entry point: the scenario itself lives in the fixture helper,
// which is invoked here with the current FamilyType.
HWTEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
    this->givenMultipleKernelIsasWhichFitInSinglePageAndDebuggerEnabledWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations<FamilyType>();
}
|
||||
|
||||
// Thin entry point: delegates to the fixture helper implementing the "ISAs exceed a single page" scenario.
TEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations) {
    givenMultipleKernelIsasWhichExceedSinglePageWhenKernelImmutableDatasAreInitializedThenKernelIsasGetSeparateAllocations();
}
|
||||
|
||||
// Thin entry point: delegates to the fixture helper implementing the "kernel initialization fails" scenario.
TEST_F(ModuleIsaAllocationsInSystemMemoryTest, givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched) {
    givenMultipleKernelIsasWhenKernelInitializationFailsThenItIsProperlyCleanedAndPreviouslyInitializedKernelsLeftUntouched();
}
|
||||
|
||||
// Test suite for the module initialization tests; uses DeviceFixture as its fixture.
using ModuleInitializeTest = Test<DeviceFixture>;
|
||||
|
||||
TEST_F(ModuleInitializeTest, whenModuleInitializeIsCalledThenCorrectResultIsReturned) {
|
||||
@@ -3844,7 +3566,9 @@ TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDe
|
||||
moduleDesc.inputSize = 10;
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
|
||||
auto module = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
|
||||
std::unique_ptr<MockModule> module = std::make_unique<MockModule>(device,
|
||||
moduleBuildLog,
|
||||
ModuleType::User);
|
||||
module->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
|
||||
module->translationUnit->globalVarBuffer = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
|
||||
@@ -3868,8 +3592,7 @@ TEST_F(ModuleDebugDataTest, GivenDebugDataWithRelocationsWhenCreatingRelocatedDe
|
||||
// pass kernelInfo ownership to programInfo
|
||||
module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
|
||||
|
||||
auto kernelImmData = std::make_unique<WhiteBox<::L0::KernelImmutableData>>(this->device);
|
||||
kernelImmData->setIsaPerKernelAllocation(module->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
|
||||
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
|
||||
kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false);
|
||||
kernelImmData->createRelocatedDebugData(module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer);
|
||||
|
||||
@@ -4041,7 +3764,6 @@ TEST_F(ModuleTests, whenCopyingPatchedSegmentsThenAllocationsAreSetWritableForTb
|
||||
kernelInfo->heapInfo.pKernelHeap = data;
|
||||
|
||||
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
|
||||
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
|
||||
kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
|
||||
|
||||
pModule->kernelImmDatas.push_back(std::move(kernelImmData));
|
||||
@@ -4071,7 +3793,6 @@ TEST_F(ModuleTests, givenConstDataStringSectionWhenLinkingModuleThenSegmentIsPat
|
||||
kernelInfo->heapInfo.pKernelHeap = data;
|
||||
|
||||
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
|
||||
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
|
||||
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
auto patchAddr = reinterpret_cast<uintptr_t>(ptrOffset(kernelImmData->isaGraphicsAllocation->getUnderlyingBuffer(), 0x8));
|
||||
pModule->kernelImmDatas.push_back(std::move(kernelImmData));
|
||||
@@ -4102,7 +3823,6 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingBuiltinMo
|
||||
kernelInfo->heapInfo.pKernelHeap = data;
|
||||
|
||||
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
|
||||
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
|
||||
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, true);
|
||||
|
||||
kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = true;
|
||||
@@ -4148,7 +3868,6 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen
|
||||
auto slmInlineSizeCopy = kernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize;
|
||||
|
||||
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
|
||||
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
|
||||
kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true);
|
||||
|
||||
pModule->kernelImmDatas.push_back(std::move(kernelImmData));
|
||||
@@ -4191,7 +3910,6 @@ TEST_F(ModuleTests, givenFullyLinkedModuleWhenCreatingKernelThenDebugMsgOnPrivat
|
||||
kernelInfo->heapInfo.pKernelHeap = data;
|
||||
|
||||
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
|
||||
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
|
||||
kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true);
|
||||
|
||||
pModule->kernelImmDatas.push_back(std::move(kernelImmData));
|
||||
@@ -4235,7 +3953,6 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingModuleThe
|
||||
kernelInfo->heapInfo.pKernelHeap = data;
|
||||
|
||||
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
|
||||
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
|
||||
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
|
||||
kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = true;
|
||||
@@ -4266,7 +3983,6 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndNoDebuggerOrStackCallsWhenLink
|
||||
kernelInfo->heapInfo.pKernelHeap = data;
|
||||
|
||||
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
|
||||
kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
|
||||
kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
|
||||
|
||||
kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = false;
|
||||
@@ -4307,9 +4023,6 @@ TEST_F(ModuleTests, givenModuleWithGlobalAndConstAllocationsWhenGettingModuleAll
|
||||
module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
|
||||
|
||||
std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
|
||||
auto isaAlloc = module->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize);
|
||||
ASSERT_NE(isaAlloc, nullptr);
|
||||
kernelImmData->setIsaPerKernelAllocation(isaAlloc);
|
||||
kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false);
|
||||
module->kernelImmDatas.push_back(std::move(kernelImmData));
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
{
|
||||
auto alloc = args.dispatchInterface->getIsaAllocation();
|
||||
UNRECOVERABLE_IF(nullptr == alloc);
|
||||
auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
|
||||
auto offset = alloc->getGpuAddressToPatch();
|
||||
idd.setKernelStartPointer(offset);
|
||||
idd.setKernelStartPointerHigh(0u);
|
||||
}
|
||||
|
||||
@@ -87,7 +87,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
{
|
||||
auto alloc = args.dispatchInterface->getIsaAllocation();
|
||||
UNRECOVERABLE_IF(nullptr == alloc);
|
||||
auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
|
||||
auto offset = alloc->getGpuAddressToPatch();
|
||||
if (!localIdsGenerationByRuntime) {
|
||||
offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
||||
}
|
||||
|
||||
@@ -53,7 +53,6 @@ class GfxCoreHelper {
|
||||
static std::unique_ptr<GfxCoreHelper> create(const GFXCORE_FAMILY gfxCoreFamily);
|
||||
virtual size_t getMaxBarrierRegisterPerSlice() const = 0;
|
||||
virtual size_t getPaddingForISAAllocation() const = 0;
|
||||
virtual size_t getKernelIsaPointerAlignment() const = 0;
|
||||
virtual uint32_t getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
|
||||
virtual uint32_t getPitchAlignmentForImage(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
|
||||
virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo, const ProductHelper &productHelper) = 0;
|
||||
@@ -216,10 +215,6 @@ class GfxCoreHelperHw : public GfxCoreHelper {
|
||||
|
||||
size_t getPaddingForISAAllocation() const override;
|
||||
|
||||
size_t getKernelIsaPointerAlignment() const override {
|
||||
return static_cast<size_t>(GfxFamily::cmdInitInterfaceDescriptorData.KERNELSTARTPOINTER_ALIGN_SIZE);
|
||||
}
|
||||
|
||||
uint32_t getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
|
||||
|
||||
uint32_t getPitchAlignmentForImage(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
|
||||
|
||||
@@ -40,7 +40,6 @@ struct DispatchKernelEncoderI {
|
||||
virtual uint32_t getSurfaceStateHeapDataSize() const = 0;
|
||||
|
||||
virtual GraphicsAllocation *getIsaAllocation() const = 0;
|
||||
virtual uint64_t getIsaOffsetInParentAllocation() const = 0;
|
||||
virtual const uint8_t *getDynamicStateHeapData() const = 0;
|
||||
|
||||
virtual uint32_t getRequiredWorkgroupOrder() const = 0;
|
||||
|
||||
@@ -997,45 +997,6 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThen
|
||||
EXPECT_NE(usedAfter, usedBefore);
|
||||
}
|
||||
|
||||
// Encodes a dispatch for a kernel whose mock reports a non-zero ISA offset inside its parent
// allocation and verifies that the interface descriptor's kernel start pointer equals the
// allocation's GPU address to patch plus that offset.
HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenProgrammingWalkerThenKernelStartPointerHasProperOffset, IsBeforeXeHpCore) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    auto kernelEncoder = std::make_unique<MockDispatchKernelEncoder>();
    // Offset expressed in units of the kernel start pointer granularity.
    kernelEncoder->getIsaOffsetInParentAllocationResult = 8 << INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_BIT_SHIFT;

    uint32_t dispatchDims[] = {2, 1, 1};
    bool uncachedMocsRequired = false;
    EncodeDispatchKernelArgs encodeArgs = createDefaultDispatchKernelArgs(pDevice, kernelEncoder.get(), dispatchDims, uncachedMocsRequired);
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), encodeArgs);

    auto interfaceDescriptor = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
    const auto expectedStartPointer = kernelEncoder->getIsaAllocation()->getGpuAddressToPatch() + kernelEncoder->getIsaOffsetInParentAllocation();
    EXPECT_EQ(interfaceDescriptor->getKernelStartPointer(), expectedStartPointer);
}
|
||||
|
||||
// The GfxCoreHelper getter must report the same kernel start pointer alignment as the
// family's INTERFACE_DESCRIPTOR_DATA definition.
HWTEST_F(EncodeDispatchKernelTest, givenKernelStartPointerAlignmentInInterfaceDescriptorWhenHelperGetterUsedThenCorrectValueReturned) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    const auto expectedAlignment = INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_ALIGN_SIZE;
    const auto reportedAlignment = pDevice->getGfxCoreHelper().getKernelIsaPointerAlignment();
    EXPECT_EQ(expectedAlignment, reportedAlignment);
}
|
||||
|
||||
// Encodes a dispatch for a kernel whose mock reports a non-zero ISA offset inside its parent
// allocation, locates the walker command in the command stream and verifies that the kernel
// start pointer of its interface descriptor equals the allocation's GPU address to patch plus
// that offset.
HWTEST2_F(EncodeDispatchKernelTest, givenKernelsSharingISAParentAllocationsWhenProgrammingWalkerThenKernelStartPointerHasProperOffset, IsAtLeastXeHpCore) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    auto kernelEncoder = std::make_unique<MockDispatchKernelEncoder>();
    // Offset expressed in units of the kernel start pointer granularity.
    kernelEncoder->getIsaOffsetInParentAllocationResult = 8 << INTERFACE_DESCRIPTOR_DATA::KERNELSTARTPOINTER_BIT_SHIFT;

    uint32_t dispatchDims[] = {2, 1, 1};
    bool uncachedMocsRequired = false;
    EncodeDispatchKernelArgs encodeArgs = createDefaultDispatchKernelArgs(pDevice, kernelEncoder.get(), dispatchDims, uncachedMocsRequired);
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), encodeArgs);

    // Parse everything written to the command stream so far and find the walker command.
    GenCmdList parsedCommands;
    auto *commandStream = cmdContainer->getCommandStream();
    CmdParse<FamilyType>::parseCommandBuffer(parsedCommands, ptrOffset(commandStream->getCpuBase(), 0), commandStream->getUsed());
    auto walkerIt = find<WALKER_TYPE *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(walkerIt, parsedCommands.end());

    auto walker = genCmdCast<WALKER_TYPE *>(*walkerIt);
    const auto expectedStartPointer = kernelEncoder->getIsaAllocation()->getGpuAddressToPatch() + kernelEncoder->getIsaOffsetInParentAllocation();
    EXPECT_EQ(walker->getInterfaceDescriptor().getKernelStartPointer(), expectedStartPointer);
}
|
||||
|
||||
HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatchingKernelThenSurfaceStateOffsetInCrossThreadDataIsNotPatched) {
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor;
|
||||
|
||||
@@ -55,6 +55,5 @@ struct MockDispatchKernelEncoder : public DispatchKernelEncoderI {
|
||||
ADDMETHOD_CONST_NOBASE(getDynamicStateHeapData, const uint8_t *, nullptr, ());
|
||||
ADDMETHOD_CONST_NOBASE(requiresGenerationOfLocalIdsByRuntime, bool, true, ());
|
||||
ADDMETHOD_CONST_NOBASE(getSlmPolicy, SlmPolicy, SlmPolicy::SlmPolicyNone, ());
|
||||
ADDMETHOD_CONST_NOBASE(getIsaOffsetInParentAllocation, uint64_t, 0lu, ());
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user