feature: implement ISA allocation pooling in OpenCL

Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2025-11-17 15:26:03 +00:00
committed by Compute-Runtime-Automation
parent a627594e83
commit 1b9b78ac16
25 changed files with 824 additions and 87 deletions

View File

@@ -124,8 +124,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
if constexpr (heaplessModeEnabled == false) {
if (auto kernelAllocation = kernelInfo.getGraphicsAllocation()) {
EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.kernelHeapSize, 0, rootDeviceEnvironment);
if (auto kernelAllocation = kernelInfo.getIsaGraphicsAllocation()) {
EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.kernelHeapSize, kernelInfo.getIsaOffsetInParentAllocation(), rootDeviceEnvironment);
}
}

View File

@@ -515,7 +515,7 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize,
srcSize = getKernelHeapSize();
break;
case CL_KERNEL_BINARY_GPU_ADDRESS_INTEL:
nonCannonizedGpuAddress = gmmHelper->decanonize(kernelInfo.kernelAllocation->getGpuAddress());
nonCannonizedGpuAddress = gmmHelper->decanonize(kernelInfo.getIsaGraphicsAllocation()->getGpuAddress() + kernelInfo.getIsaOffsetInParentAllocation());
pSrc = &nonCannonizedGpuAddress;
srcSize = sizeof(nonCannonizedGpuAddress);
break;
@@ -788,21 +788,23 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize)
pKernelInfo->isKernelHeapSubstituted = true;
auto memoryManager = executionEnvironment.memoryManager.get();
auto currentAllocationSize = pKernelInfo->kernelAllocation->getUnderlyingBufferSize();
auto currentAllocationSize = pKernelInfo->getIsaSize();
bool status = false;
auto &rootDeviceEnvironment = clDevice.getRootDeviceEnvironment();
auto &helper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
size_t isaPadding = helper.getPaddingForISAAllocation();
DEBUG_BREAK_IF(nullptr != pKernelInfo->getIsaParentAllocation());
if (currentAllocationSize >= newKernelHeapSize + isaPadding) {
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
auto useBlitter = productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *pKernelInfo->getGraphicsAllocation());
auto useBlitter = productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *pKernelInfo->getIsaGraphicsAllocation());
status = MemoryTransferHelper::transferMemoryToAllocation(useBlitter,
clDevice.getDevice(), pKernelInfo->getGraphicsAllocation(), 0, newKernelHeap,
clDevice.getDevice(), pKernelInfo->getIsaGraphicsAllocation(), 0, newKernelHeap,
static_cast<size_t>(newKernelHeapSize));
} else {
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation);
pKernelInfo->kernelAllocation = nullptr;
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->getIsaGraphicsAllocation());
pKernelInfo->setIsaPerKernelAllocation(nullptr);
status = pKernelInfo->createKernelAllocation(clDevice.getDevice(), isBuiltIn);
}
UNRECOVERABLE_IF(!status);
@@ -1315,7 +1317,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
}
makeArgsResident(commandStreamReceiver);
auto kernelIsaAllocation = this->kernelInfo.kernelAllocation;
auto kernelIsaAllocation = this->kernelInfo.getIsaGraphicsAllocation();
if (kernelIsaAllocation) {
commandStreamReceiver.makeResident(*kernelIsaAllocation);
}
@@ -1381,8 +1383,8 @@ void Kernel::getResidency(std::vector<Surface *> &dst) {
}
}
auto kernelIsaAllocation = this->kernelInfo.kernelAllocation;
if (kernelIsaAllocation) {
if (auto kernelIsaAllocation = this->kernelInfo.getIsaGraphicsAllocation();
kernelIsaAllocation != nullptr) {
GeneralSurface *surface = new GeneralSurface(kernelIsaAllocation);
dst.push_back(surface);
}
@@ -1922,12 +1924,13 @@ bool Kernel::hasIndirectStatelessAccessToHostMemory() const {
}
uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed, const bool returnFullAddress) const {
uint64_t kernelStartOffset = 0;
if (kernelInfo.getGraphicsAllocation()) {
kernelStartOffset = returnFullAddress ? kernelInfo.getGraphicsAllocation()->getGpuAddress()
: kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
if (kernelInfo.getIsaGraphicsAllocation()) {
auto offsetInParentAllocation = kernelInfo.getIsaOffsetInParentAllocation();
kernelStartOffset = returnFullAddress ? kernelInfo.getIsaGraphicsAllocation()->getGpuAddress() + offsetInParentAllocation
: kernelInfo.getIsaGraphicsAllocation()->getGpuAddressToPatch() + offsetInParentAllocation;
if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) {
kernelStartOffset += kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad;
}

View File

@@ -16,6 +16,7 @@
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/file_io.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
@@ -109,14 +110,16 @@ cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, size_
exportedFunctionsKernelId = static_cast<size_t>(linkerInput->getExportedFunctionsSegmentId());
// Exported functions reside in instruction heap of one of kernels
auto exportedFunctionHeapId = linkerInput->getExportedFunctionsSegmentId();
buildInfos[rootDeviceIndex].exportedFunctionsSurface = kernelInfoArray[exportedFunctionHeapId]->getGraphicsAllocation();
buildInfos[rootDeviceIndex].exportedFunctionsSurface = kernelInfoArray[exportedFunctionHeapId]->getIsaGraphicsAllocation();
auto offsetInParentAllocation = kernelInfoArray[exportedFunctionHeapId]->getIsaOffsetInParentAllocation();
auto &compilerProductHelper = pDevice->getCompilerProductHelper();
if (compilerProductHelper.isHeaplessModeEnabled(pDevice->getHardwareInfo())) {
exportedFunctions.gpuAddress = static_cast<uintptr_t>(buildInfos[rootDeviceIndex].exportedFunctionsSurface->getGpuAddress());
exportedFunctions.gpuAddress = static_cast<uintptr_t>(buildInfos[rootDeviceIndex].exportedFunctionsSurface->getGpuAddress() + offsetInParentAllocation);
} else {
exportedFunctions.gpuAddress = static_cast<uintptr_t>(buildInfos[rootDeviceIndex].exportedFunctionsSurface->getGpuAddressToPatch());
exportedFunctions.gpuAddress = static_cast<uintptr_t>(buildInfos[rootDeviceIndex].exportedFunctionsSurface->getGpuAddressToPatch() + offsetInParentAllocation);
}
exportedFunctions.segmentSize = buildInfos[rootDeviceIndex].exportedFunctionsSurface->getUnderlyingBufferSize();
exportedFunctions.segmentSize = kernelInfoArray[exportedFunctionHeapId]->getIsaSize();
}
Linker::PatchableSegments isaSegmentsForPatching;
std::vector<std::vector<char>> patchedIsaTempStorage;
@@ -128,8 +131,8 @@ cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, size_
auto &kernHeapInfo = kernelInfo->heapInfo;
const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.kernelHeapSize));
DEBUG_BREAK_IF(nullptr == kernelInfo->getGraphicsAllocation());
isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), static_cast<uintptr_t>(kernelInfo->getGraphicsAllocation()->getGpuAddressToPatch()), kernHeapInfo.kernelHeapSize});
DEBUG_BREAK_IF(nullptr == kernelInfo->getIsaGraphicsAllocation());
isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), static_cast<uintptr_t>(kernelInfo->getIsaGraphicsAllocation()->getGpuAddressToPatch() + kernelInfo->getIsaOffsetInParentAllocation()), kernHeapInfo.kernelHeapSize});
kernelDescriptors.push_back(&kernelInfo->kernelDescriptor);
}
}
@@ -151,16 +154,8 @@ cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, size_
updateBuildLog(pDevice->getRootDeviceIndex(), error.c_str(), error.size());
return CL_INVALID_BINARY;
} else if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
for (auto kernelId = 0u; kernelId < kernelInfoArray.size(); kernelId++) {
const auto &kernelInfo = kernelInfoArray[kernelId];
auto &kernHeapInfo = kernelInfo->heapInfo;
auto segmentId = &kernelInfo - &kernelInfoArray[0];
auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
const auto &productHelper = pDevice->getProductHelper();
MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *kernelInfo->getGraphicsAllocation()),
*pDevice, kernelInfo->getGraphicsAllocation(), 0, isaSegmentsForPatching[segmentId].hostPointer,
static_cast<size_t>(kernHeapInfo.kernelHeapSize));
}
[[maybe_unused]] auto success = transferIsaSegmentsToAllocation(pDevice, kernelInfoArray, &isaSegmentsForPatching, rootDeviceIndex);
DEBUG_BREAK_IF(!success);
}
DBG_LOG(PrintRelocations, NEO::constructRelocationsDebugMessage(this->getSymbols(pDevice->getRootDeviceIndex())));
return CL_SUCCESS;
@@ -328,19 +323,11 @@ cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) {
}
buildInfos[rootDeviceIndex].kernelMiscInfoPos = src.kernelMiscInfoPos;
for (auto &kernelInfo : kernelInfoArray) {
cl_int retVal = CL_SUCCESS;
if (kernelInfo->heapInfo.kernelHeapSize) {
retVal = kernelInfo->createKernelAllocation(clDevice.getDevice(), isBuiltIn) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY;
}
if (retVal != CL_SUCCESS) {
if (auto retVal = setIsaGraphicsAllocations(clDevice.getDevice(), kernelInfoArray, deviceInfoConstants, rootDeviceIndex);
retVal != CL_SUCCESS) {
return retVal;
}
kernelInfo->apply(deviceInfoConstants);
}
indirectDetectionVersion = src.indirectDetectionVersion;
indirectAccessBufferMajorVersion = src.indirectAccessBufferMajorVersion;
@@ -383,8 +370,16 @@ Zebin::Debug::Segments Program::getZebinSegments(uint32_t rootDeviceIndex) {
buildInfos[rootDeviceIndex].constStringSectionData.size};
std::vector<NEO::Zebin::Debug::Segments::KernelNameIsaTupleT> kernels;
for (const auto &kernelInfo : buildInfos[rootDeviceIndex].kernelInfoArray) {
NEO::Zebin::Debug::Segments::Segment segment;
if (kernelInfo->getIsaParentAllocation()) {
segment.address = static_cast<uintptr_t>(kernelInfo->getIsaGraphicsAllocation()->getGpuAddress() + kernelInfo->getIsaOffsetInParentAllocation());
segment.size = kernelInfo->getIsaSubAllocationSize();
} else {
segment.address = static_cast<uintptr_t>(kernelInfo->getIsaGraphicsAllocation()->getGpuAddress());
segment.size = kernelInfo->getIsaGraphicsAllocation()->getUnderlyingBufferSize();
}
NEO::Zebin::Debug::Segments::Segment segment = {static_cast<uintptr_t>(kernelInfo->getGraphicsAllocation()->getGpuAddress()), kernelInfo->getGraphicsAllocation()->getUnderlyingBufferSize()};
kernels.push_back({kernelInfo->kernelDescriptor.kernelMetadata.kernelName, segment});
}
return Zebin::Debug::Segments(getGlobalSurface(rootDeviceIndex), getConstantSurface(rootDeviceIndex), strings, kernels);

View File

@@ -19,6 +19,7 @@
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/addressing_mode_helper.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/compiler_options_parser.h"
#include "shared/source/helpers/compiler_product_helper.h"
@@ -29,10 +30,13 @@
#include "shared/source/os_interface/os_context.h"
#include "shared/source/program/kernel_info.h"
#include "shared/source/program/metadata_generation.h"
#include "shared/source/utilities/buffer_pool_allocator.inl"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include <algorithm>
namespace NEO {
Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevicesIn) : executionEnvironment(*clDevicesIn[0]->getExecutionEnvironment()),
@@ -301,6 +305,172 @@ cl_int Program::createProgramFromBinary(
return retVal;
}
bool Program::isIsaPoolingEnabled(Device &neoDevice) {
if (auto dbgFlag = debugManager.flags.EnableIsaAllocationPool.get(); dbgFlag != -1) {
return static_cast<bool>(dbgFlag);
}
return neoDevice.getProductHelper().is2MBLocalMemAlignmentEnabled() &&
nullptr == neoDevice.getL0Debugger() &&
false == gtpinIsGTPinInitialized() &&
false == neoDevice.getMemoryManager()->isKernelBinaryReuseEnabled();
}
cl_int Program::setIsaGraphicsAllocations(Device &neoDevice, std::vector<KernelInfo *> &kernelInfoArray, DeviceInfoKernelPayloadConstants &deviceInfoConstants, uint32_t rootDeviceIndex) {
for (auto &kernelInfo : kernelInfoArray) {
kernelInfo->apply(deviceInfoConstants);
}
std::vector<KernelInfo *> validKernelInfos;
validKernelInfos.reserve(kernelInfoArray.size());
std::ranges::copy_if(kernelInfoArray,
std::back_inserter(validKernelInfos),
[](const KernelInfo *info) { return info->heapInfo.kernelHeapSize != 0; });
DEBUG_BREAK_IF(validKernelInfos.size() != kernelInfoArray.size());
if (validKernelInfos.empty()) {
return CL_SUCCESS;
}
if (isIsaPoolingEnabled(neoDevice)) {
const size_t kernelsCount = validKernelInfos.size();
std::vector<std::pair<size_t, size_t>> kernelsChunks(kernelsCount);
size_t kernelsIsaTotalSize = 0u;
for (size_t i = 0; i < kernelsCount; i++) {
auto chunkOffset = kernelsIsaTotalSize;
auto chunkSize = computeKernelIsaAllocationAlignedSizeWithPadding(neoDevice, validKernelInfos[i]->heapInfo.kernelHeapSize, ((i + 1) == kernelsCount));
kernelsIsaTotalSize += chunkSize;
kernelsChunks[i] = {chunkOffset, chunkSize};
}
auto &isaAllocator = neoDevice.getIsaPoolAllocator();
auto crossProgramAllocation = isaAllocator.requestGraphicsAllocationForIsa(isBuiltIn, kernelsIsaTotalSize);
if (crossProgramAllocation == nullptr) {
return CL_OUT_OF_HOST_MEMORY;
}
auto &sharedIsaAllocation = buildInfos[rootDeviceIndex].sharedIsaAllocation;
sharedIsaAllocation.reset(crossProgramAllocation);
for (size_t i = 0; i < kernelsCount; i++) {
auto [isaOffset, isaSize] = kernelsChunks[i];
validKernelInfos[i]->setIsaParentAllocation(sharedIsaAllocation->getGraphicsAllocation());
validKernelInfos[i]->setIsaSubAllocationOffset(sharedIsaAllocation->getOffset() + isaOffset);
validKernelInfos[i]->setIsaSubAllocationSize(isaSize);
}
if (!transferIsaSegmentsToAllocation(&neoDevice, validKernelInfos, nullptr, rootDeviceIndex)) {
return CL_OUT_OF_HOST_MEMORY;
}
} else {
for (auto &kernelInfo : validKernelInfos) {
if (!kernelInfo->createKernelAllocation(neoDevice, isBuiltIn)) {
return CL_OUT_OF_HOST_MEMORY;
}
}
}
return CL_SUCCESS;
}
bool Program::transferIsaSegmentsToAllocation(Device *pDevice, std::vector<KernelInfo *> &kernelInfoArray, const Linker::PatchableSegments *isaSegmentsForPatching, uint32_t rootDeviceIndex) {
const auto &productHelper = pDevice->getProductHelper();
auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
auto &sharedIsaAllocation = buildInfos[rootDeviceIndex].sharedIsaAllocation;
if (sharedIsaAllocation) {
const auto isaBufferSize = sharedIsaAllocation->getSize();
DEBUG_BREAK_IF(isaBufferSize == 0);
std::vector<std::byte> isaBuffer(isaBufferSize);
std::memset(isaBuffer.data(), 0x0, isaBufferSize);
auto programOffset = sharedIsaAllocation->getOffset();
for (auto &kernelInfo : kernelInfoArray) {
kernelInfo->getIsaGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
kernelInfo->getIsaGraphicsAllocation()->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
auto [kernelHeapPtr, kernelHeapSize] = getKernelHeapPointerAndSize(kernelInfo, kernelInfoArray, isaSegmentsForPatching);
auto isaOffset = kernelInfo->getIsaOffsetInParentAllocation() - programOffset;
memcpy_s(isaBuffer.data() + isaOffset,
isaBufferSize - isaOffset,
kernelHeapPtr,
kernelHeapSize);
}
auto programAllocation = sharedIsaAllocation->getGraphicsAllocation();
auto lock = sharedIsaAllocation->obtainSharedAllocationLock();
auto success = NEO::MemoryTransferHelper::transferMemoryToAllocation(
productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *programAllocation),
*pDevice,
programAllocation,
programOffset,
isaBuffer.data(),
isaBuffer.size());
if (pDevice->getDefaultEngine().commandStreamReceiver->getType() != NEO::CommandStreamReceiverType::hardware) {
pDevice->getDefaultEngine().commandStreamReceiver->writeMemory(*programAllocation);
}
return success;
} else {
if (pDevice->getMemoryManager()->isKernelBinaryReuseEnabled()) {
return true;
}
for (auto &kernelInfo : kernelInfoArray) {
auto [kernelHeapPtr, kernelHeapSize] = getKernelHeapPointerAndSize(kernelInfo, kernelInfoArray, isaSegmentsForPatching);
if (nullptr == kernelInfo->getIsaGraphicsAllocation() || 0 == kernelHeapSize) {
continue;
}
DEBUG_BREAK_IF(0 == kernelInfo->heapInfo.kernelHeapSize);
DEBUG_BREAK_IF(kernelInfo->getIsaGraphicsAllocation() == kernelInfo->getIsaParentAllocation());
bool success = MemoryTransferHelper::transferMemoryToAllocation(
productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *kernelInfo->getIsaGraphicsAllocation()),
*pDevice,
kernelInfo->getIsaGraphicsAllocation(),
0,
kernelHeapPtr,
kernelHeapSize);
if (!success) {
return false;
}
}
}
return true;
}
std::pair<const void *, size_t> Program::getKernelHeapPointerAndSize(KernelInfo *const &kernelInfo, std::vector<KernelInfo *> &kernelInfoArray, const Linker::PatchableSegments *isaSegmentsForPatching) {
if (isaSegmentsForPatching) {
auto &segments = *isaSegmentsForPatching;
auto segmentId = &kernelInfo - &kernelInfoArray[0];
return {segments[segmentId].hostPointer, segments[segmentId].segmentSize};
} else {
return {kernelInfo->heapInfo.pKernelHeap, static_cast<size_t>(kernelInfo->heapInfo.kernelHeapSize)};
}
}
size_t Program::computeKernelIsaAllocationAlignedSizeWithPadding(const Device &neoDevice, size_t isaSize, bool lastKernel) {
auto isaPadding = lastKernel ? neoDevice.getGfxCoreHelper().getPaddingForISAAllocation() : 0u;
auto kernelStartPointerAlignment = neoDevice.getGfxCoreHelper().getKernelIsaPointerAlignment();
auto isaAllocationSize = alignUp(isaPadding + isaSize, kernelStartPointerAlignment);
return isaAllocationSize;
}
GraphicsAllocation *Program::getKernelsIsaParentAllocation(uint32_t rootDeviceIndex) const {
if (!buildInfos[rootDeviceIndex].sharedIsaAllocation) {
return nullptr;
}
return buildInfos[rootDeviceIndex].sharedIsaAllocation->getGraphicsAllocation();
}
cl_int Program::setProgramSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue) {
if (!isSpirV) {
return CL_INVALID_PROGRAM;
@@ -381,11 +551,11 @@ const char *Program::getBuildLog(uint32_t rootDeviceIndex) const {
void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) {
auto &buildInfo = buildInfos[rootDeviceIndex];
for (auto &kernelInfo : buildInfo.kernelInfoArray) {
if (kernelInfo->kernelAllocation) {
if (kernelInfo->getIsaGraphicsAllocation()) {
// register cache flush in all csrs where kernel allocation was used
for (auto &engine : this->executionEnvironment.memoryManager->getRegisteredEngines(rootDeviceIndex)) {
auto contextId = engine.osContext->getContextId();
if (kernelInfo->kernelAllocation->isUsedByOsContext(contextId)) {
if (kernelInfo->getIsaGraphicsAllocation()->isUsedByOsContext(contextId)) {
engine.commandStreamReceiver->registerInstructionCacheFlush();
}
}
@@ -403,13 +573,26 @@ void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) {
}
}
} else {
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->kernelAllocation);
if (!buildInfo.sharedIsaAllocation) {
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->getIsaGraphicsAllocation());
}
}
}
delete kernelInfo;
}
buildInfo.kernelInfoArray.clear();
metadataGeneration.reset(new MetadataGeneration());
if (buildInfo.sharedIsaAllocation) {
for (auto &device : clDevices) {
auto &isaAllocator = device->getDevice().getIsaPoolAllocator();
if (isaAllocator.isPoolBuffer(buildInfo.sharedIsaAllocation->getGraphicsAllocation())) {
isaAllocator.freeSharedIsaAllocation(buildInfo.sharedIsaAllocation.release());
break;
}
}
DEBUG_BREAK_IF(buildInfo.sharedIsaAllocation);
}
}
void Program::updateNonUniformFlag() {
@@ -666,9 +849,15 @@ StackVec<NEO::GraphicsAllocation *, 32> Program::getModuleAllocations(uint32_t r
StackVec<NEO::GraphicsAllocation *, 32> allocs;
auto &kernelInfoArray = buildInfos[rootIndex].kernelInfoArray;
if (auto isaParentAllocation = this->getKernelsIsaParentAllocation(rootIndex);
isaParentAllocation != nullptr) {
allocs.push_back(isaParentAllocation);
} else {
for (const auto &kernelInfo : kernelInfoArray) {
allocs.push_back(kernelInfo->getGraphicsAllocation());
allocs.push_back(kernelInfo->getIsaGraphicsAllocation());
}
}
GraphicsAllocation *globalsForPatching = getGlobalSurfaceGA(rootIndex);
GraphicsAllocation *constantsForPatching = getConstantSurfaceGA(rootIndex);

View File

@@ -37,7 +37,9 @@ struct MetadataGeneration;
struct KernelInfo;
enum class DecodeError : uint8_t;
struct ExternalFunctionInfo;
class SharedPoolAlloction;
class SharedPoolAllocation;
struct DeviceInfoKernelPayloadConstants;
class ProductHelper;
template <>
struct OpenCLObjectMapper<_cl_program> {
@@ -196,6 +198,31 @@ class Program : public BaseObject<_cl_program> {
NEO::GraphicsAllocation *getGlobalSurfaceGA(uint32_t rootDeviceIndex) const;
NEO::GraphicsAllocation *getExportedFunctionsSurface(uint32_t rootDeviceIndex) const;
MOCKABLE_VIRTUAL bool isIsaPoolingEnabled(Device &neoDevice);
cl_int setIsaGraphicsAllocations(
Device &neoDevice,
std::vector<KernelInfo *> &kernelInfoArray,
DeviceInfoKernelPayloadConstants &deviceInfoConstants,
uint32_t rootDeviceIndex);
MOCKABLE_VIRTUAL bool transferIsaSegmentsToAllocation(
Device *pDevice,
std::vector<KernelInfo *> &kernelInfoArray,
const Linker::PatchableSegments *isaSegmentsForPatching,
uint32_t rootDeviceIndex);
std::pair<const void *, size_t> getKernelHeapPointerAndSize(
KernelInfo *const &kernelInfo,
std::vector<KernelInfo *> &kernelInfoArray,
const Linker::PatchableSegments *isaSegmentsForPatching);
size_t computeKernelIsaAllocationAlignedSizeWithPadding(
const Device &neoDevice,
size_t isaSize,
bool lastKernel);
GraphicsAllocation *getKernelsIsaParentAllocation(uint32_t rootDeviceIndex) const;
void cleanCurrentKernelInfo(uint32_t rootDeviceIndex);
const std::string &getOptions() const { return options; }
@@ -331,6 +358,7 @@ class Program : public BaseObject<_cl_program> {
std::vector<KernelInfo *> kernelInfoArray;
std::unique_ptr<NEO::SharedPoolAllocation> constantSurface;
std::unique_ptr<NEO::SharedPoolAllocation> globalSurface;
std::unique_ptr<NEO::SharedPoolAllocation> sharedIsaAllocation;
GraphicsAllocation *exportedFunctionsSurface = nullptr;
size_t globalVarTotalSize = 0U;
std::unique_ptr<LinkerInput> linkerInput;

View File

@@ -379,8 +379,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubHwLocalIdsTest, WhenEnqueueDimension
EXPECT_EQ(1u, walker->getGenerateLocalId());
constexpr bool isHeapless = FamilyType::template isHeaplessMode<WalkerType>();
auto kernelAllocationGpuAddr = isHeapless ? kernel->getKernelInfo().kernelAllocation->getGpuAddress()
: kernel->getKernelInfo().kernelAllocation->getGpuAddressToPatch();
auto offsetInParentAllocation = kernel->getKernelInfo().getIsaOffsetInParentAllocation();
auto kernelAllocationGpuAddr = isHeapless ? kernel->getKernelInfo().getIsaGraphicsAllocation()->getGpuAddress() + offsetInParentAllocation
: kernel->getKernelInfo().getIsaGraphicsAllocation()->getGpuAddressToPatch() + offsetInParentAllocation;
auto skipOffset = kernel->getKernelInfo().kernelDescriptor.entryPoints.skipPerThreadDataLoad;
uint64_t kernelStartPointer = kernelAllocationGpuAddr + skipOffset;

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/utilities/buffer_pool_allocator.inl"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
@@ -1236,6 +1237,12 @@ HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnq
if (it->first == bufferAllocation) {
expected = 3u;
}
// Shared parent isa allocation for all kernels
if (pDevice->getIsaPoolAllocator().isPoolBuffer(it->first)) {
expected = 2u;
}
EXPECT_EQ(expected, it->second);
}

View File

@@ -941,6 +941,10 @@ TEST_F(EnqueueSvmTest, GivenSvmAllocationWhenEnqueingKernelThenSuccessIsReturned
TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSurfacesAreMadeResident) {
USE_REAL_FILE_SYSTEM();
DebugManagerStateRestore dbgRestore;
debugManager.flags.EnableIsaAllocationPool.set(false);
auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
ASSERT_NE(nullptr, svmData);
GraphicsAllocation *svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -116,15 +116,22 @@ void KernelDataTest::buildAndDecode() {
EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pSsh, pSsh, sshSize));
}
if (kernelHeapSize) {
auto kernelAllocation = pKernelInfo->getGraphicsAllocation();
auto kernelAllocation = pKernelInfo->getIsaGraphicsAllocation();
UNRECOVERABLE_IF(kernelAllocation == nullptr);
auto &device = pContext->getDevice(0)->getDevice();
auto &helper = device.getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
size_t isaPadding = helper.getPaddingForISAAllocation();
EXPECT_EQ(kernelAllocation->getUnderlyingBufferSize(), kernelHeapSize + isaPadding);
size_t expectedIsaSize = kernelHeapSize + isaPadding;
if (program->getKernelsIsaParentAllocation(rootDeviceIndex)) {
expectedIsaSize = alignUp(expectedIsaSize, pContext->getDevice(0)->getDevice().getGfxCoreHelper().getKernelIsaPointerAlignment());
}
EXPECT_EQ(pKernelInfo->getIsaSize(), expectedIsaSize);
auto kernelIsa = kernelAllocation->getUnderlyingBuffer();
if (pKernelInfo->getIsaParentAllocation()) {
kernelIsa = ptrOffset(kernelIsa, pKernelInfo->getIsaOffsetInParentAllocation());
}
EXPECT_EQ(0, memcmp(kernelIsa, pKernelInfo->heapInfo.pKernelHeap, kernelHeapSize));
} else {
EXPECT_EQ(nullptr, pKernelInfo->getGraphicsAllocation());
EXPECT_EQ(nullptr, pKernelInfo->getIsaGraphicsAllocation());
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -60,8 +60,9 @@ class KernelDataTest : public testing::Test {
}
void TearDown() override {
if (pKernelInfo->kernelAllocation) {
pContext->getDevice(0)->getMemoryManager()->freeGraphicsMemory(pKernelInfo->kernelAllocation);
auto isaAllocation = pKernelInfo->getIsaGraphicsAllocation();
if (isaAllocation && !pKernelInfo->getIsaParentAllocation()) {
pContext->getDevice(0)->getMemoryManager()->freeGraphicsMemory(isaAllocation);
const_cast<KernelInfo *>(pKernelInfo)->kernelAllocation = nullptr;
}
program.reset();

View File

@@ -170,6 +170,7 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture {
public:
void setUp() {
debugManager.flags.GTPinAllocateBufferInSharedMemory.set(false);
debugManager.flags.EnableIsaAllocationPool.set(false);
setUpImpl();
}

View File

@@ -219,7 +219,8 @@ TEST_F(KernelTests, givenBinaryWhenItIsQueriedForGpuAddressThenAbsoluteAddressIs
EXPECT_EQ(CL_SUCCESS, retVal);
auto gmmHelper = pDevice->getGmmHelper();
auto expectedGpuAddress = gmmHelper->decanonize(kernel->getKernelInfo().kernelAllocation->getGpuAddress());
auto &kernelInfo = kernel->getKernelInfo();
auto expectedGpuAddress = gmmHelper->decanonize(kernelInfo.getIsaGraphicsAllocation()->getGpuAddress() + kernelInfo.getIsaOffsetInParentAllocation());
EXPECT_EQ(expectedGpuAddress, paramValue);
EXPECT_EQ(paramValueSize, paramValueSizeRet);
}
@@ -2900,11 +2901,11 @@ TEST(KernelInfoTest, givenGfxCoreHelperWhenCreatingKernelAllocationThenCorrectPa
mockKernel->kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
mockKernel->kernelInfo.createKernelAllocation(clDevice->getDevice(), false);
auto graphicsAllocation = mockKernel->kernelInfo.getGraphicsAllocation();
auto graphicsAllocation = mockKernel->kernelInfo.getIsaGraphicsAllocation();
auto &helper = clDevice->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
size_t isaPadding = helper.getPaddingForISAAllocation();
EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), mockKernel->kernelInfo.heapInfo.kernelHeapSize + isaPadding);
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation());
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getIsaGraphicsAllocation());
}
TEST(KernelTest, WhenSettingKernelArgThenBuiltinDispatchInfoBuilderIsUsed) {
@@ -3469,12 +3470,12 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse
mockKernel.kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad = 128;
mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
auto allocationOffset = mockKernel.kernelInfo.getIsaGraphicsAllocation()->getGpuAddressToPatch();
mockKernel.mockKernel->setStartOffset(128);
auto offset = mockKernel.mockKernel->getKernelStartAddress(false, true, false, false);
EXPECT_EQ(allocationOffset + 256u, offset);
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getIsaGraphicsAllocation());
}
TEST(KernelTest, givenFullAddressRequestWhenAskingForKernelStartAddressThenReturnFullAddress) {
@@ -3485,9 +3486,9 @@ TEST(KernelTest, givenFullAddressRequestWhenAskingForKernelStartAddressThenRetur
mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
auto address = mockKernel.mockKernel->getKernelStartAddress(false, true, false, true);
EXPECT_EQ(mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddress(), address);
EXPECT_EQ(mockKernel.kernelInfo.getIsaGraphicsAllocation()->getGpuAddress(), address);
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getIsaGraphicsAllocation());
}
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
@@ -3498,12 +3499,12 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet
mockKernel.kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad = 128;
mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
auto allocationOffset = mockKernel.kernelInfo.getIsaGraphicsAllocation()->getGpuAddressToPatch();
mockKernel.mockKernel->setStartOffset(128);
auto offset = mockKernel.mockKernel->getKernelStartAddress(true, true, false, false);
EXPECT_EQ(allocationOffset + 128u, offset);
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getIsaGraphicsAllocation());
}
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
@@ -3514,12 +3515,12 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe
mockKernel.kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad = 128;
mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
auto allocationOffset = mockKernel.kernelInfo.getIsaGraphicsAllocation()->getGpuAddressToPatch();
mockKernel.mockKernel->setStartOffset(128);
auto offset = mockKernel.mockKernel->getKernelStartAddress(false, false, false, false);
EXPECT_EQ(allocationOffset + 128u, offset);
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getIsaGraphicsAllocation());
}
TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaultValue) {
@@ -4163,3 +4164,89 @@ TEST(KernelTest, whenCallingGetEnqueuedLocalWorkSizeValuesThenReturnProperValues
EXPECT_EQ(expectedELWS[1], *(enqueuedLocalWorkSize[1]));
EXPECT_EQ(expectedELWS[2], *(enqueuedLocalWorkSize[2]));
}
class KernelInfoIsaAllocationTest : public ::testing::Test {
protected:
void SetUp() override {
standaloneAllocation = std::make_unique<MockGraphicsAllocation>();
parentAllocation = std::make_unique<MockGraphicsAllocation>();
kernelInfo = std::make_unique<MockKernelInfo>();
}
void TearDown() override {}
std::unique_ptr<MockGraphicsAllocation> standaloneAllocation;
std::unique_ptr<MockGraphicsAllocation> parentAllocation;
std::unique_ptr<MockKernelInfo> kernelInfo;
};
TEST_F(KernelInfoIsaAllocationTest, givenStandaloneAllocationWhenQueryingIsaPropertiesThenCorrectValuesReturned) {
constexpr size_t allocationSize = 4096u;
standaloneAllocation->setSize(allocationSize);
kernelInfo->setIsaPerKernelAllocation(standaloneAllocation.get());
EXPECT_EQ(standaloneAllocation.get(), kernelInfo->getGraphicsAllocation());
EXPECT_EQ(standaloneAllocation.get(), kernelInfo->getIsaGraphicsAllocation());
EXPECT_EQ(allocationSize, kernelInfo->getIsaSize());
EXPECT_EQ(nullptr, kernelInfo->getIsaParentAllocation());
EXPECT_EQ(0u, kernelInfo->getIsaOffsetInParentAllocation());
EXPECT_EQ(0u, kernelInfo->getIsaSubAllocationSize());
}
TEST_F(KernelInfoIsaAllocationTest, givenParentAllocationWhenSettingIsaPropertiesThenCorrectValuesReturned) {
constexpr size_t offset = 1024u;
constexpr size_t subAllocationSize = 2048u;
kernelInfo->setIsaParentAllocation(parentAllocation.get());
kernelInfo->setIsaSubAllocationOffset(offset);
kernelInfo->setIsaSubAllocationSize(subAllocationSize);
EXPECT_EQ(parentAllocation.get(), kernelInfo->getIsaParentAllocation());
EXPECT_EQ(parentAllocation.get(), kernelInfo->getIsaGraphicsAllocation());
EXPECT_EQ(subAllocationSize, kernelInfo->getIsaSize());
EXPECT_EQ(offset, kernelInfo->getIsaOffsetInParentAllocation());
EXPECT_EQ(subAllocationSize, kernelInfo->getIsaSubAllocationSize());
EXPECT_EQ(nullptr, kernelInfo->kernelAllocation);
// Test updating offset and size
constexpr size_t newOffset = 2048u;
constexpr size_t newSize = 4096u;
kernelInfo->setIsaSubAllocationOffset(newOffset);
kernelInfo->setIsaSubAllocationSize(newSize);
EXPECT_EQ(newOffset, kernelInfo->getIsaOffsetInParentAllocation());
EXPECT_EQ(newSize, kernelInfo->getIsaSubAllocationSize());
EXPECT_EQ(newSize, kernelInfo->getIsaSize());
}
TEST_F(KernelInfoIsaAllocationTest, givenNoAllocationWhenQueryingIsaPropertiesThenDefaultValuesReturned) {
EXPECT_EQ(nullptr, kernelInfo->getGraphicsAllocation());
EXPECT_EQ(nullptr, kernelInfo->getIsaParentAllocation());
EXPECT_EQ(0u, kernelInfo->getIsaOffsetInParentAllocation());
EXPECT_EQ(0u, kernelInfo->getIsaSubAllocationSize());
}
TEST_F(KernelInfoIsaAllocationTest, givenTransitionBetweenAllocationTypesWhenChangingPropertiesThenCorrectStateIsMaintained) {
constexpr size_t standaloneSize = 4096u;
constexpr size_t pooledOffset = 512u;
constexpr size_t pooledSize = 2048u;
// Start with standalone
standaloneAllocation->setSize(standaloneSize);
kernelInfo->setIsaPerKernelAllocation(standaloneAllocation.get());
EXPECT_EQ(standaloneSize, kernelInfo->getIsaSize());
EXPECT_EQ(standaloneAllocation.get(), kernelInfo->getIsaGraphicsAllocation());
EXPECT_EQ(nullptr, kernelInfo->getIsaParentAllocation());
// Transition to pooled
kernelInfo->kernelAllocation = nullptr;
kernelInfo->setIsaParentAllocation(parentAllocation.get());
kernelInfo->setIsaSubAllocationOffset(pooledOffset);
kernelInfo->setIsaSubAllocationSize(pooledSize);
EXPECT_EQ(pooledSize, kernelInfo->getIsaSize());
EXPECT_EQ(parentAllocation.get(), kernelInfo->getIsaGraphicsAllocation());
EXPECT_EQ(pooledOffset, kernelInfo->getIsaOffsetInParentAllocation());
EXPECT_EQ(nullptr, kernelInfo->kernelAllocation);
}

View File

@@ -233,9 +233,11 @@ class MockProgram : public Program {
}
void debugNotify(const ClDeviceVector &deviceVector, std::unordered_map<uint32_t, BuildPhase> &phasesReached) override {
if (callBaseDebugNotify) {
Program::debugNotify(deviceVector, phasesReached);
wasDebuggerNotified = true;
}
}
void callPopulateZebinExtendedArgsMetadataOnce(uint32_t rootDeviceIndex) override {
wasPopulateZebinExtendedArgsMetadataOnceCalled = true;
@@ -245,18 +247,35 @@ class MockProgram : public Program {
}
}
bool transferIsaSegmentsToAllocation(Device *pDevice, std::vector<KernelInfo *> &kernelInfoArray, const Linker::PatchableSegments *isaSegmentsForPatching, uint32_t rootDeviceIndex) override {
if (transferIsaSegmentsToAllocationOverride != -1) {
return (transferIsaSegmentsToAllocationOverride > 0);
}
return Program::transferIsaSegmentsToAllocation(pDevice, kernelInfoArray, isaSegmentsForPatching, rootDeviceIndex);
}
bool isIsaPoolingEnabled(Device &neoDevice) override {
if (isIsaPoolingEnabledOverride != -1) {
return (isIsaPoolingEnabledOverride > 0);
}
return Program::isIsaPoolingEnabled(neoDevice);
}
std::vector<NEO::ExternalFunctionInfo> externalFunctions;
std::map<uint32_t, int> processGenBinaryCalledPerRootDevice;
std::map<uint32_t, int> replaceDeviceBinaryCalledPerRootDevice;
static int getInternalOptionsCalled;
int isFlagOptionOverride = -1;
int isOptionValueValidOverride = -1;
int transferIsaSegmentsToAllocationOverride = -1;
int isIsaPoolingEnabledOverride = -1;
bool contextSet = false;
bool wasProcessDebugDataCalled = false;
bool wasCreateDebugZebinCalled = false;
bool wasDebuggerNotified = false;
bool wasPopulateZebinExtendedArgsMetadataOnceCalled = false;
bool callBasePopulateZebinExtendedArgsMetadataOnce = false;
bool callBaseDebugNotify = true;
auto getIntermediateRepresentation() const { return this->intermediateRepresentation; }
auto getIsGeneratedByIgc() const { return this->isGeneratedByIgc; }
auto &getBuildInfos() { return this->buildInfos; }

View File

@@ -432,6 +432,9 @@ HWTEST2_TEMPLATED_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageI
GTEST_SKIP();
}
DebugManagerStateRestore dbgRestore;
debugManager.flags.EnableIsaAllocationPool.set(false);
device->setPreemptionMode(PreemptionMode::Disabled);
mock->ioctlExpected.gemCreate = 1;

View File

@@ -838,7 +838,7 @@ TEST(ProgramLinkBinaryTest, whenLinkerUnresolvedExternalThenLinkFailedAndBuildLo
expectedUnresolvedExternals.push_back(Linker::UnresolvedExternal{relocation, 0, false});
auto expectedError = constructLinkerErrorMessage(expectedUnresolvedExternals, std::vector<std::string>{"kernel : " + kernelInfo.kernelDescriptor.kernelMetadata.kernelName});
EXPECT_TRUE(hasSubstr(buildLog, expectedError));
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.getGraphicsAllocation());
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.getIsaGraphicsAllocation());
}
HWTEST2_F(ProgramDataTest, whenLinkerInputValidThenIsaIsProperlyPatched, MatchAny) {
@@ -978,7 +978,7 @@ TEST(ProgramStringSectionTest, WhenConstStringBufferIsPresentThenUseItForLinking
program.setLinkerInput(rootDeviceIndex, std::move(linkerInput));
auto isaCpuPtr = reinterpret_cast<char *>(kernelInfo.getGraphicsAllocation()->getUnderlyingBuffer());
auto isaCpuPtr = reinterpret_cast<char *>(kernelInfo.getIsaGraphicsAllocation()->getUnderlyingBuffer());
auto patchAddr = ptrOffset(isaCpuPtr, 0x8);
const char constStringData[] = "Hello World!\n";

View File

@@ -41,10 +41,12 @@
#include "shared/test/common/mocks/mock_ail_configuration.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_compiler_interface.h"
#include "shared/test/common/mocks/mock_debugger.h"
#include "shared/test/common/mocks/mock_elf.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_modules_zebin.h"
#include "shared/test/common/mocks/mock_product_helper.h"
#include "shared/test/common/mocks/mock_tbx_csr.h"
#include "shared/test/common/mocks/mock_usm_memory_pool.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/utilities/base_object_utils.h"
@@ -511,12 +513,67 @@ TEST_F(ProgramFromBinaryTest, givenProgramWhenItIsBeingBuildThenItContainsGraphi
pProgram->build(pProgram->getDevices(), nullptr);
auto kernelInfo = pProgram->getKernelInfo(size_t(0), rootDeviceIndex);
auto graphicsAllocation = kernelInfo->getGraphicsAllocation();
auto graphicsAllocation = kernelInfo->getIsaGraphicsAllocation();
ASSERT_NE(nullptr, graphicsAllocation);
EXPECT_TRUE(graphicsAllocation->is32BitAllocation());
auto &helper = pDevice->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
size_t isaPadding = helper.getPaddingForISAAllocation();
bool isIsaPooled = (pProgram->getKernelsIsaParentAllocation(rootDeviceIndex) != nullptr);
if (!isIsaPooled) {
EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo->heapInfo.kernelHeapSize + isaPadding);
}
EXPECT_EQ(kernelInfo->getIsaSize(), kernelInfo->heapInfo.kernelHeapSize + isaPadding);
auto kernelIsa = ptrOffset(graphicsAllocation->getUnderlyingBuffer(), kernelInfo->getIsaOffsetInParentAllocation());
EXPECT_NE(kernelInfo->heapInfo.pKernelHeap, kernelIsa);
EXPECT_EQ(0, memcmp(kernelIsa, kernelInfo->heapInfo.pKernelHeap, kernelInfo->heapInfo.kernelHeapSize));
auto rootDeviceIndex = graphicsAllocation->getRootDeviceIndex();
auto gmmHelper = pDevice->getGmmHelper();
EXPECT_EQ(gmmHelper->decanonize(graphicsAllocation->getGpuBaseAddress()), pDevice->getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, graphicsAllocation->isAllocatedInLocalMemoryPool()));
}
class ProgramFromBinaryIsaPoolingTest : public ProgramFromBinaryTest {
public:
void SetUp() override {
ProgramFromBinaryFixture::SetUp();
enableIsaPooling();
}
void TearDown() override {
ProgramFromBinaryFixture::TearDown();
}
void enableIsaPooling() {
pProgram->isIsaPoolingEnabledOverride = 1;
}
void disableIsaPooling() {
pProgram->isIsaPoolingEnabledOverride = 0;
}
void defaultIsaPooling() {
pProgram->isIsaPoolingEnabledOverride = -1;
}
};
TEST_F(ProgramFromBinaryIsaPoolingTest, givenEnabledIsaAllocationPoolWhenBuildingProgramThenIsaAllocationIsPartOfSharedParentAllocation) {
pProgram->build(pProgram->getDevices(), nullptr);
EXPECT_NE(nullptr, pProgram->getKernelsIsaParentAllocation(rootDeviceIndex));
auto kernelInfo = pProgram->getKernelInfo(size_t(0), rootDeviceIndex);
auto graphicsAllocation = kernelInfo->getIsaGraphicsAllocation();
ASSERT_NE(nullptr, graphicsAllocation);
EXPECT_TRUE(graphicsAllocation->is32BitAllocation());
EXPECT_TRUE(pDevice->getIsaPoolAllocator().isPoolBuffer(graphicsAllocation));
EXPECT_EQ(nullptr, kernelInfo->kernelAllocation);
auto expectedIsaSize = pProgram->computeKernelIsaAllocationAlignedSizeWithPadding(*pDevice, kernelInfo->heapInfo.kernelHeapSize, true);
EXPECT_EQ(expectedIsaSize, kernelInfo->getIsaSize());
EXPECT_EQ(0u, kernelInfo->getIsaOffsetInParentAllocation());
auto kernelIsa = graphicsAllocation->getUnderlyingBuffer();
EXPECT_NE(kernelInfo->heapInfo.pKernelHeap, kernelIsa);
@@ -526,6 +583,53 @@ TEST_F(ProgramFromBinaryTest, givenProgramWhenItIsBeingBuildThenItContainsGraphi
EXPECT_EQ(gmmHelper->decanonize(graphicsAllocation->getGpuBaseAddress()), pDevice->getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, graphicsAllocation->isAllocatedInLocalMemoryPool()));
}
TEST_F(ProgramFromBinaryIsaPoolingTest, givenEnabledIsaAllocationPoolWhenBuildingProgramAndTransferIsaSegmentsToAllocationFailsThenReturnOutOfHostMemory) {
pProgram->transferIsaSegmentsToAllocationOverride = 0;
retVal = pProgram->build(pProgram->getDevices(), nullptr);
EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
}
TEST_F(ProgramFromBinaryIsaPoolingTest, givenEnabledIsaAllocationPoolWhenBuildingProgramAndRequestGraphicsAllocationForIsaFailsThenReturnOutOfHostMemory) {
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
memoryManager->isMockHostMemoryManager = true;
memoryManager->forceFailureInPrimaryAllocation = true;
retVal = pProgram->build(pProgram->getDevices(), nullptr);
EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
EXPECT_EQ(nullptr, pProgram->getKernelsIsaParentAllocation(rootDeviceIndex));
}
TEST_F(ProgramFromBinaryIsaPoolingTest, givenDisabledIsaAllocationPoolWhenBuildingProgramAndCreateKernelAllocationFailsThenReturnOutOfHostMemory) {
disableIsaPooling();
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
memoryManager->isMockHostMemoryManager = true;
memoryManager->forceFailureInPrimaryAllocation = true;
retVal = pProgram->build(pProgram->getDevices(), nullptr);
EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
}
TEST_F(ProgramFromBinaryIsaPoolingTest, givenEnabled2MBLocalMemAlignmentGTPinInitializedWhenBuildingProgramThenIsaAllocationPoolIsNotUsed) {
defaultIsaPooling();
pProgram->callBaseDebugNotify = false;
auto mockProductHelper = new MockProductHelper;
pDevice->getRootDeviceEnvironmentRef().productHelper.reset(mockProductHelper);
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
isGTPinInitialized = true;
pProgram->build(pProgram->getDevices(), nullptr);
isGTPinInitialized = false;
auto kernelInfo = pProgram->getKernelInfo(size_t(0), rootDeviceIndex);
EXPECT_EQ(nullptr, pProgram->getKernelsIsaParentAllocation(rootDeviceIndex));
EXPECT_NE(nullptr, kernelInfo->kernelAllocation);
EXPECT_FALSE(pDevice->getIsaPoolAllocator().isPoolBuffer(kernelInfo->getIsaGraphicsAllocation()));
}
TEST_F(ProgramFromBinaryTest, whenProgramIsBeingRebuildThenOutdatedGlobalBuffersAreFreed) {
pProgram->build(pProgram->getDevices(), nullptr);
EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface);
@@ -695,6 +799,33 @@ TEST_F(ProgramFromBinaryTest, givenProgramWithGlobalAndConstAllocationsWhenGetti
pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface = std::make_unique<SharedPoolAllocation>(new MockGraphicsAllocation());
pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface = std::make_unique<SharedPoolAllocation>(new MockGraphicsAllocation());
auto allocs = pProgram->getModuleAllocations(pClDevice->getRootDeviceIndex());
auto expectedSize = 2u + (pProgram->getKernelsIsaParentAllocation(pClDevice->getRootDeviceIndex()) ? 1u : pProgram->getNumKernels());
EXPECT_EQ(expectedSize, allocs.size());
auto iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface->getGraphicsAllocation());
EXPECT_NE(allocs.end(), iter);
iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface->getGraphicsAllocation());
EXPECT_NE(allocs.end(), iter);
if (auto isaParentAllocation = pProgram->getKernelsIsaParentAllocation(pClDevice->getRootDeviceIndex());
isaParentAllocation != nullptr) {
iter = std::find(allocs.begin(), allocs.end(), isaParentAllocation);
EXPECT_NE(allocs.end(), iter);
} else {
iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].kernelInfoArray[0]->getIsaGraphicsAllocation());
EXPECT_NE(allocs.end(), iter);
}
}
TEST_F(ProgramFromBinaryIsaPoolingTest, givenEnabledIsaAllocationPoolAndProgramWithGlobalAndConstAllocationsWhenGettingModuleAllocationsThenAllAreReturned) {
pProgram->build(pProgram->getDevices(), nullptr);
pProgram->processGenBinary(*pClDevice);
pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface = std::make_unique<SharedPoolAllocation>(new MockGraphicsAllocation());
pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface = std::make_unique<SharedPoolAllocation>(new MockGraphicsAllocation());
auto allocs = pProgram->getModuleAllocations(pClDevice->getRootDeviceIndex());
EXPECT_EQ(pProgram->getNumKernels() + 2u, allocs.size());
@@ -704,10 +835,127 @@ TEST_F(ProgramFromBinaryTest, givenProgramWithGlobalAndConstAllocationsWhenGetti
iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface->getGraphicsAllocation());
EXPECT_NE(allocs.end(), iter);
iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].kernelInfoArray[0]->getGraphicsAllocation());
iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].kernelInfoArray[0]->getIsaParentAllocation());
EXPECT_NE(allocs.end(), iter);
}
HWTEST_F(ProgramFromBinaryIsaPoolingTest, givenTbxModeAndPooledIsaWhenTransferringSegmentsThenWriteMemoryIsCalled) {
auto tbxCsr = new MockTbxCsr<FamilyType>(*pDevice->executionEnvironment, pDevice->getDeviceBitfield());
pDevice->resetCommandStreamReceiver(tbxCsr);
retVal = pProgram->build(pProgram->getDevices(), nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, pProgram->getKernelsIsaParentAllocation(rootDeviceIndex));
EXPECT_TRUE(tbxCsr->writeMemoryGfxAllocCalled);
}
class ProgramIsaPoolingEnabledTest : public ProgramFromBinaryTest {
public:
void SetUp() override {
ProgramFromBinaryTest::SetUp();
pDevice->getRootDeviceEnvironmentRef().productHelper.reset(mockProductHelper);
}
void TearDown() override {
ProgramFromBinaryTest::TearDown();
}
MockProductHelper *mockProductHelper = new MockProductHelper;
DebugManagerStateRestore restorer;
};
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagSetWhenCheckingIsaPoolingThenReturnCorrectValue) {
{
debugManager.flags.EnableIsaAllocationPool.set(1);
EXPECT_TRUE(pProgram->isIsaPoolingEnabled(*pDevice));
}
{
debugManager.flags.EnableIsaAllocationPool.set(0);
EXPECT_FALSE(pProgram->isIsaPoolingEnabled(*pDevice));
}
}
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagDefaultAnd2MBAlignmentDisabledWhenCheckingIsaPoolingThenReturnFalse) {
debugManager.flags.EnableIsaAllocationPool.set(-1);
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
EXPECT_FALSE(pProgram->isIsaPoolingEnabled(*pDevice));
}
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagDefaultAndL0DebuggerPresentWhenCheckingIsaPoolingThenReturnFalse) {
debugManager.flags.EnableIsaAllocationPool.set(-1);
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
pDevice->getRootDeviceEnvironmentRef().debugger.reset(new MockDebugger);
pDevice->setDebugger(pDevice->getRootDeviceEnvironmentRef().debugger.get());
EXPECT_FALSE(pProgram->isIsaPoolingEnabled(*pDevice));
pDevice->getRootDeviceEnvironmentRef().debugger.reset(nullptr);
pDevice->setDebugger(nullptr);
}
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagDefaultAndGTPinInitializedWhenCheckingIsaPoolingThenReturnFalse) {
debugManager.flags.EnableIsaAllocationPool.set(-1);
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
isGTPinInitialized = true;
EXPECT_FALSE(pProgram->isIsaPoolingEnabled(*pDevice));
isGTPinInitialized = false;
}
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagDefaultAndKernelBinaryReuseEnabledWhenCheckingIsaPoolingThenReturnFalse) {
debugManager.flags.EnableIsaAllocationPool.set(-1);
debugManager.flags.ReuseKernelBinaries.set(1);
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
EXPECT_FALSE(pProgram->isIsaPoolingEnabled(*pDevice));
}
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagDefaultAndAllConditionsMetWhenCheckingIsaPoolingThenReturnTrue) {
debugManager.flags.EnableIsaAllocationPool.set(-1);
debugManager.flags.ReuseKernelBinaries.set(0);
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
isGTPinInitialized = false;
EXPECT_TRUE(pProgram->isIsaPoolingEnabled(*pDevice));
}
TEST_F(ProgramFromBinaryTest, givenVariousIsaSizesAndKernelPositionsWhenComputingSizeThenCorrectAlignmentAndPaddingAreApplied) {
auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
auto alignment = gfxCoreHelper.getKernelIsaPointerAlignment();
auto padding = gfxCoreHelper.getPaddingForISAAllocation();
std::vector<size_t> testSizes = {0u, 1u, 64u, 256u, 1024u, 4096u};
for (auto isaSize : testSizes) {
// Test last kernel (with padding)
auto sizeWithPadding = pProgram->computeKernelIsaAllocationAlignedSizeWithPadding(*pDevice, isaSize, true);
EXPECT_EQ(alignUp(isaSize + padding, alignment), sizeWithPadding);
EXPECT_TRUE(isAligned(sizeWithPadding, alignment));
EXPECT_GE(sizeWithPadding, isaSize + padding);
// Test not last kernel (without padding)
auto sizeWithoutPadding = pProgram->computeKernelIsaAllocationAlignedSizeWithPadding(*pDevice, isaSize, false);
EXPECT_EQ(alignUp(isaSize, alignment), sizeWithoutPadding);
EXPECT_TRUE(isAligned(sizeWithoutPadding, alignment));
EXPECT_GE(sizeWithoutPadding, isaSize);
// Size with padding should be >= size without padding
EXPECT_GE(sizeWithPadding, sizeWithoutPadding);
}
// Test already aligned size
size_t alignedIsaSize = alignment * 4;
auto alignedWithoutPadding = pProgram->computeKernelIsaAllocationAlignedSizeWithPadding(*pDevice, alignedIsaSize, false);
EXPECT_EQ(alignedIsaSize, alignedWithoutPadding);
auto alignedWithPadding = pProgram->computeKernelIsaAllocationAlignedSizeWithPadding(*pDevice, alignedIsaSize, true);
EXPECT_EQ(alignUp(alignedIsaSize + padding, alignment), alignedWithPadding);
}
using ProgramGetNumKernelsTest = Test<NEOProgramFixture>;
TEST_F(ProgramGetNumKernelsTest, givenProgramWithFunctionsWhenGettingNumKernelsFunctionsAreNotExposed) {
program->resizeAndPopulateKernelInfoArray(2);
@@ -741,13 +989,17 @@ HWTEST_F(ProgramFromBinaryTest, givenProgramWhenCleanCurrentKernelInfoIsCalledBu
auto &csr = pDevice->getGpgpuCommandStreamReceiver();
EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
pProgram->build(pProgram->getDevices(), nullptr);
auto kernelAllocation = pProgram->getKernelInfo(static_cast<size_t>(0u), rootDeviceIndex)->getGraphicsAllocation();
auto kernelAllocation = pProgram->getKernelInfo(static_cast<size_t>(0u), rootDeviceIndex)->getIsaGraphicsAllocation();
const bool isIsaPooled = pProgram->getKernelsIsaParentAllocation(rootDeviceIndex);
kernelAllocation->updateTaskCount(100, csr.getOsContext().getContextId());
*csr.getTagAddress() = 0;
pProgram->cleanCurrentKernelInfo(rootDeviceIndex);
EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
if (!isIsaPooled) {
EXPECT_FALSE(csr.getDeferredAllocations().peekIsEmpty());
EXPECT_EQ(csr.getDeferredAllocations().peekHead(), kernelAllocation);
} else {
EXPECT_TRUE(csr.getDeferredAllocations().peekIsEmpty());
}
EXPECT_TRUE(this->pDevice->getUltCommandStreamReceiver<FamilyType>().requiresInstructionCacheFlush);
}
@@ -757,7 +1009,7 @@ HWTEST_F(ProgramFromBinaryTest, givenIsaAllocationUsedByMultipleCsrsWhenItIsDele
pProgram->build(pProgram->getDevices(), nullptr);
auto kernelAllocation = pProgram->getKernelInfo(static_cast<size_t>(0u), rootDeviceIndex)->getGraphicsAllocation();
auto kernelAllocation = pProgram->getKernelInfo(static_cast<size_t>(0u), rootDeviceIndex)->getIsaGraphicsAllocation();
csr0.makeResident(*kernelAllocation);
csr1.makeResident(*kernelAllocation);
@@ -3267,6 +3519,52 @@ TEST_F(ProgramBinTest, GivenDebugDataAvailableWhenLinkingProgramThenDebugDataIsS
EXPECT_NE(nullptr, pProgram->getDebugData(rootDeviceIndex));
}
TEST_F(ProgramBinTest, givenEnabledIsaAllocationPoolWhenMultipleProgramsCreatedThenProgramsShareIsaAllocation) {
DebugDataGuard debugDataGuard{true};
DebugManagerStateRestore restorer;
debugManager.flags.EnableIsaAllocationPool.set(1);
const size_t numPrograms = 5;
std::vector<MockProgram *> programs;
NEO::GraphicsAllocation *sharedAllocation;
const char *sourceCode = "__kernel void\nCB(\n__global unsigned int* src, __global unsigned int* dst)\n{\nint id = (int)get_global_id(0);\ndst[id] = src[id];\n}\n";
for (size_t i = 0; i < numPrograms; ++i) {
programs.emplace_back(Program::create<MockProgram>(
pContext,
1,
&sourceCode,
&knownSourceSize,
retVal));
retVal = programs[i]->build(programs[i]->getDevices(), nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
if (i == 0) {
sharedAllocation = programs[i]->getKernelsIsaParentAllocation(rootDeviceIndex);
EXPECT_TRUE(pDevice->getIsaPoolAllocator().isPoolBuffer(sharedAllocation));
}
auto kernelInfoArray = programs[i]->getKernelInfoArray(pDevice->getRootDeviceIndex());
auto offsetForKernelInfo = kernelInfoArray[0]->getIsaOffsetInParentAllocation();
for (auto &kernelInfo : kernelInfoArray) {
EXPECT_EQ(offsetForKernelInfo, kernelInfo->getIsaOffsetInParentAllocation());
offsetForKernelInfo += kernelInfo->getIsaSubAllocationSize();
}
// Verify that all kernel infos share same parent allocation
if (i != 0) {
EXPECT_EQ(sharedAllocation, programs[i]->getKernelsIsaParentAllocation(rootDeviceIndex));
}
}
for (auto &program : programs) {
program->release();
}
}
using ProgramMultiRootDeviceTests = MultiRootDeviceFixture;
TEST_F(ProgramMultiRootDeviceTests, WhenProgramIsCreatedThenBuildInfosVectorIsProperlyResized) {

View File

@@ -46,7 +46,14 @@ void ProgramWithZebinFixture::populateProgramWithSegments(NEO::MockProgram *prog
kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
mockAlloc = std::make_unique<MockGraphicsAllocation>();
if (isUsingSharedIsaAllocation) {
kernelInfo->setIsaParentAllocation(mockAlloc.get());
kernelInfo->setIsaSubAllocationOffset(isaSubAllocOffset);
kernelInfo->setIsaSubAllocationSize(isaSubAllocSize);
} else {
kernelInfo->kernelAllocation = mockAlloc.get();
}
program->addKernelInfo(kernelInfo.get(), rootDeviceIndex);

View File

@@ -27,6 +27,9 @@ class ProgramWithZebinFixture : public ProgramTests {
std::unique_ptr<MockBuffer> globalSurface;
std::unique_ptr<MockBuffer> constantSurface;
const char strings[12] = "Hello olleH";
bool isUsingSharedIsaAllocation = false;
const size_t isaSubAllocOffset = 48u;
const size_t isaSubAllocSize = 308u;
void SetUp() override;
void TearDown() override;
void addEmptyZebin(MockProgram *program);

View File

@@ -42,7 +42,7 @@ TEST_F(ProgramWithZebinFixture, givenZebinSegmentsThenSegmentsArePopulated) {
};
checkGPUSeg(program->buildInfos[rootDeviceIndex].constantSurface->getGraphicsAllocation(), segments.constData);
checkGPUSeg(program->buildInfos[rootDeviceIndex].globalSurface->getGraphicsAllocation(), segments.varData);
checkGPUSeg(program->getKernelInfoArray(rootDeviceIndex)[0]->getGraphicsAllocation(), segments.nameToSegMap[ZebinTestData::ValidEmptyProgram<>::kernelName]);
checkGPUSeg(program->getKernelInfoArray(rootDeviceIndex)[0]->getIsaGraphicsAllocation(), segments.nameToSegMap[ZebinTestData::ValidEmptyProgram<>::kernelName]);
EXPECT_EQ(reinterpret_cast<uintptr_t>(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), segments.stringData.address);
EXPECT_EQ(reinterpret_cast<const char *>(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), strings);
@@ -65,6 +65,33 @@ TEST_F(ProgramWithZebinFixture, givenZebinSegmentsWithSharedGlobalAndConstSurfac
checkGPUSeg(program->buildInfos[rootDeviceIndex].globalSurface.get(), segments.varData);
}
TEST_F(ProgramWithZebinFixture, givenSharedIsaAllocationWhenGetZebinSegmentsThenSegmentsAreCorrectlyPopulated) {
isUsingSharedIsaAllocation = true;
populateProgramWithSegments(program.get());
auto segments = program->getZebinSegments(rootDeviceIndex);
auto checkGPUSeg = [](NEO::GraphicsAllocation *alloc, NEO::Zebin::Debug::Segments::Segment segment) {
EXPECT_EQ(static_cast<uintptr_t>(alloc->getGpuAddress()), segment.address);
EXPECT_EQ(static_cast<size_t>(alloc->getUnderlyingBufferSize()), segment.size);
};
checkGPUSeg(program->buildInfos[rootDeviceIndex].constantSurface->getGraphicsAllocation(), segments.constData);
checkGPUSeg(program->buildInfos[rootDeviceIndex].globalSurface->getGraphicsAllocation(), segments.varData);
{
auto kernelInfo = program->getKernelInfoArray(rootDeviceIndex)[0];
auto segment = segments.nameToSegMap[ZebinTestData::ValidEmptyProgram<>::kernelName];
auto isaAlloc = kernelInfo->getIsaGraphicsAllocation();
auto offset = kernelInfo->getIsaOffsetInParentAllocation();
EXPECT_EQ(static_cast<uintptr_t>(isaAlloc->getGpuAddress() + offset), segment.address);
EXPECT_EQ(static_cast<size_t>(kernelInfo->getIsaSubAllocationSize()), segment.size);
}
EXPECT_EQ(reinterpret_cast<uintptr_t>(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), segments.stringData.address);
EXPECT_EQ(reinterpret_cast<const char *>(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), strings);
EXPECT_EQ(program->buildInfos[rootDeviceIndex].constStringSectionData.size, sizeof(strings));
}
TEST_F(ProgramWithZebinFixture, givenNonEmptyDebugDataThenDebugZebinIsNotCreated) {
addEmptyZebin(program.get());
populateProgramWithSegments(program.get());

View File

@@ -45,7 +45,7 @@ struct EnqueueFixtureXe2HpgCore : public ::testing::Test {
}
void TearDown() override {
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation());
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getIsaGraphicsAllocation());
}
template <typename FamilyType>
@@ -92,7 +92,7 @@ XE2_HPG_CORETEST_F(MemoryPrefetchTestsXe2HpgCore, givenKernelWhenWalkerIsProgram
auto gmmHelper = clDevice->getRootDeviceEnvironment().getGmmHelper();
EXPECT_EQ(gmmHelper->decanonize(mockKernel->kernelInfo.getGraphicsAllocation()->getGpuAddress()), statePrefetchCmd->getAddress());
EXPECT_EQ(gmmHelper->decanonize(mockKernel->kernelInfo.getIsaGraphicsAllocation()->getGpuAddress()), statePrefetchCmd->getAddress());
EXPECT_TRUE(statePrefetchCmd->getKernelInstructionPrefetch());
}

View File

@@ -45,7 +45,7 @@ struct EnqueueFixtureXeHpcCore : public ::testing::Test {
}
void TearDown() override {
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation());
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getIsaGraphicsAllocation());
}
template <typename FamilyType>
@@ -90,7 +90,7 @@ XE_HPC_CORETEST_F(MemoryPrefetchTestsXeHpcCore, givenKernelWhenWalkerIsProgramme
auto statePrefetchCmd = genCmdCast<STATE_PREFETCH *>(*itorStatePrefetch);
EXPECT_NE(nullptr, statePrefetchCmd);
EXPECT_EQ(mockKernel->kernelInfo.getGraphicsAllocation()->getGpuAddress(), statePrefetchCmd->getAddress());
EXPECT_EQ(mockKernel->kernelInfo.getIsaGraphicsAllocation()->getGpuAddress(), statePrefetchCmd->getAddress());
EXPECT_TRUE(statePrefetchCmd->getKernelInstructionPrefetch());
}

View File

@@ -440,6 +440,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceUsmAllocationPool, -1, "-1: default
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostUsmAllocationPool, -1, "-1: default (enabled, 2MB), 0: disabled, >=1: enabled, size in MB")
DECLARE_DEBUG_VARIABLE(int32_t, EnableUsmAllocationPoolManager, -1, "-1: default, 0: disabled, 1: enabled, use growing pools")
DECLARE_DEBUG_VARIABLE(int32_t, EnableUsmPoolResidencyTracking, -1, "-1: default, 0: disabled, 1: enabled, track residency per chunk")
DECLARE_DEBUG_VARIABLE(int32_t, EnableIsaAllocationPool, -1, "-1: default, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, EnableUsmPoolLazyInit, -1, "-1: default, 0: disabled, 1: enabled, initialize usm pools on first alloc")
DECLARE_DEBUG_VARIABLE(int32_t, UseLocalPreferredForCacheableBuffers, -1, "Use localPreferred for cacheable buffers")
DECLARE_DEBUG_VARIABLE(int32_t, EnableCopyWithStagingBuffers, -1, "Enable copy with non-usm memory through staging buffers. -1: default, 0: disabled, 1: enabled")

View File

@@ -120,6 +120,31 @@ void KernelInfo::apply(const DeviceInfoKernelPayloadConstants &constants) {
setIfValidOffset(constants.maxWorkGroupSize, implicitArgs.maxWorkGroupSize);
}
GraphicsAllocation *KernelInfo::getIsaGraphicsAllocation() const {
if (auto allocation = this->getIsaParentAllocation(); allocation != nullptr) {
DEBUG_BREAK_IF(this->kernelAllocation != nullptr);
return allocation;
} else {
DEBUG_BREAK_IF(this->kernelAllocation == nullptr);
return this->kernelAllocation;
}
}
uint32_t KernelInfo::getIsaSize() const {
if (this->getIsaParentAllocation()) {
DEBUG_BREAK_IF(this->kernelAllocation != nullptr);
return static_cast<uint32_t>(this->isaSubAllocationSize);
} else {
DEBUG_BREAK_IF(this->kernelAllocation == nullptr);
return static_cast<uint32_t>(this->kernelAllocation->getUnderlyingBufferSize());
}
}
void KernelInfo::setIsaPerKernelAllocation(GraphicsAllocation *allocation) {
DEBUG_BREAK_IF(this->isaParentAllocation != nullptr);
this->kernelAllocation = allocation;
}
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos) {
std::string semiColonDelimitedKernelNameStr;

View File

@@ -83,6 +83,30 @@ struct KernelInfo : NEO::NonCopyableAndNonMovableClass {
bool createKernelAllocation(const Device &device, bool internalIsa);
void apply(const DeviceInfoKernelPayloadConstants &constants);
uint32_t getIsaSize() const;
GraphicsAllocation *getIsaGraphicsAllocation() const;
void setIsaPerKernelAllocation(GraphicsAllocation *allocation);
inline GraphicsAllocation *getIsaParentAllocation() const {
return isaParentAllocation;
}
inline void setIsaParentAllocation(GraphicsAllocation *allocation) {
isaParentAllocation = allocation;
}
inline size_t getIsaOffsetInParentAllocation() const {
DEBUG_BREAK_IF(this->kernelAllocation != nullptr && 0u != isaSubAllocationOffset);
return isaSubAllocationOffset;
}
inline void setIsaSubAllocationOffset(size_t offset) {
isaSubAllocationOffset = offset;
}
inline void setIsaSubAllocationSize(size_t size) {
isaSubAllocationSize = size;
}
inline size_t getIsaSubAllocationSize() const {
return isaSubAllocationSize;
}
HeapInfo heapInfo = {};
std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
char *crossThreadData = nullptr;
@@ -97,6 +121,11 @@ struct KernelInfo : NEO::NonCopyableAndNonMovableClass {
uint64_t shaderHashCode;
KernelDescriptor kernelDescriptor;
private:
GraphicsAllocation *isaParentAllocation = nullptr;
size_t isaSubAllocationOffset = 0lu;
size_t isaSubAllocationSize = 0lu;
};
static_assert(NEO::NonCopyableAndNonMovable<KernelInfo>);

View File

@@ -593,6 +593,7 @@ EnableUserFenceUponUnbind = -1
EnableWaitOnUserFenceAfterBindAndUnbind = -1
UseGemCreateExtInAllocateMemoryByKMD = -1
PrintMmapAndMunMapCalls = -1
EnableIsaAllocationPool = -1
UseLocalPreferredForCacheableBuffers = -1
EnableFtrTile64Optimization = 0
ForceTlbFlushWithTaskCountAfterCopy = -1