mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
feature: implement ISA allocation pooling in OpenCL
Related-To: NEO-12287 Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a627594e83
commit
1b9b78ac16
@@ -124,8 +124,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
|
||||
|
||||
if constexpr (heaplessModeEnabled == false) {
|
||||
if (auto kernelAllocation = kernelInfo.getGraphicsAllocation()) {
|
||||
EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.kernelHeapSize, 0, rootDeviceEnvironment);
|
||||
if (auto kernelAllocation = kernelInfo.getIsaGraphicsAllocation()) {
|
||||
EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.kernelHeapSize, kernelInfo.getIsaOffsetInParentAllocation(), rootDeviceEnvironment);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -515,7 +515,7 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize,
|
||||
srcSize = getKernelHeapSize();
|
||||
break;
|
||||
case CL_KERNEL_BINARY_GPU_ADDRESS_INTEL:
|
||||
nonCannonizedGpuAddress = gmmHelper->decanonize(kernelInfo.kernelAllocation->getGpuAddress());
|
||||
nonCannonizedGpuAddress = gmmHelper->decanonize(kernelInfo.getIsaGraphicsAllocation()->getGpuAddress() + kernelInfo.getIsaOffsetInParentAllocation());
|
||||
pSrc = &nonCannonizedGpuAddress;
|
||||
srcSize = sizeof(nonCannonizedGpuAddress);
|
||||
break;
|
||||
@@ -788,21 +788,23 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize)
|
||||
pKernelInfo->isKernelHeapSubstituted = true;
|
||||
auto memoryManager = executionEnvironment.memoryManager.get();
|
||||
|
||||
auto currentAllocationSize = pKernelInfo->kernelAllocation->getUnderlyingBufferSize();
|
||||
auto currentAllocationSize = pKernelInfo->getIsaSize();
|
||||
bool status = false;
|
||||
auto &rootDeviceEnvironment = clDevice.getRootDeviceEnvironment();
|
||||
auto &helper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
|
||||
size_t isaPadding = helper.getPaddingForISAAllocation();
|
||||
|
||||
DEBUG_BREAK_IF(nullptr != pKernelInfo->getIsaParentAllocation());
|
||||
|
||||
if (currentAllocationSize >= newKernelHeapSize + isaPadding) {
|
||||
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto useBlitter = productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *pKernelInfo->getGraphicsAllocation());
|
||||
auto useBlitter = productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *pKernelInfo->getIsaGraphicsAllocation());
|
||||
status = MemoryTransferHelper::transferMemoryToAllocation(useBlitter,
|
||||
clDevice.getDevice(), pKernelInfo->getGraphicsAllocation(), 0, newKernelHeap,
|
||||
clDevice.getDevice(), pKernelInfo->getIsaGraphicsAllocation(), 0, newKernelHeap,
|
||||
static_cast<size_t>(newKernelHeapSize));
|
||||
} else {
|
||||
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation);
|
||||
pKernelInfo->kernelAllocation = nullptr;
|
||||
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->getIsaGraphicsAllocation());
|
||||
pKernelInfo->setIsaPerKernelAllocation(nullptr);
|
||||
status = pKernelInfo->createKernelAllocation(clDevice.getDevice(), isBuiltIn);
|
||||
}
|
||||
UNRECOVERABLE_IF(!status);
|
||||
@@ -1315,7 +1317,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
||||
}
|
||||
makeArgsResident(commandStreamReceiver);
|
||||
|
||||
auto kernelIsaAllocation = this->kernelInfo.kernelAllocation;
|
||||
auto kernelIsaAllocation = this->kernelInfo.getIsaGraphicsAllocation();
|
||||
if (kernelIsaAllocation) {
|
||||
commandStreamReceiver.makeResident(*kernelIsaAllocation);
|
||||
}
|
||||
@@ -1381,8 +1383,8 @@ void Kernel::getResidency(std::vector<Surface *> &dst) {
|
||||
}
|
||||
}
|
||||
|
||||
auto kernelIsaAllocation = this->kernelInfo.kernelAllocation;
|
||||
if (kernelIsaAllocation) {
|
||||
if (auto kernelIsaAllocation = this->kernelInfo.getIsaGraphicsAllocation();
|
||||
kernelIsaAllocation != nullptr) {
|
||||
GeneralSurface *surface = new GeneralSurface(kernelIsaAllocation);
|
||||
dst.push_back(surface);
|
||||
}
|
||||
@@ -1922,12 +1924,13 @@ bool Kernel::hasIndirectStatelessAccessToHostMemory() const {
|
||||
}
|
||||
|
||||
uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed, const bool returnFullAddress) const {
|
||||
|
||||
uint64_t kernelStartOffset = 0;
|
||||
|
||||
if (kernelInfo.getGraphicsAllocation()) {
|
||||
kernelStartOffset = returnFullAddress ? kernelInfo.getGraphicsAllocation()->getGpuAddress()
|
||||
: kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
if (kernelInfo.getIsaGraphicsAllocation()) {
|
||||
auto offsetInParentAllocation = kernelInfo.getIsaOffsetInParentAllocation();
|
||||
kernelStartOffset = returnFullAddress ? kernelInfo.getIsaGraphicsAllocation()->getGpuAddress() + offsetInParentAllocation
|
||||
: kernelInfo.getIsaGraphicsAllocation()->getGpuAddressToPatch() + offsetInParentAllocation;
|
||||
|
||||
if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) {
|
||||
kernelStartOffset += kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/file_io.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
@@ -109,14 +110,16 @@ cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, size_
|
||||
exportedFunctionsKernelId = static_cast<size_t>(linkerInput->getExportedFunctionsSegmentId());
|
||||
// Exported functions reside in instruction heap of one of kernels
|
||||
auto exportedFunctionHeapId = linkerInput->getExportedFunctionsSegmentId();
|
||||
buildInfos[rootDeviceIndex].exportedFunctionsSurface = kernelInfoArray[exportedFunctionHeapId]->getGraphicsAllocation();
|
||||
buildInfos[rootDeviceIndex].exportedFunctionsSurface = kernelInfoArray[exportedFunctionHeapId]->getIsaGraphicsAllocation();
|
||||
auto offsetInParentAllocation = kernelInfoArray[exportedFunctionHeapId]->getIsaOffsetInParentAllocation();
|
||||
|
||||
auto &compilerProductHelper = pDevice->getCompilerProductHelper();
|
||||
if (compilerProductHelper.isHeaplessModeEnabled(pDevice->getHardwareInfo())) {
|
||||
exportedFunctions.gpuAddress = static_cast<uintptr_t>(buildInfos[rootDeviceIndex].exportedFunctionsSurface->getGpuAddress());
|
||||
exportedFunctions.gpuAddress = static_cast<uintptr_t>(buildInfos[rootDeviceIndex].exportedFunctionsSurface->getGpuAddress() + offsetInParentAllocation);
|
||||
} else {
|
||||
exportedFunctions.gpuAddress = static_cast<uintptr_t>(buildInfos[rootDeviceIndex].exportedFunctionsSurface->getGpuAddressToPatch());
|
||||
exportedFunctions.gpuAddress = static_cast<uintptr_t>(buildInfos[rootDeviceIndex].exportedFunctionsSurface->getGpuAddressToPatch() + offsetInParentAllocation);
|
||||
}
|
||||
exportedFunctions.segmentSize = buildInfos[rootDeviceIndex].exportedFunctionsSurface->getUnderlyingBufferSize();
|
||||
exportedFunctions.segmentSize = kernelInfoArray[exportedFunctionHeapId]->getIsaSize();
|
||||
}
|
||||
Linker::PatchableSegments isaSegmentsForPatching;
|
||||
std::vector<std::vector<char>> patchedIsaTempStorage;
|
||||
@@ -128,8 +131,8 @@ cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, size_
|
||||
auto &kernHeapInfo = kernelInfo->heapInfo;
|
||||
const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
|
||||
patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.kernelHeapSize));
|
||||
DEBUG_BREAK_IF(nullptr == kernelInfo->getGraphicsAllocation());
|
||||
isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), static_cast<uintptr_t>(kernelInfo->getGraphicsAllocation()->getGpuAddressToPatch()), kernHeapInfo.kernelHeapSize});
|
||||
DEBUG_BREAK_IF(nullptr == kernelInfo->getIsaGraphicsAllocation());
|
||||
isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), static_cast<uintptr_t>(kernelInfo->getIsaGraphicsAllocation()->getGpuAddressToPatch() + kernelInfo->getIsaOffsetInParentAllocation()), kernHeapInfo.kernelHeapSize});
|
||||
kernelDescriptors.push_back(&kernelInfo->kernelDescriptor);
|
||||
}
|
||||
}
|
||||
@@ -151,16 +154,8 @@ cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, size_
|
||||
updateBuildLog(pDevice->getRootDeviceIndex(), error.c_str(), error.size());
|
||||
return CL_INVALID_BINARY;
|
||||
} else if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
|
||||
for (auto kernelId = 0u; kernelId < kernelInfoArray.size(); kernelId++) {
|
||||
const auto &kernelInfo = kernelInfoArray[kernelId];
|
||||
auto &kernHeapInfo = kernelInfo->heapInfo;
|
||||
auto segmentId = &kernelInfo - &kernelInfoArray[0];
|
||||
auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
|
||||
const auto &productHelper = pDevice->getProductHelper();
|
||||
MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *kernelInfo->getGraphicsAllocation()),
|
||||
*pDevice, kernelInfo->getGraphicsAllocation(), 0, isaSegmentsForPatching[segmentId].hostPointer,
|
||||
static_cast<size_t>(kernHeapInfo.kernelHeapSize));
|
||||
}
|
||||
[[maybe_unused]] auto success = transferIsaSegmentsToAllocation(pDevice, kernelInfoArray, &isaSegmentsForPatching, rootDeviceIndex);
|
||||
DEBUG_BREAK_IF(!success);
|
||||
}
|
||||
DBG_LOG(PrintRelocations, NEO::constructRelocationsDebugMessage(this->getSymbols(pDevice->getRootDeviceIndex())));
|
||||
return CL_SUCCESS;
|
||||
@@ -328,19 +323,11 @@ cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) {
|
||||
}
|
||||
buildInfos[rootDeviceIndex].kernelMiscInfoPos = src.kernelMiscInfoPos;
|
||||
|
||||
for (auto &kernelInfo : kernelInfoArray) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
if (kernelInfo->heapInfo.kernelHeapSize) {
|
||||
retVal = kernelInfo->createKernelAllocation(clDevice.getDevice(), isBuiltIn) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
if (retVal != CL_SUCCESS) {
|
||||
if (auto retVal = setIsaGraphicsAllocations(clDevice.getDevice(), kernelInfoArray, deviceInfoConstants, rootDeviceIndex);
|
||||
retVal != CL_SUCCESS) {
|
||||
return retVal;
|
||||
}
|
||||
|
||||
kernelInfo->apply(deviceInfoConstants);
|
||||
}
|
||||
|
||||
indirectDetectionVersion = src.indirectDetectionVersion;
|
||||
indirectAccessBufferMajorVersion = src.indirectAccessBufferMajorVersion;
|
||||
|
||||
@@ -383,8 +370,16 @@ Zebin::Debug::Segments Program::getZebinSegments(uint32_t rootDeviceIndex) {
|
||||
buildInfos[rootDeviceIndex].constStringSectionData.size};
|
||||
std::vector<NEO::Zebin::Debug::Segments::KernelNameIsaTupleT> kernels;
|
||||
for (const auto &kernelInfo : buildInfos[rootDeviceIndex].kernelInfoArray) {
|
||||
NEO::Zebin::Debug::Segments::Segment segment;
|
||||
|
||||
if (kernelInfo->getIsaParentAllocation()) {
|
||||
segment.address = static_cast<uintptr_t>(kernelInfo->getIsaGraphicsAllocation()->getGpuAddress() + kernelInfo->getIsaOffsetInParentAllocation());
|
||||
segment.size = kernelInfo->getIsaSubAllocationSize();
|
||||
} else {
|
||||
segment.address = static_cast<uintptr_t>(kernelInfo->getIsaGraphicsAllocation()->getGpuAddress());
|
||||
segment.size = kernelInfo->getIsaGraphicsAllocation()->getUnderlyingBufferSize();
|
||||
}
|
||||
|
||||
NEO::Zebin::Debug::Segments::Segment segment = {static_cast<uintptr_t>(kernelInfo->getGraphicsAllocation()->getGpuAddress()), kernelInfo->getGraphicsAllocation()->getUnderlyingBufferSize()};
|
||||
kernels.push_back({kernelInfo->kernelDescriptor.kernelMetadata.kernelName, segment});
|
||||
}
|
||||
return Zebin::Debug::Segments(getGlobalSurface(rootDeviceIndex), getConstantSurface(rootDeviceIndex), strings, kernels);
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/addressing_mode_helper.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/compiler_options_parser.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
@@ -29,10 +30,13 @@
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
#include "shared/source/program/metadata_generation.h"
|
||||
#include "shared/source/utilities/buffer_pool_allocator.inl"
|
||||
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevicesIn) : executionEnvironment(*clDevicesIn[0]->getExecutionEnvironment()),
|
||||
@@ -301,6 +305,172 @@ cl_int Program::createProgramFromBinary(
|
||||
return retVal;
|
||||
}
|
||||
|
||||
bool Program::isIsaPoolingEnabled(Device &neoDevice) {
|
||||
if (auto dbgFlag = debugManager.flags.EnableIsaAllocationPool.get(); dbgFlag != -1) {
|
||||
return static_cast<bool>(dbgFlag);
|
||||
}
|
||||
|
||||
return neoDevice.getProductHelper().is2MBLocalMemAlignmentEnabled() &&
|
||||
nullptr == neoDevice.getL0Debugger() &&
|
||||
false == gtpinIsGTPinInitialized() &&
|
||||
false == neoDevice.getMemoryManager()->isKernelBinaryReuseEnabled();
|
||||
}
|
||||
|
||||
cl_int Program::setIsaGraphicsAllocations(Device &neoDevice, std::vector<KernelInfo *> &kernelInfoArray, DeviceInfoKernelPayloadConstants &deviceInfoConstants, uint32_t rootDeviceIndex) {
|
||||
for (auto &kernelInfo : kernelInfoArray) {
|
||||
kernelInfo->apply(deviceInfoConstants);
|
||||
}
|
||||
|
||||
std::vector<KernelInfo *> validKernelInfos;
|
||||
validKernelInfos.reserve(kernelInfoArray.size());
|
||||
std::ranges::copy_if(kernelInfoArray,
|
||||
std::back_inserter(validKernelInfos),
|
||||
[](const KernelInfo *info) { return info->heapInfo.kernelHeapSize != 0; });
|
||||
|
||||
DEBUG_BREAK_IF(validKernelInfos.size() != kernelInfoArray.size());
|
||||
|
||||
if (validKernelInfos.empty()) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
if (isIsaPoolingEnabled(neoDevice)) {
|
||||
const size_t kernelsCount = validKernelInfos.size();
|
||||
std::vector<std::pair<size_t, size_t>> kernelsChunks(kernelsCount);
|
||||
size_t kernelsIsaTotalSize = 0u;
|
||||
|
||||
for (size_t i = 0; i < kernelsCount; i++) {
|
||||
auto chunkOffset = kernelsIsaTotalSize;
|
||||
auto chunkSize = computeKernelIsaAllocationAlignedSizeWithPadding(neoDevice, validKernelInfos[i]->heapInfo.kernelHeapSize, ((i + 1) == kernelsCount));
|
||||
kernelsIsaTotalSize += chunkSize;
|
||||
kernelsChunks[i] = {chunkOffset, chunkSize};
|
||||
}
|
||||
|
||||
auto &isaAllocator = neoDevice.getIsaPoolAllocator();
|
||||
auto crossProgramAllocation = isaAllocator.requestGraphicsAllocationForIsa(isBuiltIn, kernelsIsaTotalSize);
|
||||
if (crossProgramAllocation == nullptr) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
auto &sharedIsaAllocation = buildInfos[rootDeviceIndex].sharedIsaAllocation;
|
||||
sharedIsaAllocation.reset(crossProgramAllocation);
|
||||
|
||||
for (size_t i = 0; i < kernelsCount; i++) {
|
||||
auto [isaOffset, isaSize] = kernelsChunks[i];
|
||||
validKernelInfos[i]->setIsaParentAllocation(sharedIsaAllocation->getGraphicsAllocation());
|
||||
validKernelInfos[i]->setIsaSubAllocationOffset(sharedIsaAllocation->getOffset() + isaOffset);
|
||||
validKernelInfos[i]->setIsaSubAllocationSize(isaSize);
|
||||
}
|
||||
|
||||
if (!transferIsaSegmentsToAllocation(&neoDevice, validKernelInfos, nullptr, rootDeviceIndex)) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
} else {
|
||||
for (auto &kernelInfo : validKernelInfos) {
|
||||
if (!kernelInfo->createKernelAllocation(neoDevice, isBuiltIn)) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
bool Program::transferIsaSegmentsToAllocation(Device *pDevice, std::vector<KernelInfo *> &kernelInfoArray, const Linker::PatchableSegments *isaSegmentsForPatching, uint32_t rootDeviceIndex) {
|
||||
const auto &productHelper = pDevice->getProductHelper();
|
||||
auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
|
||||
|
||||
auto &sharedIsaAllocation = buildInfos[rootDeviceIndex].sharedIsaAllocation;
|
||||
if (sharedIsaAllocation) {
|
||||
const auto isaBufferSize = sharedIsaAllocation->getSize();
|
||||
DEBUG_BREAK_IF(isaBufferSize == 0);
|
||||
|
||||
std::vector<std::byte> isaBuffer(isaBufferSize);
|
||||
std::memset(isaBuffer.data(), 0x0, isaBufferSize);
|
||||
auto programOffset = sharedIsaAllocation->getOffset();
|
||||
|
||||
for (auto &kernelInfo : kernelInfoArray) {
|
||||
kernelInfo->getIsaGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
|
||||
kernelInfo->getIsaGraphicsAllocation()->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
|
||||
|
||||
auto [kernelHeapPtr, kernelHeapSize] = getKernelHeapPointerAndSize(kernelInfo, kernelInfoArray, isaSegmentsForPatching);
|
||||
auto isaOffset = kernelInfo->getIsaOffsetInParentAllocation() - programOffset;
|
||||
memcpy_s(isaBuffer.data() + isaOffset,
|
||||
isaBufferSize - isaOffset,
|
||||
kernelHeapPtr,
|
||||
kernelHeapSize);
|
||||
}
|
||||
|
||||
auto programAllocation = sharedIsaAllocation->getGraphicsAllocation();
|
||||
auto lock = sharedIsaAllocation->obtainSharedAllocationLock();
|
||||
|
||||
auto success = NEO::MemoryTransferHelper::transferMemoryToAllocation(
|
||||
productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *programAllocation),
|
||||
*pDevice,
|
||||
programAllocation,
|
||||
programOffset,
|
||||
isaBuffer.data(),
|
||||
isaBuffer.size());
|
||||
|
||||
if (pDevice->getDefaultEngine().commandStreamReceiver->getType() != NEO::CommandStreamReceiverType::hardware) {
|
||||
pDevice->getDefaultEngine().commandStreamReceiver->writeMemory(*programAllocation);
|
||||
}
|
||||
|
||||
return success;
|
||||
} else {
|
||||
if (pDevice->getMemoryManager()->isKernelBinaryReuseEnabled()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (auto &kernelInfo : kernelInfoArray) {
|
||||
auto [kernelHeapPtr, kernelHeapSize] = getKernelHeapPointerAndSize(kernelInfo, kernelInfoArray, isaSegmentsForPatching);
|
||||
|
||||
if (nullptr == kernelInfo->getIsaGraphicsAllocation() || 0 == kernelHeapSize) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF(0 == kernelInfo->heapInfo.kernelHeapSize);
|
||||
DEBUG_BREAK_IF(kernelInfo->getIsaGraphicsAllocation() == kernelInfo->getIsaParentAllocation());
|
||||
|
||||
bool success = MemoryTransferHelper::transferMemoryToAllocation(
|
||||
productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *kernelInfo->getIsaGraphicsAllocation()),
|
||||
*pDevice,
|
||||
kernelInfo->getIsaGraphicsAllocation(),
|
||||
0,
|
||||
kernelHeapPtr,
|
||||
kernelHeapSize);
|
||||
|
||||
if (!success) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::pair<const void *, size_t> Program::getKernelHeapPointerAndSize(KernelInfo *const &kernelInfo, std::vector<KernelInfo *> &kernelInfoArray, const Linker::PatchableSegments *isaSegmentsForPatching) {
|
||||
if (isaSegmentsForPatching) {
|
||||
auto &segments = *isaSegmentsForPatching;
|
||||
auto segmentId = &kernelInfo - &kernelInfoArray[0];
|
||||
return {segments[segmentId].hostPointer, segments[segmentId].segmentSize};
|
||||
} else {
|
||||
return {kernelInfo->heapInfo.pKernelHeap, static_cast<size_t>(kernelInfo->heapInfo.kernelHeapSize)};
|
||||
}
|
||||
}
|
||||
|
||||
size_t Program::computeKernelIsaAllocationAlignedSizeWithPadding(const Device &neoDevice, size_t isaSize, bool lastKernel) {
|
||||
auto isaPadding = lastKernel ? neoDevice.getGfxCoreHelper().getPaddingForISAAllocation() : 0u;
|
||||
auto kernelStartPointerAlignment = neoDevice.getGfxCoreHelper().getKernelIsaPointerAlignment();
|
||||
auto isaAllocationSize = alignUp(isaPadding + isaSize, kernelStartPointerAlignment);
|
||||
return isaAllocationSize;
|
||||
}
|
||||
|
||||
GraphicsAllocation *Program::getKernelsIsaParentAllocation(uint32_t rootDeviceIndex) const {
|
||||
if (!buildInfos[rootDeviceIndex].sharedIsaAllocation) {
|
||||
return nullptr;
|
||||
}
|
||||
return buildInfos[rootDeviceIndex].sharedIsaAllocation->getGraphicsAllocation();
|
||||
}
|
||||
|
||||
cl_int Program::setProgramSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue) {
|
||||
if (!isSpirV) {
|
||||
return CL_INVALID_PROGRAM;
|
||||
@@ -381,11 +551,11 @@ const char *Program::getBuildLog(uint32_t rootDeviceIndex) const {
|
||||
void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) {
|
||||
auto &buildInfo = buildInfos[rootDeviceIndex];
|
||||
for (auto &kernelInfo : buildInfo.kernelInfoArray) {
|
||||
if (kernelInfo->kernelAllocation) {
|
||||
if (kernelInfo->getIsaGraphicsAllocation()) {
|
||||
// register cache flush in all csrs where kernel allocation was used
|
||||
for (auto &engine : this->executionEnvironment.memoryManager->getRegisteredEngines(rootDeviceIndex)) {
|
||||
auto contextId = engine.osContext->getContextId();
|
||||
if (kernelInfo->kernelAllocation->isUsedByOsContext(contextId)) {
|
||||
if (kernelInfo->getIsaGraphicsAllocation()->isUsedByOsContext(contextId)) {
|
||||
engine.commandStreamReceiver->registerInstructionCacheFlush();
|
||||
}
|
||||
}
|
||||
@@ -403,13 +573,26 @@ void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->kernelAllocation);
|
||||
if (!buildInfo.sharedIsaAllocation) {
|
||||
this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->getIsaGraphicsAllocation());
|
||||
}
|
||||
}
|
||||
}
|
||||
delete kernelInfo;
|
||||
}
|
||||
buildInfo.kernelInfoArray.clear();
|
||||
metadataGeneration.reset(new MetadataGeneration());
|
||||
|
||||
if (buildInfo.sharedIsaAllocation) {
|
||||
for (auto &device : clDevices) {
|
||||
auto &isaAllocator = device->getDevice().getIsaPoolAllocator();
|
||||
if (isaAllocator.isPoolBuffer(buildInfo.sharedIsaAllocation->getGraphicsAllocation())) {
|
||||
isaAllocator.freeSharedIsaAllocation(buildInfo.sharedIsaAllocation.release());
|
||||
break;
|
||||
}
|
||||
}
|
||||
DEBUG_BREAK_IF(buildInfo.sharedIsaAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
void Program::updateNonUniformFlag() {
|
||||
@@ -666,9 +849,15 @@ StackVec<NEO::GraphicsAllocation *, 32> Program::getModuleAllocations(uint32_t r
|
||||
StackVec<NEO::GraphicsAllocation *, 32> allocs;
|
||||
auto &kernelInfoArray = buildInfos[rootIndex].kernelInfoArray;
|
||||
|
||||
if (auto isaParentAllocation = this->getKernelsIsaParentAllocation(rootIndex);
|
||||
isaParentAllocation != nullptr) {
|
||||
allocs.push_back(isaParentAllocation);
|
||||
} else {
|
||||
for (const auto &kernelInfo : kernelInfoArray) {
|
||||
allocs.push_back(kernelInfo->getGraphicsAllocation());
|
||||
allocs.push_back(kernelInfo->getIsaGraphicsAllocation());
|
||||
}
|
||||
}
|
||||
|
||||
GraphicsAllocation *globalsForPatching = getGlobalSurfaceGA(rootIndex);
|
||||
GraphicsAllocation *constantsForPatching = getConstantSurfaceGA(rootIndex);
|
||||
|
||||
|
||||
@@ -37,7 +37,9 @@ struct MetadataGeneration;
|
||||
struct KernelInfo;
|
||||
enum class DecodeError : uint8_t;
|
||||
struct ExternalFunctionInfo;
|
||||
class SharedPoolAlloction;
|
||||
class SharedPoolAllocation;
|
||||
struct DeviceInfoKernelPayloadConstants;
|
||||
class ProductHelper;
|
||||
|
||||
template <>
|
||||
struct OpenCLObjectMapper<_cl_program> {
|
||||
@@ -196,6 +198,31 @@ class Program : public BaseObject<_cl_program> {
|
||||
NEO::GraphicsAllocation *getGlobalSurfaceGA(uint32_t rootDeviceIndex) const;
|
||||
NEO::GraphicsAllocation *getExportedFunctionsSurface(uint32_t rootDeviceIndex) const;
|
||||
|
||||
MOCKABLE_VIRTUAL bool isIsaPoolingEnabled(Device &neoDevice);
|
||||
cl_int setIsaGraphicsAllocations(
|
||||
Device &neoDevice,
|
||||
std::vector<KernelInfo *> &kernelInfoArray,
|
||||
DeviceInfoKernelPayloadConstants &deviceInfoConstants,
|
||||
uint32_t rootDeviceIndex);
|
||||
|
||||
MOCKABLE_VIRTUAL bool transferIsaSegmentsToAllocation(
|
||||
Device *pDevice,
|
||||
std::vector<KernelInfo *> &kernelInfoArray,
|
||||
const Linker::PatchableSegments *isaSegmentsForPatching,
|
||||
uint32_t rootDeviceIndex);
|
||||
|
||||
std::pair<const void *, size_t> getKernelHeapPointerAndSize(
|
||||
KernelInfo *const &kernelInfo,
|
||||
std::vector<KernelInfo *> &kernelInfoArray,
|
||||
const Linker::PatchableSegments *isaSegmentsForPatching);
|
||||
|
||||
size_t computeKernelIsaAllocationAlignedSizeWithPadding(
|
||||
const Device &neoDevice,
|
||||
size_t isaSize,
|
||||
bool lastKernel);
|
||||
|
||||
GraphicsAllocation *getKernelsIsaParentAllocation(uint32_t rootDeviceIndex) const;
|
||||
|
||||
void cleanCurrentKernelInfo(uint32_t rootDeviceIndex);
|
||||
|
||||
const std::string &getOptions() const { return options; }
|
||||
@@ -331,6 +358,7 @@ class Program : public BaseObject<_cl_program> {
|
||||
std::vector<KernelInfo *> kernelInfoArray;
|
||||
std::unique_ptr<NEO::SharedPoolAllocation> constantSurface;
|
||||
std::unique_ptr<NEO::SharedPoolAllocation> globalSurface;
|
||||
std::unique_ptr<NEO::SharedPoolAllocation> sharedIsaAllocation;
|
||||
GraphicsAllocation *exportedFunctionsSurface = nullptr;
|
||||
size_t globalVarTotalSize = 0U;
|
||||
std::unique_ptr<LinkerInput> linkerInput;
|
||||
|
||||
@@ -379,8 +379,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubHwLocalIdsTest, WhenEnqueueDimension
|
||||
EXPECT_EQ(1u, walker->getGenerateLocalId());
|
||||
|
||||
constexpr bool isHeapless = FamilyType::template isHeaplessMode<WalkerType>();
|
||||
auto kernelAllocationGpuAddr = isHeapless ? kernel->getKernelInfo().kernelAllocation->getGpuAddress()
|
||||
: kernel->getKernelInfo().kernelAllocation->getGpuAddressToPatch();
|
||||
auto offsetInParentAllocation = kernel->getKernelInfo().getIsaOffsetInParentAllocation();
|
||||
auto kernelAllocationGpuAddr = isHeapless ? kernel->getKernelInfo().getIsaGraphicsAllocation()->getGpuAddress() + offsetInParentAllocation
|
||||
: kernel->getKernelInfo().getIsaGraphicsAllocation()->getGpuAddressToPatch() + offsetInParentAllocation;
|
||||
|
||||
auto skipOffset = kernel->getKernelInfo().kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
||||
uint64_t kernelStartPointer = kernelAllocationGpuAddr + skipOffset;
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/utilities/buffer_pool_allocator.inl"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
||||
@@ -1236,6 +1237,12 @@ HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnq
|
||||
if (it->first == bufferAllocation) {
|
||||
expected = 3u;
|
||||
}
|
||||
|
||||
// Shared parent isa allocation for all kernels
|
||||
if (pDevice->getIsaPoolAllocator().isPoolBuffer(it->first)) {
|
||||
expected = 2u;
|
||||
}
|
||||
|
||||
EXPECT_EQ(expected, it->second);
|
||||
}
|
||||
|
||||
|
||||
@@ -941,6 +941,10 @@ TEST_F(EnqueueSvmTest, GivenSvmAllocationWhenEnqueingKernelThenSuccessIsReturned
|
||||
|
||||
TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSurfacesAreMadeResident) {
|
||||
USE_REAL_FILE_SYSTEM();
|
||||
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
debugManager.flags.EnableIsaAllocationPool.set(false);
|
||||
|
||||
auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM);
|
||||
ASSERT_NE(nullptr, svmData);
|
||||
GraphicsAllocation *svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -116,15 +116,22 @@ void KernelDataTest::buildAndDecode() {
|
||||
EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pSsh, pSsh, sshSize));
|
||||
}
|
||||
if (kernelHeapSize) {
|
||||
auto kernelAllocation = pKernelInfo->getGraphicsAllocation();
|
||||
auto kernelAllocation = pKernelInfo->getIsaGraphicsAllocation();
|
||||
UNRECOVERABLE_IF(kernelAllocation == nullptr);
|
||||
auto &device = pContext->getDevice(0)->getDevice();
|
||||
auto &helper = device.getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
|
||||
size_t isaPadding = helper.getPaddingForISAAllocation();
|
||||
EXPECT_EQ(kernelAllocation->getUnderlyingBufferSize(), kernelHeapSize + isaPadding);
|
||||
size_t expectedIsaSize = kernelHeapSize + isaPadding;
|
||||
if (program->getKernelsIsaParentAllocation(rootDeviceIndex)) {
|
||||
expectedIsaSize = alignUp(expectedIsaSize, pContext->getDevice(0)->getDevice().getGfxCoreHelper().getKernelIsaPointerAlignment());
|
||||
}
|
||||
EXPECT_EQ(pKernelInfo->getIsaSize(), expectedIsaSize);
|
||||
auto kernelIsa = kernelAllocation->getUnderlyingBuffer();
|
||||
if (pKernelInfo->getIsaParentAllocation()) {
|
||||
kernelIsa = ptrOffset(kernelIsa, pKernelInfo->getIsaOffsetInParentAllocation());
|
||||
}
|
||||
EXPECT_EQ(0, memcmp(kernelIsa, pKernelInfo->heapInfo.pKernelHeap, kernelHeapSize));
|
||||
} else {
|
||||
EXPECT_EQ(nullptr, pKernelInfo->getGraphicsAllocation());
|
||||
EXPECT_EQ(nullptr, pKernelInfo->getIsaGraphicsAllocation());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -60,8 +60,9 @@ class KernelDataTest : public testing::Test {
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (pKernelInfo->kernelAllocation) {
|
||||
pContext->getDevice(0)->getMemoryManager()->freeGraphicsMemory(pKernelInfo->kernelAllocation);
|
||||
auto isaAllocation = pKernelInfo->getIsaGraphicsAllocation();
|
||||
if (isaAllocation && !pKernelInfo->getIsaParentAllocation()) {
|
||||
pContext->getDevice(0)->getMemoryManager()->freeGraphicsMemory(isaAllocation);
|
||||
const_cast<KernelInfo *>(pKernelInfo)->kernelAllocation = nullptr;
|
||||
}
|
||||
program.reset();
|
||||
|
||||
@@ -170,6 +170,7 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture {
|
||||
public:
|
||||
void setUp() {
|
||||
debugManager.flags.GTPinAllocateBufferInSharedMemory.set(false);
|
||||
debugManager.flags.EnableIsaAllocationPool.set(false);
|
||||
setUpImpl();
|
||||
}
|
||||
|
||||
|
||||
@@ -219,7 +219,8 @@ TEST_F(KernelTests, givenBinaryWhenItIsQueriedForGpuAddressThenAbsoluteAddressIs
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto gmmHelper = pDevice->getGmmHelper();
|
||||
auto expectedGpuAddress = gmmHelper->decanonize(kernel->getKernelInfo().kernelAllocation->getGpuAddress());
|
||||
auto &kernelInfo = kernel->getKernelInfo();
|
||||
auto expectedGpuAddress = gmmHelper->decanonize(kernelInfo.getIsaGraphicsAllocation()->getGpuAddress() + kernelInfo.getIsaOffsetInParentAllocation());
|
||||
EXPECT_EQ(expectedGpuAddress, paramValue);
|
||||
EXPECT_EQ(paramValueSize, paramValueSizeRet);
|
||||
}
|
||||
@@ -2900,11 +2901,11 @@ TEST(KernelInfoTest, givenGfxCoreHelperWhenCreatingKernelAllocationThenCorrectPa
|
||||
mockKernel->kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
|
||||
mockKernel->kernelInfo.createKernelAllocation(clDevice->getDevice(), false);
|
||||
|
||||
auto graphicsAllocation = mockKernel->kernelInfo.getGraphicsAllocation();
|
||||
auto graphicsAllocation = mockKernel->kernelInfo.getIsaGraphicsAllocation();
|
||||
auto &helper = clDevice->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
|
||||
size_t isaPadding = helper.getPaddingForISAAllocation();
|
||||
EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), mockKernel->kernelInfo.heapInfo.kernelHeapSize + isaPadding);
|
||||
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation());
|
||||
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getIsaGraphicsAllocation());
|
||||
}
|
||||
|
||||
TEST(KernelTest, WhenSettingKernelArgThenBuiltinDispatchInfoBuilderIsUsed) {
|
||||
@@ -3469,12 +3470,12 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse
|
||||
mockKernel.kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad = 128;
|
||||
|
||||
mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
|
||||
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
auto allocationOffset = mockKernel.kernelInfo.getIsaGraphicsAllocation()->getGpuAddressToPatch();
|
||||
|
||||
mockKernel.mockKernel->setStartOffset(128);
|
||||
auto offset = mockKernel.mockKernel->getKernelStartAddress(false, true, false, false);
|
||||
EXPECT_EQ(allocationOffset + 256u, offset);
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getIsaGraphicsAllocation());
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenFullAddressRequestWhenAskingForKernelStartAddressThenReturnFullAddress) {
|
||||
@@ -3485,9 +3486,9 @@ TEST(KernelTest, givenFullAddressRequestWhenAskingForKernelStartAddressThenRetur
|
||||
mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
|
||||
|
||||
auto address = mockKernel.mockKernel->getKernelStartAddress(false, true, false, true);
|
||||
EXPECT_EQ(mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddress(), address);
|
||||
EXPECT_EQ(mockKernel.kernelInfo.getIsaGraphicsAllocation()->getGpuAddress(), address);
|
||||
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getIsaGraphicsAllocation());
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
|
||||
@@ -3498,12 +3499,12 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet
|
||||
mockKernel.kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad = 128;
|
||||
|
||||
mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
|
||||
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
auto allocationOffset = mockKernel.kernelInfo.getIsaGraphicsAllocation()->getGpuAddressToPatch();
|
||||
|
||||
mockKernel.mockKernel->setStartOffset(128);
|
||||
auto offset = mockKernel.mockKernel->getKernelStartAddress(true, true, false, false);
|
||||
EXPECT_EQ(allocationOffset + 128u, offset);
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getIsaGraphicsAllocation());
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
|
||||
@@ -3514,12 +3515,12 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe
|
||||
mockKernel.kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad = 128;
|
||||
|
||||
mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
|
||||
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
auto allocationOffset = mockKernel.kernelInfo.getIsaGraphicsAllocation()->getGpuAddressToPatch();
|
||||
|
||||
mockKernel.mockKernel->setStartOffset(128);
|
||||
auto offset = mockKernel.mockKernel->getKernelStartAddress(false, false, false, false);
|
||||
EXPECT_EQ(allocationOffset + 128u, offset);
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
|
||||
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getIsaGraphicsAllocation());
|
||||
}
|
||||
|
||||
TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaultValue) {
|
||||
@@ -4163,3 +4164,89 @@ TEST(KernelTest, whenCallingGetEnqueuedLocalWorkSizeValuesThenReturnProperValues
|
||||
EXPECT_EQ(expectedELWS[1], *(enqueuedLocalWorkSize[1]));
|
||||
EXPECT_EQ(expectedELWS[2], *(enqueuedLocalWorkSize[2]));
|
||||
}
|
||||
|
||||
class KernelInfoIsaAllocationTest : public ::testing::Test {
|
||||
protected:
|
||||
void SetUp() override {
|
||||
standaloneAllocation = std::make_unique<MockGraphicsAllocation>();
|
||||
parentAllocation = std::make_unique<MockGraphicsAllocation>();
|
||||
kernelInfo = std::make_unique<MockKernelInfo>();
|
||||
}
|
||||
|
||||
void TearDown() override {}
|
||||
|
||||
std::unique_ptr<MockGraphicsAllocation> standaloneAllocation;
|
||||
std::unique_ptr<MockGraphicsAllocation> parentAllocation;
|
||||
std::unique_ptr<MockKernelInfo> kernelInfo;
|
||||
};
|
||||
|
||||
TEST_F(KernelInfoIsaAllocationTest, givenStandaloneAllocationWhenQueryingIsaPropertiesThenCorrectValuesReturned) {
|
||||
constexpr size_t allocationSize = 4096u;
|
||||
standaloneAllocation->setSize(allocationSize);
|
||||
|
||||
kernelInfo->setIsaPerKernelAllocation(standaloneAllocation.get());
|
||||
|
||||
EXPECT_EQ(standaloneAllocation.get(), kernelInfo->getGraphicsAllocation());
|
||||
EXPECT_EQ(standaloneAllocation.get(), kernelInfo->getIsaGraphicsAllocation());
|
||||
EXPECT_EQ(allocationSize, kernelInfo->getIsaSize());
|
||||
EXPECT_EQ(nullptr, kernelInfo->getIsaParentAllocation());
|
||||
EXPECT_EQ(0u, kernelInfo->getIsaOffsetInParentAllocation());
|
||||
EXPECT_EQ(0u, kernelInfo->getIsaSubAllocationSize());
|
||||
}
|
||||
|
||||
TEST_F(KernelInfoIsaAllocationTest, givenParentAllocationWhenSettingIsaPropertiesThenCorrectValuesReturned) {
|
||||
constexpr size_t offset = 1024u;
|
||||
constexpr size_t subAllocationSize = 2048u;
|
||||
|
||||
kernelInfo->setIsaParentAllocation(parentAllocation.get());
|
||||
kernelInfo->setIsaSubAllocationOffset(offset);
|
||||
kernelInfo->setIsaSubAllocationSize(subAllocationSize);
|
||||
|
||||
EXPECT_EQ(parentAllocation.get(), kernelInfo->getIsaParentAllocation());
|
||||
EXPECT_EQ(parentAllocation.get(), kernelInfo->getIsaGraphicsAllocation());
|
||||
EXPECT_EQ(subAllocationSize, kernelInfo->getIsaSize());
|
||||
EXPECT_EQ(offset, kernelInfo->getIsaOffsetInParentAllocation());
|
||||
EXPECT_EQ(subAllocationSize, kernelInfo->getIsaSubAllocationSize());
|
||||
EXPECT_EQ(nullptr, kernelInfo->kernelAllocation);
|
||||
|
||||
// Test updating offset and size
|
||||
constexpr size_t newOffset = 2048u;
|
||||
constexpr size_t newSize = 4096u;
|
||||
kernelInfo->setIsaSubAllocationOffset(newOffset);
|
||||
kernelInfo->setIsaSubAllocationSize(newSize);
|
||||
|
||||
EXPECT_EQ(newOffset, kernelInfo->getIsaOffsetInParentAllocation());
|
||||
EXPECT_EQ(newSize, kernelInfo->getIsaSubAllocationSize());
|
||||
EXPECT_EQ(newSize, kernelInfo->getIsaSize());
|
||||
}
|
||||
|
||||
TEST_F(KernelInfoIsaAllocationTest, givenNoAllocationWhenQueryingIsaPropertiesThenDefaultValuesReturned) {
|
||||
EXPECT_EQ(nullptr, kernelInfo->getGraphicsAllocation());
|
||||
EXPECT_EQ(nullptr, kernelInfo->getIsaParentAllocation());
|
||||
EXPECT_EQ(0u, kernelInfo->getIsaOffsetInParentAllocation());
|
||||
EXPECT_EQ(0u, kernelInfo->getIsaSubAllocationSize());
|
||||
}
|
||||
|
||||
TEST_F(KernelInfoIsaAllocationTest, givenTransitionBetweenAllocationTypesWhenChangingPropertiesThenCorrectStateIsMaintained) {
|
||||
constexpr size_t standaloneSize = 4096u;
|
||||
constexpr size_t pooledOffset = 512u;
|
||||
constexpr size_t pooledSize = 2048u;
|
||||
|
||||
// Start with standalone
|
||||
standaloneAllocation->setSize(standaloneSize);
|
||||
kernelInfo->setIsaPerKernelAllocation(standaloneAllocation.get());
|
||||
EXPECT_EQ(standaloneSize, kernelInfo->getIsaSize());
|
||||
EXPECT_EQ(standaloneAllocation.get(), kernelInfo->getIsaGraphicsAllocation());
|
||||
EXPECT_EQ(nullptr, kernelInfo->getIsaParentAllocation());
|
||||
|
||||
// Transition to pooled
|
||||
kernelInfo->kernelAllocation = nullptr;
|
||||
kernelInfo->setIsaParentAllocation(parentAllocation.get());
|
||||
kernelInfo->setIsaSubAllocationOffset(pooledOffset);
|
||||
kernelInfo->setIsaSubAllocationSize(pooledSize);
|
||||
|
||||
EXPECT_EQ(pooledSize, kernelInfo->getIsaSize());
|
||||
EXPECT_EQ(parentAllocation.get(), kernelInfo->getIsaGraphicsAllocation());
|
||||
EXPECT_EQ(pooledOffset, kernelInfo->getIsaOffsetInParentAllocation());
|
||||
EXPECT_EQ(nullptr, kernelInfo->kernelAllocation);
|
||||
}
|
||||
|
||||
@@ -233,9 +233,11 @@ class MockProgram : public Program {
|
||||
}
|
||||
|
||||
void debugNotify(const ClDeviceVector &deviceVector, std::unordered_map<uint32_t, BuildPhase> &phasesReached) override {
|
||||
if (callBaseDebugNotify) {
|
||||
Program::debugNotify(deviceVector, phasesReached);
|
||||
wasDebuggerNotified = true;
|
||||
}
|
||||
}
|
||||
|
||||
void callPopulateZebinExtendedArgsMetadataOnce(uint32_t rootDeviceIndex) override {
|
||||
wasPopulateZebinExtendedArgsMetadataOnceCalled = true;
|
||||
@@ -245,18 +247,35 @@ class MockProgram : public Program {
|
||||
}
|
||||
}
|
||||
|
||||
bool transferIsaSegmentsToAllocation(Device *pDevice, std::vector<KernelInfo *> &kernelInfoArray, const Linker::PatchableSegments *isaSegmentsForPatching, uint32_t rootDeviceIndex) override {
|
||||
if (transferIsaSegmentsToAllocationOverride != -1) {
|
||||
return (transferIsaSegmentsToAllocationOverride > 0);
|
||||
}
|
||||
return Program::transferIsaSegmentsToAllocation(pDevice, kernelInfoArray, isaSegmentsForPatching, rootDeviceIndex);
|
||||
}
|
||||
|
||||
bool isIsaPoolingEnabled(Device &neoDevice) override {
|
||||
if (isIsaPoolingEnabledOverride != -1) {
|
||||
return (isIsaPoolingEnabledOverride > 0);
|
||||
}
|
||||
return Program::isIsaPoolingEnabled(neoDevice);
|
||||
}
|
||||
|
||||
std::vector<NEO::ExternalFunctionInfo> externalFunctions;
|
||||
std::map<uint32_t, int> processGenBinaryCalledPerRootDevice;
|
||||
std::map<uint32_t, int> replaceDeviceBinaryCalledPerRootDevice;
|
||||
static int getInternalOptionsCalled;
|
||||
int isFlagOptionOverride = -1;
|
||||
int isOptionValueValidOverride = -1;
|
||||
int transferIsaSegmentsToAllocationOverride = -1;
|
||||
int isIsaPoolingEnabledOverride = -1;
|
||||
bool contextSet = false;
|
||||
bool wasProcessDebugDataCalled = false;
|
||||
bool wasCreateDebugZebinCalled = false;
|
||||
bool wasDebuggerNotified = false;
|
||||
bool wasPopulateZebinExtendedArgsMetadataOnceCalled = false;
|
||||
bool callBasePopulateZebinExtendedArgsMetadataOnce = false;
|
||||
bool callBaseDebugNotify = true;
|
||||
auto getIntermediateRepresentation() const { return this->intermediateRepresentation; }
|
||||
auto getIsGeneratedByIgc() const { return this->isGeneratedByIgc; }
|
||||
auto &getBuildInfos() { return this->buildInfos; }
|
||||
|
||||
@@ -432,6 +432,9 @@ HWTEST2_TEMPLATED_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageI
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
debugManager.flags.EnableIsaAllocationPool.set(false);
|
||||
|
||||
device->setPreemptionMode(PreemptionMode::Disabled);
|
||||
|
||||
mock->ioctlExpected.gemCreate = 1;
|
||||
|
||||
@@ -838,7 +838,7 @@ TEST(ProgramLinkBinaryTest, whenLinkerUnresolvedExternalThenLinkFailedAndBuildLo
|
||||
expectedUnresolvedExternals.push_back(Linker::UnresolvedExternal{relocation, 0, false});
|
||||
auto expectedError = constructLinkerErrorMessage(expectedUnresolvedExternals, std::vector<std::string>{"kernel : " + kernelInfo.kernelDescriptor.kernelMetadata.kernelName});
|
||||
EXPECT_TRUE(hasSubstr(buildLog, expectedError));
|
||||
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.getGraphicsAllocation());
|
||||
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.getIsaGraphicsAllocation());
|
||||
}
|
||||
|
||||
HWTEST2_F(ProgramDataTest, whenLinkerInputValidThenIsaIsProperlyPatched, MatchAny) {
|
||||
@@ -978,7 +978,7 @@ TEST(ProgramStringSectionTest, WhenConstStringBufferIsPresentThenUseItForLinking
|
||||
|
||||
program.setLinkerInput(rootDeviceIndex, std::move(linkerInput));
|
||||
|
||||
auto isaCpuPtr = reinterpret_cast<char *>(kernelInfo.getGraphicsAllocation()->getUnderlyingBuffer());
|
||||
auto isaCpuPtr = reinterpret_cast<char *>(kernelInfo.getIsaGraphicsAllocation()->getUnderlyingBuffer());
|
||||
auto patchAddr = ptrOffset(isaCpuPtr, 0x8);
|
||||
|
||||
const char constStringData[] = "Hello World!\n";
|
||||
|
||||
@@ -41,10 +41,12 @@
|
||||
#include "shared/test/common/mocks/mock_ail_configuration.h"
|
||||
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
||||
#include "shared/test/common/mocks/mock_compiler_interface.h"
|
||||
#include "shared/test/common/mocks/mock_debugger.h"
|
||||
#include "shared/test/common/mocks/mock_elf.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
#include "shared/test/common/mocks/mock_modules_zebin.h"
|
||||
#include "shared/test/common/mocks/mock_product_helper.h"
|
||||
#include "shared/test/common/mocks/mock_tbx_csr.h"
|
||||
#include "shared/test/common/mocks/mock_usm_memory_pool.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
#include "shared/test/common/utilities/base_object_utils.h"
|
||||
@@ -511,12 +513,67 @@ TEST_F(ProgramFromBinaryTest, givenProgramWhenItIsBeingBuildThenItContainsGraphi
|
||||
pProgram->build(pProgram->getDevices(), nullptr);
|
||||
auto kernelInfo = pProgram->getKernelInfo(size_t(0), rootDeviceIndex);
|
||||
|
||||
auto graphicsAllocation = kernelInfo->getGraphicsAllocation();
|
||||
auto graphicsAllocation = kernelInfo->getIsaGraphicsAllocation();
|
||||
ASSERT_NE(nullptr, graphicsAllocation);
|
||||
EXPECT_TRUE(graphicsAllocation->is32BitAllocation());
|
||||
auto &helper = pDevice->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
|
||||
size_t isaPadding = helper.getPaddingForISAAllocation();
|
||||
bool isIsaPooled = (pProgram->getKernelsIsaParentAllocation(rootDeviceIndex) != nullptr);
|
||||
if (!isIsaPooled) {
|
||||
EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo->heapInfo.kernelHeapSize + isaPadding);
|
||||
}
|
||||
EXPECT_EQ(kernelInfo->getIsaSize(), kernelInfo->heapInfo.kernelHeapSize + isaPadding);
|
||||
|
||||
auto kernelIsa = ptrOffset(graphicsAllocation->getUnderlyingBuffer(), kernelInfo->getIsaOffsetInParentAllocation());
|
||||
EXPECT_NE(kernelInfo->heapInfo.pKernelHeap, kernelIsa);
|
||||
EXPECT_EQ(0, memcmp(kernelIsa, kernelInfo->heapInfo.pKernelHeap, kernelInfo->heapInfo.kernelHeapSize));
|
||||
auto rootDeviceIndex = graphicsAllocation->getRootDeviceIndex();
|
||||
auto gmmHelper = pDevice->getGmmHelper();
|
||||
EXPECT_EQ(gmmHelper->decanonize(graphicsAllocation->getGpuBaseAddress()), pDevice->getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, graphicsAllocation->isAllocatedInLocalMemoryPool()));
|
||||
}
|
||||
|
||||
class ProgramFromBinaryIsaPoolingTest : public ProgramFromBinaryTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
ProgramFromBinaryFixture::SetUp();
|
||||
enableIsaPooling();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
ProgramFromBinaryFixture::TearDown();
|
||||
}
|
||||
|
||||
void enableIsaPooling() {
|
||||
pProgram->isIsaPoolingEnabledOverride = 1;
|
||||
}
|
||||
|
||||
void disableIsaPooling() {
|
||||
pProgram->isIsaPoolingEnabledOverride = 0;
|
||||
}
|
||||
|
||||
void defaultIsaPooling() {
|
||||
pProgram->isIsaPoolingEnabledOverride = -1;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(ProgramFromBinaryIsaPoolingTest, givenEnabledIsaAllocationPoolWhenBuildingProgramThenIsaAllocationIsPartOfSharedParentAllocation) {
|
||||
pProgram->build(pProgram->getDevices(), nullptr);
|
||||
|
||||
EXPECT_NE(nullptr, pProgram->getKernelsIsaParentAllocation(rootDeviceIndex));
|
||||
|
||||
auto kernelInfo = pProgram->getKernelInfo(size_t(0), rootDeviceIndex);
|
||||
|
||||
auto graphicsAllocation = kernelInfo->getIsaGraphicsAllocation();
|
||||
ASSERT_NE(nullptr, graphicsAllocation);
|
||||
EXPECT_TRUE(graphicsAllocation->is32BitAllocation());
|
||||
|
||||
EXPECT_TRUE(pDevice->getIsaPoolAllocator().isPoolBuffer(graphicsAllocation));
|
||||
EXPECT_EQ(nullptr, kernelInfo->kernelAllocation);
|
||||
|
||||
auto expectedIsaSize = pProgram->computeKernelIsaAllocationAlignedSizeWithPadding(*pDevice, kernelInfo->heapInfo.kernelHeapSize, true);
|
||||
EXPECT_EQ(expectedIsaSize, kernelInfo->getIsaSize());
|
||||
|
||||
EXPECT_EQ(0u, kernelInfo->getIsaOffsetInParentAllocation());
|
||||
|
||||
auto kernelIsa = graphicsAllocation->getUnderlyingBuffer();
|
||||
EXPECT_NE(kernelInfo->heapInfo.pKernelHeap, kernelIsa);
|
||||
@@ -526,6 +583,53 @@ TEST_F(ProgramFromBinaryTest, givenProgramWhenItIsBeingBuildThenItContainsGraphi
|
||||
EXPECT_EQ(gmmHelper->decanonize(graphicsAllocation->getGpuBaseAddress()), pDevice->getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, graphicsAllocation->isAllocatedInLocalMemoryPool()));
|
||||
}
|
||||
|
||||
TEST_F(ProgramFromBinaryIsaPoolingTest, givenEnabledIsaAllocationPoolWhenBuildingProgramAndTransferIsaSegmentsToAllocationFailsThenReturnOutOfHostMemory) {
|
||||
pProgram->transferIsaSegmentsToAllocationOverride = 0;
|
||||
retVal = pProgram->build(pProgram->getDevices(), nullptr);
|
||||
EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
|
||||
}
|
||||
|
||||
TEST_F(ProgramFromBinaryIsaPoolingTest, givenEnabledIsaAllocationPoolWhenBuildingProgramAndRequestGraphicsAllocationForIsaFailsThenReturnOutOfHostMemory) {
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->isMockHostMemoryManager = true;
|
||||
memoryManager->forceFailureInPrimaryAllocation = true;
|
||||
|
||||
retVal = pProgram->build(pProgram->getDevices(), nullptr);
|
||||
EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
|
||||
EXPECT_EQ(nullptr, pProgram->getKernelsIsaParentAllocation(rootDeviceIndex));
|
||||
}
|
||||
|
||||
TEST_F(ProgramFromBinaryIsaPoolingTest, givenDisabledIsaAllocationPoolWhenBuildingProgramAndCreateKernelAllocationFailsThenReturnOutOfHostMemory) {
|
||||
disableIsaPooling();
|
||||
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->isMockHostMemoryManager = true;
|
||||
memoryManager->forceFailureInPrimaryAllocation = true;
|
||||
|
||||
retVal = pProgram->build(pProgram->getDevices(), nullptr);
|
||||
EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
|
||||
}
|
||||
|
||||
TEST_F(ProgramFromBinaryIsaPoolingTest, givenEnabled2MBLocalMemAlignmentGTPinInitializedWhenBuildingProgramThenIsaAllocationPoolIsNotUsed) {
|
||||
defaultIsaPooling();
|
||||
|
||||
pProgram->callBaseDebugNotify = false;
|
||||
|
||||
auto mockProductHelper = new MockProductHelper;
|
||||
pDevice->getRootDeviceEnvironmentRef().productHelper.reset(mockProductHelper);
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
|
||||
|
||||
isGTPinInitialized = true;
|
||||
pProgram->build(pProgram->getDevices(), nullptr);
|
||||
isGTPinInitialized = false;
|
||||
|
||||
auto kernelInfo = pProgram->getKernelInfo(size_t(0), rootDeviceIndex);
|
||||
|
||||
EXPECT_EQ(nullptr, pProgram->getKernelsIsaParentAllocation(rootDeviceIndex));
|
||||
EXPECT_NE(nullptr, kernelInfo->kernelAllocation);
|
||||
EXPECT_FALSE(pDevice->getIsaPoolAllocator().isPoolBuffer(kernelInfo->getIsaGraphicsAllocation()));
|
||||
}
|
||||
|
||||
TEST_F(ProgramFromBinaryTest, whenProgramIsBeingRebuildThenOutdatedGlobalBuffersAreFreed) {
|
||||
pProgram->build(pProgram->getDevices(), nullptr);
|
||||
EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface);
|
||||
@@ -695,6 +799,33 @@ TEST_F(ProgramFromBinaryTest, givenProgramWithGlobalAndConstAllocationsWhenGetti
|
||||
pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface = std::make_unique<SharedPoolAllocation>(new MockGraphicsAllocation());
|
||||
pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface = std::make_unique<SharedPoolAllocation>(new MockGraphicsAllocation());
|
||||
|
||||
auto allocs = pProgram->getModuleAllocations(pClDevice->getRootDeviceIndex());
|
||||
|
||||
auto expectedSize = 2u + (pProgram->getKernelsIsaParentAllocation(pClDevice->getRootDeviceIndex()) ? 1u : pProgram->getNumKernels());
|
||||
EXPECT_EQ(expectedSize, allocs.size());
|
||||
|
||||
auto iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface->getGraphicsAllocation());
|
||||
EXPECT_NE(allocs.end(), iter);
|
||||
|
||||
iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface->getGraphicsAllocation());
|
||||
EXPECT_NE(allocs.end(), iter);
|
||||
|
||||
if (auto isaParentAllocation = pProgram->getKernelsIsaParentAllocation(pClDevice->getRootDeviceIndex());
|
||||
isaParentAllocation != nullptr) {
|
||||
iter = std::find(allocs.begin(), allocs.end(), isaParentAllocation);
|
||||
EXPECT_NE(allocs.end(), iter);
|
||||
} else {
|
||||
iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].kernelInfoArray[0]->getIsaGraphicsAllocation());
|
||||
EXPECT_NE(allocs.end(), iter);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ProgramFromBinaryIsaPoolingTest, givenEnabledIsaAllocationPoolAndProgramWithGlobalAndConstAllocationsWhenGettingModuleAllocationsThenAllAreReturned) {
|
||||
pProgram->build(pProgram->getDevices(), nullptr);
|
||||
pProgram->processGenBinary(*pClDevice);
|
||||
pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface = std::make_unique<SharedPoolAllocation>(new MockGraphicsAllocation());
|
||||
pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface = std::make_unique<SharedPoolAllocation>(new MockGraphicsAllocation());
|
||||
|
||||
auto allocs = pProgram->getModuleAllocations(pClDevice->getRootDeviceIndex());
|
||||
EXPECT_EQ(pProgram->getNumKernels() + 2u, allocs.size());
|
||||
|
||||
@@ -704,10 +835,127 @@ TEST_F(ProgramFromBinaryTest, givenProgramWithGlobalAndConstAllocationsWhenGetti
|
||||
iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface->getGraphicsAllocation());
|
||||
EXPECT_NE(allocs.end(), iter);
|
||||
|
||||
iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].kernelInfoArray[0]->getGraphicsAllocation());
|
||||
iter = std::find(allocs.begin(), allocs.end(), pProgram->buildInfos[pClDevice->getRootDeviceIndex()].kernelInfoArray[0]->getIsaParentAllocation());
|
||||
EXPECT_NE(allocs.end(), iter);
|
||||
}
|
||||
|
||||
HWTEST_F(ProgramFromBinaryIsaPoolingTest, givenTbxModeAndPooledIsaWhenTransferringSegmentsThenWriteMemoryIsCalled) {
|
||||
auto tbxCsr = new MockTbxCsr<FamilyType>(*pDevice->executionEnvironment, pDevice->getDeviceBitfield());
|
||||
pDevice->resetCommandStreamReceiver(tbxCsr);
|
||||
|
||||
retVal = pProgram->build(pProgram->getDevices(), nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_NE(nullptr, pProgram->getKernelsIsaParentAllocation(rootDeviceIndex));
|
||||
|
||||
EXPECT_TRUE(tbxCsr->writeMemoryGfxAllocCalled);
|
||||
}
|
||||
|
||||
class ProgramIsaPoolingEnabledTest : public ProgramFromBinaryTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
ProgramFromBinaryTest::SetUp();
|
||||
pDevice->getRootDeviceEnvironmentRef().productHelper.reset(mockProductHelper);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
ProgramFromBinaryTest::TearDown();
|
||||
}
|
||||
|
||||
MockProductHelper *mockProductHelper = new MockProductHelper;
|
||||
DebugManagerStateRestore restorer;
|
||||
};
|
||||
|
||||
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagSetWhenCheckingIsaPoolingThenReturnCorrectValue) {
|
||||
{
|
||||
debugManager.flags.EnableIsaAllocationPool.set(1);
|
||||
EXPECT_TRUE(pProgram->isIsaPoolingEnabled(*pDevice));
|
||||
}
|
||||
{
|
||||
debugManager.flags.EnableIsaAllocationPool.set(0);
|
||||
EXPECT_FALSE(pProgram->isIsaPoolingEnabled(*pDevice));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagDefaultAnd2MBAlignmentDisabledWhenCheckingIsaPoolingThenReturnFalse) {
|
||||
debugManager.flags.EnableIsaAllocationPool.set(-1);
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
|
||||
|
||||
EXPECT_FALSE(pProgram->isIsaPoolingEnabled(*pDevice));
|
||||
}
|
||||
|
||||
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagDefaultAndL0DebuggerPresentWhenCheckingIsaPoolingThenReturnFalse) {
|
||||
debugManager.flags.EnableIsaAllocationPool.set(-1);
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
|
||||
|
||||
pDevice->getRootDeviceEnvironmentRef().debugger.reset(new MockDebugger);
|
||||
pDevice->setDebugger(pDevice->getRootDeviceEnvironmentRef().debugger.get());
|
||||
|
||||
EXPECT_FALSE(pProgram->isIsaPoolingEnabled(*pDevice));
|
||||
|
||||
pDevice->getRootDeviceEnvironmentRef().debugger.reset(nullptr);
|
||||
pDevice->setDebugger(nullptr);
|
||||
}
|
||||
|
||||
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagDefaultAndGTPinInitializedWhenCheckingIsaPoolingThenReturnFalse) {
|
||||
debugManager.flags.EnableIsaAllocationPool.set(-1);
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
|
||||
|
||||
isGTPinInitialized = true;
|
||||
EXPECT_FALSE(pProgram->isIsaPoolingEnabled(*pDevice));
|
||||
isGTPinInitialized = false;
|
||||
}
|
||||
|
||||
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagDefaultAndKernelBinaryReuseEnabledWhenCheckingIsaPoolingThenReturnFalse) {
|
||||
debugManager.flags.EnableIsaAllocationPool.set(-1);
|
||||
debugManager.flags.ReuseKernelBinaries.set(1);
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
|
||||
|
||||
EXPECT_FALSE(pProgram->isIsaPoolingEnabled(*pDevice));
|
||||
}
|
||||
|
||||
TEST_F(ProgramIsaPoolingEnabledTest, givenDebugFlagDefaultAndAllConditionsMetWhenCheckingIsaPoolingThenReturnTrue) {
|
||||
debugManager.flags.EnableIsaAllocationPool.set(-1);
|
||||
debugManager.flags.ReuseKernelBinaries.set(0);
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
|
||||
|
||||
isGTPinInitialized = false;
|
||||
|
||||
EXPECT_TRUE(pProgram->isIsaPoolingEnabled(*pDevice));
|
||||
}
|
||||
|
||||
TEST_F(ProgramFromBinaryTest, givenVariousIsaSizesAndKernelPositionsWhenComputingSizeThenCorrectAlignmentAndPaddingAreApplied) {
|
||||
auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
|
||||
auto alignment = gfxCoreHelper.getKernelIsaPointerAlignment();
|
||||
auto padding = gfxCoreHelper.getPaddingForISAAllocation();
|
||||
|
||||
std::vector<size_t> testSizes = {0u, 1u, 64u, 256u, 1024u, 4096u};
|
||||
|
||||
for (auto isaSize : testSizes) {
|
||||
// Test last kernel (with padding)
|
||||
auto sizeWithPadding = pProgram->computeKernelIsaAllocationAlignedSizeWithPadding(*pDevice, isaSize, true);
|
||||
EXPECT_EQ(alignUp(isaSize + padding, alignment), sizeWithPadding);
|
||||
EXPECT_TRUE(isAligned(sizeWithPadding, alignment));
|
||||
EXPECT_GE(sizeWithPadding, isaSize + padding);
|
||||
|
||||
// Test not last kernel (without padding)
|
||||
auto sizeWithoutPadding = pProgram->computeKernelIsaAllocationAlignedSizeWithPadding(*pDevice, isaSize, false);
|
||||
EXPECT_EQ(alignUp(isaSize, alignment), sizeWithoutPadding);
|
||||
EXPECT_TRUE(isAligned(sizeWithoutPadding, alignment));
|
||||
EXPECT_GE(sizeWithoutPadding, isaSize);
|
||||
|
||||
// Size with padding should be >= size without padding
|
||||
EXPECT_GE(sizeWithPadding, sizeWithoutPadding);
|
||||
}
|
||||
|
||||
// Test already aligned size
|
||||
size_t alignedIsaSize = alignment * 4;
|
||||
auto alignedWithoutPadding = pProgram->computeKernelIsaAllocationAlignedSizeWithPadding(*pDevice, alignedIsaSize, false);
|
||||
EXPECT_EQ(alignedIsaSize, alignedWithoutPadding);
|
||||
|
||||
auto alignedWithPadding = pProgram->computeKernelIsaAllocationAlignedSizeWithPadding(*pDevice, alignedIsaSize, true);
|
||||
EXPECT_EQ(alignUp(alignedIsaSize + padding, alignment), alignedWithPadding);
|
||||
}
|
||||
|
||||
using ProgramGetNumKernelsTest = Test<NEOProgramFixture>;
|
||||
TEST_F(ProgramGetNumKernelsTest, givenProgramWithFunctionsWhenGettingNumKernelsFunctionsAreNotExposed) {
|
||||
program->resizeAndPopulateKernelInfoArray(2);
|
||||
@@ -741,13 +989,17 @@ HWTEST_F(ProgramFromBinaryTest, givenProgramWhenCleanCurrentKernelInfoIsCalledBu
|
||||
auto &csr = pDevice->getGpgpuCommandStreamReceiver();
|
||||
EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
|
||||
pProgram->build(pProgram->getDevices(), nullptr);
|
||||
auto kernelAllocation = pProgram->getKernelInfo(static_cast<size_t>(0u), rootDeviceIndex)->getGraphicsAllocation();
|
||||
auto kernelAllocation = pProgram->getKernelInfo(static_cast<size_t>(0u), rootDeviceIndex)->getIsaGraphicsAllocation();
|
||||
const bool isIsaPooled = pProgram->getKernelsIsaParentAllocation(rootDeviceIndex);
|
||||
kernelAllocation->updateTaskCount(100, csr.getOsContext().getContextId());
|
||||
*csr.getTagAddress() = 0;
|
||||
pProgram->cleanCurrentKernelInfo(rootDeviceIndex);
|
||||
EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
|
||||
if (!isIsaPooled) {
|
||||
EXPECT_FALSE(csr.getDeferredAllocations().peekIsEmpty());
|
||||
EXPECT_EQ(csr.getDeferredAllocations().peekHead(), kernelAllocation);
|
||||
} else {
|
||||
EXPECT_TRUE(csr.getDeferredAllocations().peekIsEmpty());
|
||||
}
|
||||
EXPECT_TRUE(this->pDevice->getUltCommandStreamReceiver<FamilyType>().requiresInstructionCacheFlush);
|
||||
}
|
||||
|
||||
@@ -757,7 +1009,7 @@ HWTEST_F(ProgramFromBinaryTest, givenIsaAllocationUsedByMultipleCsrsWhenItIsDele
|
||||
|
||||
pProgram->build(pProgram->getDevices(), nullptr);
|
||||
|
||||
auto kernelAllocation = pProgram->getKernelInfo(static_cast<size_t>(0u), rootDeviceIndex)->getGraphicsAllocation();
|
||||
auto kernelAllocation = pProgram->getKernelInfo(static_cast<size_t>(0u), rootDeviceIndex)->getIsaGraphicsAllocation();
|
||||
|
||||
csr0.makeResident(*kernelAllocation);
|
||||
csr1.makeResident(*kernelAllocation);
|
||||
@@ -3267,6 +3519,52 @@ TEST_F(ProgramBinTest, GivenDebugDataAvailableWhenLinkingProgramThenDebugDataIsS
|
||||
EXPECT_NE(nullptr, pProgram->getDebugData(rootDeviceIndex));
|
||||
}
|
||||
|
||||
TEST_F(ProgramBinTest, givenEnabledIsaAllocationPoolWhenMultipleProgramsCreatedThenProgramsShareIsaAllocation) {
|
||||
DebugDataGuard debugDataGuard{true};
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.EnableIsaAllocationPool.set(1);
|
||||
|
||||
const size_t numPrograms = 5;
|
||||
std::vector<MockProgram *> programs;
|
||||
NEO::GraphicsAllocation *sharedAllocation;
|
||||
|
||||
const char *sourceCode = "__kernel void\nCB(\n__global unsigned int* src, __global unsigned int* dst)\n{\nint id = (int)get_global_id(0);\ndst[id] = src[id];\n}\n";
|
||||
|
||||
for (size_t i = 0; i < numPrograms; ++i) {
|
||||
programs.emplace_back(Program::create<MockProgram>(
|
||||
pContext,
|
||||
1,
|
||||
&sourceCode,
|
||||
&knownSourceSize,
|
||||
retVal));
|
||||
|
||||
retVal = programs[i]->build(programs[i]->getDevices(), nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
if (i == 0) {
|
||||
sharedAllocation = programs[i]->getKernelsIsaParentAllocation(rootDeviceIndex);
|
||||
EXPECT_TRUE(pDevice->getIsaPoolAllocator().isPoolBuffer(sharedAllocation));
|
||||
}
|
||||
|
||||
auto kernelInfoArray = programs[i]->getKernelInfoArray(pDevice->getRootDeviceIndex());
|
||||
auto offsetForKernelInfo = kernelInfoArray[0]->getIsaOffsetInParentAllocation();
|
||||
for (auto &kernelInfo : kernelInfoArray) {
|
||||
EXPECT_EQ(offsetForKernelInfo, kernelInfo->getIsaOffsetInParentAllocation());
|
||||
offsetForKernelInfo += kernelInfo->getIsaSubAllocationSize();
|
||||
}
|
||||
|
||||
// Verify that all kernel infos share same parent allocation
|
||||
if (i != 0) {
|
||||
EXPECT_EQ(sharedAllocation, programs[i]->getKernelsIsaParentAllocation(rootDeviceIndex));
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &program : programs) {
|
||||
program->release();
|
||||
}
|
||||
}
|
||||
|
||||
using ProgramMultiRootDeviceTests = MultiRootDeviceFixture;
|
||||
|
||||
TEST_F(ProgramMultiRootDeviceTests, WhenProgramIsCreatedThenBuildInfosVectorIsProperlyResized) {
|
||||
|
||||
@@ -46,7 +46,14 @@ void ProgramWithZebinFixture::populateProgramWithSegments(NEO::MockProgram *prog
|
||||
kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
|
||||
mockAlloc = std::make_unique<MockGraphicsAllocation>();
|
||||
|
||||
if (isUsingSharedIsaAllocation) {
|
||||
kernelInfo->setIsaParentAllocation(mockAlloc.get());
|
||||
kernelInfo->setIsaSubAllocationOffset(isaSubAllocOffset);
|
||||
kernelInfo->setIsaSubAllocationSize(isaSubAllocSize);
|
||||
} else {
|
||||
kernelInfo->kernelAllocation = mockAlloc.get();
|
||||
}
|
||||
|
||||
program->addKernelInfo(kernelInfo.get(), rootDeviceIndex);
|
||||
|
||||
|
||||
@@ -27,6 +27,9 @@ class ProgramWithZebinFixture : public ProgramTests {
|
||||
std::unique_ptr<MockBuffer> globalSurface;
|
||||
std::unique_ptr<MockBuffer> constantSurface;
|
||||
const char strings[12] = "Hello olleH";
|
||||
bool isUsingSharedIsaAllocation = false;
|
||||
const size_t isaSubAllocOffset = 48u;
|
||||
const size_t isaSubAllocSize = 308u;
|
||||
void SetUp() override;
|
||||
void TearDown() override;
|
||||
void addEmptyZebin(MockProgram *program);
|
||||
|
||||
@@ -42,7 +42,7 @@ TEST_F(ProgramWithZebinFixture, givenZebinSegmentsThenSegmentsArePopulated) {
|
||||
};
|
||||
checkGPUSeg(program->buildInfos[rootDeviceIndex].constantSurface->getGraphicsAllocation(), segments.constData);
|
||||
checkGPUSeg(program->buildInfos[rootDeviceIndex].globalSurface->getGraphicsAllocation(), segments.varData);
|
||||
checkGPUSeg(program->getKernelInfoArray(rootDeviceIndex)[0]->getGraphicsAllocation(), segments.nameToSegMap[ZebinTestData::ValidEmptyProgram<>::kernelName]);
|
||||
checkGPUSeg(program->getKernelInfoArray(rootDeviceIndex)[0]->getIsaGraphicsAllocation(), segments.nameToSegMap[ZebinTestData::ValidEmptyProgram<>::kernelName]);
|
||||
|
||||
EXPECT_EQ(reinterpret_cast<uintptr_t>(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), segments.stringData.address);
|
||||
EXPECT_EQ(reinterpret_cast<const char *>(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), strings);
|
||||
@@ -65,6 +65,33 @@ TEST_F(ProgramWithZebinFixture, givenZebinSegmentsWithSharedGlobalAndConstSurfac
|
||||
checkGPUSeg(program->buildInfos[rootDeviceIndex].globalSurface.get(), segments.varData);
|
||||
}
|
||||
|
||||
TEST_F(ProgramWithZebinFixture, givenSharedIsaAllocationWhenGetZebinSegmentsThenSegmentsAreCorrectlyPopulated) {
|
||||
isUsingSharedIsaAllocation = true;
|
||||
populateProgramWithSegments(program.get());
|
||||
auto segments = program->getZebinSegments(rootDeviceIndex);
|
||||
|
||||
auto checkGPUSeg = [](NEO::GraphicsAllocation *alloc, NEO::Zebin::Debug::Segments::Segment segment) {
|
||||
EXPECT_EQ(static_cast<uintptr_t>(alloc->getGpuAddress()), segment.address);
|
||||
EXPECT_EQ(static_cast<size_t>(alloc->getUnderlyingBufferSize()), segment.size);
|
||||
};
|
||||
checkGPUSeg(program->buildInfos[rootDeviceIndex].constantSurface->getGraphicsAllocation(), segments.constData);
|
||||
checkGPUSeg(program->buildInfos[rootDeviceIndex].globalSurface->getGraphicsAllocation(), segments.varData);
|
||||
|
||||
{
|
||||
auto kernelInfo = program->getKernelInfoArray(rootDeviceIndex)[0];
|
||||
auto segment = segments.nameToSegMap[ZebinTestData::ValidEmptyProgram<>::kernelName];
|
||||
auto isaAlloc = kernelInfo->getIsaGraphicsAllocation();
|
||||
auto offset = kernelInfo->getIsaOffsetInParentAllocation();
|
||||
|
||||
EXPECT_EQ(static_cast<uintptr_t>(isaAlloc->getGpuAddress() + offset), segment.address);
|
||||
EXPECT_EQ(static_cast<size_t>(kernelInfo->getIsaSubAllocationSize()), segment.size);
|
||||
}
|
||||
|
||||
EXPECT_EQ(reinterpret_cast<uintptr_t>(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), segments.stringData.address);
|
||||
EXPECT_EQ(reinterpret_cast<const char *>(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), strings);
|
||||
EXPECT_EQ(program->buildInfos[rootDeviceIndex].constStringSectionData.size, sizeof(strings));
|
||||
}
|
||||
|
||||
TEST_F(ProgramWithZebinFixture, givenNonEmptyDebugDataThenDebugZebinIsNotCreated) {
|
||||
addEmptyZebin(program.get());
|
||||
populateProgramWithSegments(program.get());
|
||||
|
||||
@@ -45,7 +45,7 @@ struct EnqueueFixtureXe2HpgCore : public ::testing::Test {
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation());
|
||||
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getIsaGraphicsAllocation());
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
@@ -92,7 +92,7 @@ XE2_HPG_CORETEST_F(MemoryPrefetchTestsXe2HpgCore, givenKernelWhenWalkerIsProgram
|
||||
|
||||
auto gmmHelper = clDevice->getRootDeviceEnvironment().getGmmHelper();
|
||||
|
||||
EXPECT_EQ(gmmHelper->decanonize(mockKernel->kernelInfo.getGraphicsAllocation()->getGpuAddress()), statePrefetchCmd->getAddress());
|
||||
EXPECT_EQ(gmmHelper->decanonize(mockKernel->kernelInfo.getIsaGraphicsAllocation()->getGpuAddress()), statePrefetchCmd->getAddress());
|
||||
EXPECT_TRUE(statePrefetchCmd->getKernelInstructionPrefetch());
|
||||
}
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ struct EnqueueFixtureXeHpcCore : public ::testing::Test {
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation());
|
||||
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getIsaGraphicsAllocation());
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
@@ -90,7 +90,7 @@ XE_HPC_CORETEST_F(MemoryPrefetchTestsXeHpcCore, givenKernelWhenWalkerIsProgramme
|
||||
auto statePrefetchCmd = genCmdCast<STATE_PREFETCH *>(*itorStatePrefetch);
|
||||
EXPECT_NE(nullptr, statePrefetchCmd);
|
||||
|
||||
EXPECT_EQ(mockKernel->kernelInfo.getGraphicsAllocation()->getGpuAddress(), statePrefetchCmd->getAddress());
|
||||
EXPECT_EQ(mockKernel->kernelInfo.getIsaGraphicsAllocation()->getGpuAddress(), statePrefetchCmd->getAddress());
|
||||
EXPECT_TRUE(statePrefetchCmd->getKernelInstructionPrefetch());
|
||||
}
|
||||
|
||||
|
||||
@@ -440,6 +440,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceUsmAllocationPool, -1, "-1: default
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostUsmAllocationPool, -1, "-1: default (enabled, 2MB), 0: disabled, >=1: enabled, size in MB")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableUsmAllocationPoolManager, -1, "-1: default, 0: disabled, 1: enabled, use growing pools")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableUsmPoolResidencyTracking, -1, "-1: default, 0: disabled, 1: enabled, track residency per chunk")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableIsaAllocationPool, -1, "-1: default, 0: disabled, 1: enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableUsmPoolLazyInit, -1, "-1: default, 0: disabled, 1: enabled, initialize usm pools on first alloc")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, UseLocalPreferredForCacheableBuffers, -1, "Use localPreferred for cacheable buffers")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCopyWithStagingBuffers, -1, "Enable copy with non-usm memory through staging buffers. -1: default, 0: disabled, 1: enabled")
|
||||
|
||||
@@ -120,6 +120,31 @@ void KernelInfo::apply(const DeviceInfoKernelPayloadConstants &constants) {
|
||||
setIfValidOffset(constants.maxWorkGroupSize, implicitArgs.maxWorkGroupSize);
|
||||
}
|
||||
|
||||
GraphicsAllocation *KernelInfo::getIsaGraphicsAllocation() const {
|
||||
if (auto allocation = this->getIsaParentAllocation(); allocation != nullptr) {
|
||||
DEBUG_BREAK_IF(this->kernelAllocation != nullptr);
|
||||
return allocation;
|
||||
} else {
|
||||
DEBUG_BREAK_IF(this->kernelAllocation == nullptr);
|
||||
return this->kernelAllocation;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t KernelInfo::getIsaSize() const {
|
||||
if (this->getIsaParentAllocation()) {
|
||||
DEBUG_BREAK_IF(this->kernelAllocation != nullptr);
|
||||
return static_cast<uint32_t>(this->isaSubAllocationSize);
|
||||
} else {
|
||||
DEBUG_BREAK_IF(this->kernelAllocation == nullptr);
|
||||
return static_cast<uint32_t>(this->kernelAllocation->getUnderlyingBufferSize());
|
||||
}
|
||||
}
|
||||
|
||||
void KernelInfo::setIsaPerKernelAllocation(GraphicsAllocation *allocation) {
|
||||
DEBUG_BREAK_IF(this->isaParentAllocation != nullptr);
|
||||
this->kernelAllocation = allocation;
|
||||
}
|
||||
|
||||
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos) {
|
||||
std::string semiColonDelimitedKernelNameStr;
|
||||
|
||||
|
||||
@@ -83,6 +83,30 @@ struct KernelInfo : NEO::NonCopyableAndNonMovableClass {
|
||||
bool createKernelAllocation(const Device &device, bool internalIsa);
|
||||
void apply(const DeviceInfoKernelPayloadConstants &constants);
|
||||
|
||||
uint32_t getIsaSize() const;
|
||||
GraphicsAllocation *getIsaGraphicsAllocation() const;
|
||||
void setIsaPerKernelAllocation(GraphicsAllocation *allocation);
|
||||
|
||||
inline GraphicsAllocation *getIsaParentAllocation() const {
|
||||
return isaParentAllocation;
|
||||
}
|
||||
inline void setIsaParentAllocation(GraphicsAllocation *allocation) {
|
||||
isaParentAllocation = allocation;
|
||||
}
|
||||
inline size_t getIsaOffsetInParentAllocation() const {
|
||||
DEBUG_BREAK_IF(this->kernelAllocation != nullptr && 0u != isaSubAllocationOffset);
|
||||
return isaSubAllocationOffset;
|
||||
}
|
||||
inline void setIsaSubAllocationOffset(size_t offset) {
|
||||
isaSubAllocationOffset = offset;
|
||||
}
|
||||
inline void setIsaSubAllocationSize(size_t size) {
|
||||
isaSubAllocationSize = size;
|
||||
}
|
||||
inline size_t getIsaSubAllocationSize() const {
|
||||
return isaSubAllocationSize;
|
||||
}
|
||||
|
||||
HeapInfo heapInfo = {};
|
||||
std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
|
||||
char *crossThreadData = nullptr;
|
||||
@@ -97,6 +121,11 @@ struct KernelInfo : NEO::NonCopyableAndNonMovableClass {
|
||||
|
||||
uint64_t shaderHashCode;
|
||||
KernelDescriptor kernelDescriptor;
|
||||
|
||||
private:
|
||||
GraphicsAllocation *isaParentAllocation = nullptr;
|
||||
size_t isaSubAllocationOffset = 0lu;
|
||||
size_t isaSubAllocationSize = 0lu;
|
||||
};
|
||||
|
||||
static_assert(NEO::NonCopyableAndNonMovable<KernelInfo>);
|
||||
|
||||
@@ -593,6 +593,7 @@ EnableUserFenceUponUnbind = -1
|
||||
EnableWaitOnUserFenceAfterBindAndUnbind = -1
|
||||
UseGemCreateExtInAllocateMemoryByKMD = -1
|
||||
PrintMmapAndMunMapCalls = -1
|
||||
EnableIsaAllocationPool = -1
|
||||
UseLocalPreferredForCacheableBuffers = -1
|
||||
EnableFtrTile64Optimization = 0
|
||||
ForceTlbFlushWithTaskCountAfterCopy = -1
|
||||
|
||||
Reference in New Issue
Block a user