Mirror of https://github.com/intel/compute-runtime.git (synced 2026-01-05 09:09:04 +08:00)
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's patchInfo.

Removed:
- SPatchMediaInterfaceDescriptorLoad
- SPatchAllocateLocalSurface
- SPatchMediaVFEState (slot 0)
- SPatchMediaVFEState (slot 1)
- SPatchInterfaceDescriptorData
- SPatchSamplerStateArray
- SPatchBindingTableState
- SPatchDataParameterBuffer
- SPatchDataParameterStream
- SPatchThreadPayload
- SPatchKernelAttributesInfo
- SPatchAllocateStatelessPrivateSurface
- SPatchAllocateSyncBuffer
- SPatchAllocateStatelessConstantMemorySurfaceWithInitialization
- SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization
- SPatchAllocateSystemThreadSurface

Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
Committed by: Compute-Runtime-Automation
Parent: 03631ce22b
Commit: 41f3bd00ff
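Every hunk below applies the same mechanical substitution described in the message above: a nullable iOpenCL patch-token pointer kept in KernelInfo::patchInfo is replaced by the corresponding field of the already-populated KernelDescriptor, with "token absent" now expressed either as a boolean/size attribute or as an offset that fails isValidOffset(). A minimal sketch of that before/after shape, assuming simplified stand-in types that are not the real NEO headers:

    #include <cstdint>
    #include <limits>

    // Hypothetical, simplified stand-ins for the NEO types involved; the real
    // definitions live in the shared kernel_descriptor headers.
    using CrossThreadDataOffset = uint16_t;
    constexpr CrossThreadDataOffset undefinedOffset = std::numeric_limits<CrossThreadDataOffset>::max();
    inline bool isValidOffset(CrossThreadDataOffset offset) { return offset != undefinedOffset; }

    struct KernelDescriptorSketch {
        struct KernelAttributes {
            uint32_t perHwThreadPrivateMemorySize = 0;
            uint32_t perThreadScratchSize[2] = {0, 0};
            struct Flags {
                bool usesSyncBuffer = false;
            } flags;
        } kernelAttributes;
        struct PayloadMappings {
            struct ImplicitArgs {
                struct {
                    CrossThreadDataOffset stateless = undefinedOffset;
                    CrossThreadDataOffset bindful = undefinedOffset;
                } syncBufferAddress;
            } implicitArgs;
        } payloadMappings;
    };

    // Old style (removed by this commit): null-check an optional SPatch* token, then read its field.
    //   scratchSize = patchInfo.mediavfestate ? patchInfo.mediavfestate->PerThreadScratchSpace : 0;
    //   usesSync    = patchInfo.pAllocateSyncBuffer != nullptr;
    // New style: read the already-populated descriptor; "not present" is encoded in the value itself.
    uint32_t getScratchSizeSketch(const KernelDescriptorSketch &d) {
        return d.kernelAttributes.perThreadScratchSize[0];
    }

    bool usesSyncBufferSketch(const KernelDescriptorSketch &d) {
        return d.kernelAttributes.flags.usesSyncBuffer;
    }

    bool syncBufferHasBindfulSlotSketch(const KernelDescriptorSketch &d) {
        // Replaces dereferencing pAllocateSyncBuffer->SurfaceStateHeapOffset behind a null check.
        return isValidOffset(d.payloadMappings.implicitArgs.syncBufferAddress.bindful);
    }

The field names used above (perThreadScratchSize, usesSyncBuffer, syncBufferAddress.stateless/bindful, isValidOffset) are the ones the diff itself switches to; everything else in the sketch is illustrative scaffolding.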
@@ -3431,7 +3431,7 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
     }

     if ((pKernel->getExecutionType() != KernelExecutionType::Default) ||
-        pKernel->isUsingSyncBuffer(pCommandQueue->getDevice().getRootDeviceIndex())) {
+        pKernel->usesSyncBuffer(pCommandQueue->getDevice().getRootDeviceIndex())) {
         retVal = CL_INVALID_KERNEL;
         TRACING_EXIT(clEnqueueNDRangeKernel, &retVal);
         return retVal;

@@ -5884,7 +5884,7 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
         }
     }

-    if (pKernel->isUsingSyncBuffer(rootDeviceIndex)) {
+    if (pKernel->usesSyncBuffer(rootDeviceIndex)) {
         if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) {
             retVal = CL_INVALID_KERNEL;
             return retVal;
@@ -537,7 +537,7 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
     auto rootDeviceIndex = device->getRootDeviceIndex();
     DEBUG_BREAK_IF(!kernel->requiresSshForBuffers(rootDeviceIndex));
     auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap(rootDeviceIndex)),
-                                  kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->Offset);
+                                  kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful);
     void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
     size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
     Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
@@ -106,6 +106,7 @@ class GpgpuWalkerHelper {

     static size_t setGpgpuWalkerThreadData(
         WALKER_TYPE<GfxFamily> *walkerCmd,
+        const KernelDescriptor &kernelDescriptor,
         const size_t globalOffsets[3],
         const size_t startWorkGroups[3],
         const size_t numWorkGroups[3],
@@ -114,7 +115,6 @@ class GpgpuWalkerHelper {
         uint32_t workDim,
         bool localIdsGenerationByRuntime,
         bool inlineDataProgrammingRequired,
-        const iOpenCL::SPatchThreadPayload &threadPayload,
         uint32_t requiredWorkgroupOrder);

     static void dispatchProfilingCommandsStart(
@@ -20,6 +20,7 @@ namespace NEO {
 template <typename GfxFamily>
 inline size_t GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(
     WALKER_TYPE<GfxFamily> *walkerCmd,
+    const KernelDescriptor &kernelDescriptor,
     const size_t globalOffsets[3],
     const size_t startWorkGroups[3],
     const size_t numWorkGroups[3],
@@ -28,7 +29,6 @@ inline size_t GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(
     uint32_t workDim,
     bool localIdsGenerationByRuntime,
     bool inlineDataProgrammingRequired,
-    const iOpenCL::SPatchThreadPayload &threadPayload,
     uint32_t requiredWorkgroupOrder) {
     auto localWorkSize = localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2];

@@ -142,9 +142,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(

     size_t globalOffsets[3] = {0, 0, 0};
     size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
-    GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&cmdWalker, globalOffsets, globalOffsets, workGroups, localWorkSizes,
-                                                           simd, 1, true, inlineDataProgrammingRequired,
-                                                           *kernelInfo.patchInfo.threadPayload, 0u);
+    GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&cmdWalker, kernelInfo.kernelDescriptor, globalOffsets, globalOffsets, workGroups, localWorkSizes,
+                                                           simd, 1, true, inlineDataProgrammingRequired, 0u);
     *pGpGpuWalkerCmd = cmdWalker;

     // Implement disabling special WA DisableLSQCROPERFforOCL if needed
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -105,10 +105,10 @@ inline void HardwareInterface<GfxFamily>::programWalker(
         true,
         commandQueue.getDevice());

-    GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&walkerCmd, globalOffsets, startWorkGroups,
+    GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(&walkerCmd, kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor,
+                                                           globalOffsets, startWorkGroups,
                                                            numWorkGroups, localWorkSizes, simd, dim,
-                                                           false, false,
-                                                           *kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload, 0u);
+                                                           false, false, 0u);

     EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd);
     *walkerCmdBuf = walkerCmd;
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2020 Intel Corporation
+ * Copyright (C) 2019-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -256,7 +256,7 @@ uint64_t DeviceQueueHw<GfxFamily>::getBlockKernelStartPointer(const Device &devi
     auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

     if (blockAllocation && isCcsUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
-        blockKernelStartPointer += blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
+        blockKernelStartPointer += blockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP;
     }
     return blockKernelStartPointer;
 }
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2020 Intel Corporation
+ * Copyright (C) 2019-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -182,7 +182,7 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap

         auto blockKernelStartPointer = getBlockKernelStartPointer(getDevice(), pBlockInfo, isCcsUsed);

-        auto bindingTableCount = pBlockInfo->patchInfo.bindingTableState->Count;
+        auto bindingTableCount = static_cast<uint32_t>(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries);
         maxBindingTableCount = std::max(maxBindingTableCount, bindingTableCount);

         totalBlockSSHSize += alignUp(pBlockInfo->heapInfo.SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
@@ -192,15 +192,14 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
                                           pBlockInfo->heapInfo.pSsh,
                                           pBlockInfo->heapInfo.SurfaceStateHeapSize,
                                           bindingTableCount,
-                                          pBlockInfo->patchInfo.bindingTableState->Offset);
+                                          pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset);

         parentKernel->setReflectionSurfaceBlockBtOffset(i, static_cast<uint32_t>(btOffset));

         // Determine SIMD size
         uint32_t simd = pBlockInfo->getMaxSimdSize();
-        DEBUG_BREAK_IF(pBlockInfo->patchInfo.interfaceDescriptorData == nullptr);

-        uint32_t idOffset = pBlockInfo->patchInfo.interfaceDescriptorData->Offset;
+        uint32_t idOffset = pBlockInfo->kernelDescriptor.kernelMetadata.deviceSideEnqueueBlockInterfaceDescriptorOffset;
         const INTERFACE_DESCRIPTOR_DATA *pBlockID = static_cast<const INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(pBlockInfo->heapInfo.pDsh, idOffset));

         pIDDestination[blockIndex + i] = *pBlockID;
@@ -214,10 +213,7 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
         // Set offset to sampler states, block's DHSOffset is added by scheduler
         pIDDestination[blockIndex + i].setSamplerStatePointer(static_cast<uint32_t>(pBlockInfo->getBorderColorStateSize()));

-        auto threadPayload = pBlockInfo->patchInfo.threadPayload;
-        DEBUG_BREAK_IF(nullptr == threadPayload);
-
-        auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
+        auto numChannels = pBlockInfo->kernelDescriptor.kernelAttributes.numLocalIdChannels;
         auto grfSize = device->getDeviceInfo().grfSize;
         auto sizePerThreadData = getPerThreadSizeLocalIDs(simd, grfSize, numChannels);
         auto numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
@@ -19,7 +19,7 @@ inline cl_command_queue_capabilities_intel ClHwHelperHw<GfxFamily>::getAdditiona

 template <typename GfxFamily>
 cl_ulong ClHwHelperHw<GfxFamily>::getKernelPrivateMemSize(const KernelInfo &kernelInfo) const {
-    return kernelInfo.patchInfo.pAllocateStatelessPrivateSurface ? kernelInfo.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize : 0;
+    return kernelInfo.kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize;
 }

 } // namespace NEO
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2020 Intel Corporation
+ * Copyright (C) 2017-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,18 +35,14 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredDSH(
     const Kernel &kernel) {
     using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
     using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
-    const auto &patchInfo = kernel.getKernelInfo(rootDeviceIndex).patchInfo;
-    auto samplerCount = patchInfo.samplerStateArray
-                            ? patchInfo.samplerStateArray->Count
-                            : 0;
+    const auto &samplerTable = kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.samplerTable;
+
+    auto samplerCount = samplerTable.numSamplers;
     auto totalSize = samplerCount
                          ? alignUp(samplerCount * sizeof(SAMPLER_STATE), INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE)
                          : 0;

-    auto borderColorSize = patchInfo.samplerStateArray
-                               ? patchInfo.samplerStateArray->Offset - patchInfo.samplerStateArray->BorderColorOffset
-                               : 0;
-
+    auto borderColorSize = samplerTable.borderColor;
     borderColorSize = alignUp(borderColorSize + EncodeStates<GfxFamily>::alignIndirectStatePointer - 1,
                               EncodeStates<GfxFamily>::alignIndirectStatePointer);

@@ -63,14 +59,12 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredIOH(
     const Kernel &kernel,
     size_t localWorkSize) {
     typedef typename GfxFamily::WALKER_TYPE WALKER_TYPE;
+    const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex);

-    auto threadPayload = kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload;
-    DEBUG_BREAK_IF(nullptr == threadPayload);
-
-    auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
+    auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels;
     uint32_t grfSize = sizeof(typename GfxFamily::GRF);
     return alignUp((kernel.getCrossThreadDataSize(rootDeviceIndex) +
-                    getPerThreadDataSizeTotal(kernel.getKernelInfo(rootDeviceIndex).getMaxSimdSize(), grfSize, numChannels, localWorkSize)),
+                    getPerThreadDataSizeTotal(kernelInfo.getMaxSimdSize(), grfSize, numChannels, localWorkSize)),
                    WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
 }

@@ -132,7 +126,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(const Kern
         totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize;
         totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);

-        maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState->Count);
+        maxBindingTableCount = std::max(maxBindingTableCount, static_cast<uint32_t>(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries));
     }

     SchedulerKernel &scheduler = kernel.getContext().getSchedulerKernel();
@@ -233,31 +227,28 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(

     // Copy the kernel over to the ISH
     const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex);
-    const auto &patchInfo = kernelInfo.patchInfo;

     ssh.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
     kernel.patchBindlessSurfaceStateOffsets(device, ssh.getUsed());

-    auto dstBindingTablePointer = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0,
+    auto dstBindingTablePointer = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(ssh, kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries,
                                                                                                   kernel.getSurfaceStateHeap(rootDeviceIndex), kernel.getSurfaceStateHeapSize(rootDeviceIndex),
                                                                                                   kernel.getNumberOfBindingTableStates(rootDeviceIndex), kernel.getBindingTableOffset(rootDeviceIndex));

     // Copy our sampler state if it exists
-    uint32_t samplerStateOffset = 0;
+    const auto &samplerTable = kernelInfo.kernelDescriptor.payloadMappings.samplerTable;
     uint32_t samplerCount = 0;
-    if (patchInfo.samplerStateArray) {
-        samplerCount = patchInfo.samplerStateArray->Count;
-        samplerStateOffset = EncodeStates<GfxFamily>::copySamplerState(&dsh, patchInfo.samplerStateArray->Offset,
-                                                                       samplerCount, patchInfo.samplerStateArray->BorderColorOffset,
+    uint32_t samplerStateOffset = 0;
+    if (isValidOffset(samplerTable.tableOffset) && isValidOffset(samplerTable.borderColor)) {
+        samplerCount = samplerTable.numSamplers;
+        samplerStateOffset = EncodeStates<GfxFamily>::copySamplerState(&dsh, samplerTable.tableOffset,
+                                                                       samplerCount, samplerTable.borderColor,
                                                                        kernel.getDynamicStateHeap(rootDeviceIndex), device.getBindlessHeapsHelper());
     }

-    auto threadPayload = kernelInfo.patchInfo.threadPayload;
-    DEBUG_BREAK_IF(nullptr == threadPayload);
-
     auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2];
     auto threadsPerThreadGroup = static_cast<uint32_t>(getThreadsPerWG(simd, localWorkItems));
-    auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
+    auto numChannels = static_cast<uint32_t>(kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels);

     uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize(rootDeviceIndex);

@@ -348,16 +339,14 @@ bool HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(const Kern
         checkKernelForInlineData = !!DebugManager.flags.EnablePassInlineData.get();
     }
     if (checkKernelForInlineData) {
-        return kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->PassInlineData;
+        return kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.passInlineData;
     }
     return false;
 }

 template <typename GfxFamily>
 bool HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(const Kernel &kernel, uint32_t rootDeviceIndex) {
-    return (kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->LocalIDXPresent ||
-            kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->LocalIDYPresent ||
-            kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->LocalIDZPresent);
+    return kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.numLocalIdChannels > 0;
 }

 } // namespace NEO
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2020 Intel Corporation
+ * Copyright (C) 2017-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,13 +35,13 @@ size_t PerThreadDataHelper::sendPerThreadData(
     return offsetPerThreadData;
 }

-uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize) {
-    uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(simd, grfSize));
+uint32_t PerThreadDataHelper::getThreadPayloadSize(const KernelDescriptor &kernelDescriptor, uint32_t grfSize) {
+    uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(kernelDescriptor.kernelAttributes.simdSize, grfSize));
     uint32_t threadPayloadSize = 0;
-    threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * grfSize;
-    threadPayloadSize += (threadPayload.HeaderPresent) ? grfSize : 0;
-    threadPayloadSize += (threadPayload.LocalIDFlattenedPresent) ? (grfSize * multiplier) : 0;
-    threadPayloadSize += (threadPayload.UnusedPerThreadConstantPresent) ? grfSize : 0;
+    threadPayloadSize = kernelDescriptor.kernelAttributes.numLocalIdChannels * multiplier * grfSize;
+    threadPayloadSize += (kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent) ? grfSize : 0;
+    threadPayloadSize += (kernelDescriptor.kernelAttributes.flags.usesFlattenedLocalIds) ? (grfSize * multiplier) : 0;
+    threadPayloadSize += (kernelDescriptor.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent) ? grfSize : 0;
     return threadPayloadSize;
 }
 } // namespace NEO
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2020 Intel Corporation
+ * Copyright (C) 2017-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -7,6 +7,7 @@

 #pragma once
 #include "shared/source/helpers/local_id_gen.h"
+#include "shared/source/kernel/kernel_descriptor.h"

 #include "patch_shared.h"

@@ -42,12 +43,6 @@ struct PerThreadDataHelper {
         const std::array<uint8_t, 3> &workgroupWalkOrder,
         bool hasKernelOnlyImages);

-    static inline uint32_t getNumLocalIdChannels(const iOpenCL::SPatchThreadPayload &threadPayload) {
-        return threadPayload.LocalIDXPresent +
-               threadPayload.LocalIDYPresent +
-               threadPayload.LocalIDZPresent;
-    }
-
-    static uint32_t getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize);
+    static uint32_t getThreadPayloadSize(const KernelDescriptor &kernelDescriptor, uint32_t grfSize);
 };
 } // namespace NEO
@@ -136,15 +136,37 @@ inline void patch(const SrcT &src, void *dst, uint32_t dstOffsetBytes) {
     *patchLocation = static_cast<DstT>(src);
 }

+void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const ArgDescPointer &arg) {
+    auto rootDeviceIndex = device.getRootDeviceIndex();
+
+    void *crossThreadData = getCrossThreadData(rootDeviceIndex);
+    if ((nullptr != crossThreadData) && isValidOffset(arg.stateless)) {
+        auto pp = ptrOffset(crossThreadData, arg.stateless);
+        uintptr_t addressToPatch = reinterpret_cast<uintptr_t>(ptrToPatchInCrossThreadData);
+        patchWithRequiredSize(pp, arg.pointerSize, addressToPatch);
+        if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
+            PatchInfoData patchInfoData(addressToPatch, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast<uint64_t>(crossThreadData), arg.stateless, PatchInfoAllocationType::IndirectObjectHeap, arg.pointerSize);
+            this->patchInfoDataList.push_back(patchInfoData);
+        }
+    }
+
+    void *ssh = getSurfaceStateHeap(rootDeviceIndex);
+    if ((nullptr != ssh) & isValidOffset(arg.bindful)) {
+        auto surfaceState = ptrOffset(ssh, arg.bindful);
+        void *addressToPatch = reinterpret_cast<void *>(allocation.getGpuAddressToPatch());
+        size_t sizeToPatch = allocation.getUnderlyingBufferSize();
+        Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0);
+    }
+}
+
 template <typename PatchTokenT>
 void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const PatchTokenT &patch) {
-    uint32_t crossThreadDataOffset = patch.DataParamOffset;
-    uint32_t pointerSize = patch.DataParamSize;
-    uint32_t sshOffset = patch.SurfaceStateHeapOffset;
     auto rootDeviceIndex = device.getRootDeviceIndex();
+    uint32_t pointerSize = patch.DataParamSize;

     void *crossThreadData = getCrossThreadData(rootDeviceIndex);
-    void *ssh = getSurfaceStateHeap(rootDeviceIndex);
     if (crossThreadData != nullptr) {
+        uint32_t crossThreadDataOffset = patch.DataParamOffset;
         auto pp = ptrOffset(crossThreadData, crossThreadDataOffset);
         uintptr_t addressToPatch = reinterpret_cast<uintptr_t>(ptrToPatchInCrossThreadData);
         patchWithRequiredSize(pp, pointerSize, addressToPatch);
@@ -154,7 +176,9 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic
         }
     }

+    void *ssh = getSurfaceStateHeap(rootDeviceIndex);
     if (ssh) {
+        uint32_t sshOffset = patch.SurfaceStateHeapOffset;
         auto surfaceState = ptrOffset(ssh, sshOffset);
         void *addressToPatch = reinterpret_cast<void *>(allocation.getGpuAddressToPatch());
         size_t sizeToPatch = allocation.getUnderlyingBufferSize();
@@ -185,15 +209,12 @@ cl_int Kernel::initialize() {
         auto maxSimdSize = kernelInfo.getMaxSimdSize();
         const auto &workloadInfo = kernelInfo.workloadInfo;
         const auto &heapInfo = kernelInfo.heapInfo;
-        const auto &patchInfo = kernelInfo.patchInfo;

         if (maxSimdSize != 1 && maxSimdSize < hwHelper.getMinimalSIMDSize()) {
             return CL_INVALID_KERNEL;
         }

-        kernelDeviceInfo.crossThreadDataSize = patchInfo.dataParameterStream
-                                                   ? patchInfo.dataParameterStream->DataParameterStreamSize
-                                                   : 0;
+        kernelDeviceInfo.crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize;

         // now allocate our own cross-thread data, if necessary
         if (kernelDeviceInfo.crossThreadDataSize) {
@@ -297,11 +318,11 @@ cl_int Kernel::initialize() {
             memcpy_s(kernelDeviceInfo.pSshLocal.get(), kernelDeviceInfo.sshLocalSize,
                      heapInfo.pSsh, kernelDeviceInfo.sshLocalSize);
         }
-        kernelDeviceInfo.numberOfBindingTableStates = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Count : 0;
-        kernelDeviceInfo.localBindingTableOffset = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Offset : 0;
+        kernelDeviceInfo.numberOfBindingTableStates = kernelDescriptor.payloadMappings.bindingTable.numEntries;
+        kernelDeviceInfo.localBindingTableOffset = kernelDescriptor.payloadMappings.bindingTable.tableOffset;

         // patch crossthread data and ssh with inline surfaces, if necessary
-        auto perHwThreadPrivateMemorySize = PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(patchInfo.pAllocateStatelessPrivateSurface, kernelInfo.getMaxSimdSize());
+        auto perHwThreadPrivateMemorySize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize;

         if (perHwThreadPrivateMemorySize) {
             kernelDeviceInfo.privateSurfaceSize = KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch);
@@ -318,23 +339,23 @@ cl_int Kernel::initialize() {
             if (kernelDeviceInfo.privateSurface == nullptr) {
                 return CL_OUT_OF_RESOURCES;
             }
-            const auto &patch = patchInfo.pAllocateStatelessPrivateSurface;
-            patchWithImplicitSurface(reinterpret_cast<void *>(kernelDeviceInfo.privateSurface->getGpuAddressToPatch()), *kernelDeviceInfo.privateSurface, pClDevice->getDevice(), *patch);
+            const auto &patch = kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress;
+            patchWithImplicitSurface(reinterpret_cast<void *>(kernelDeviceInfo.privateSurface->getGpuAddressToPatch()), *kernelDeviceInfo.privateSurface, pClDevice->getDevice(), patch);
         }
-        if (patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization) {
+        if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
             DEBUG_BREAK_IF(program->getConstantSurface(rootDeviceIndex) == nullptr);
             uintptr_t constMemory = isBuiltIn ? (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch();

-            const auto &patch = patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization;
-            patchWithImplicitSurface(reinterpret_cast<void *>(constMemory), *program->getConstantSurface(rootDeviceIndex), pClDevice->getDevice(), *patch);
+            const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress;
+            patchWithImplicitSurface(reinterpret_cast<void *>(constMemory), *program->getConstantSurface(rootDeviceIndex), pClDevice->getDevice(), arg);
         }

-        if (patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization) {
+        if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) {
             DEBUG_BREAK_IF(program->getGlobalSurface(rootDeviceIndex) == nullptr);
             uintptr_t globalMemory = isBuiltIn ? (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch();

-            const auto &patch = patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization;
-            patchWithImplicitSurface(reinterpret_cast<void *>(globalMemory), *program->getGlobalSurface(rootDeviceIndex), pClDevice->getDevice(), *patch);
+            const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress;
+            patchWithImplicitSurface(reinterpret_cast<void *>(globalMemory), *program->getGlobalSurface(rootDeviceIndex), pClDevice->getDevice(), arg);
         }

         if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)) {
@@ -367,7 +388,7 @@ cl_int Kernel::initialize() {
         if (isParentKernel) {
             program->allocateBlockPrivateSurfaces(*pClDevice);
         }
-        if (program->isKernelDebugEnabled() && kernelInfo.patchInfo.pAllocateSystemThreadSurface) {
+        if (program->isKernelDebugEnabled() && isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) {
            debugEnabled = true;
        }
        auto numArgs = kernelInfo.kernelArgInfo.size();
@@ -518,8 +539,8 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize,
         break;

     case CL_KERNEL_ATTRIBUTES:
-        pSrc = defaultKernelInfo.attributes.c_str();
-        srcSize = defaultKernelInfo.attributes.length() + 1;
+        pSrc = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelLanguageAttributes.c_str();
+        srcSize = defaultKernelInfo.kernelDescriptor.kernelMetadata.kernelLanguageAttributes.length() + 1;
         break;

     case CL_KERNEL_BINARY_PROGRAM_INTEL:
@@ -613,7 +634,6 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
     cl_ulong localMemorySize;
     auto rootDeviceIndex = device.getRootDeviceIndex();
     auto &kernelInfo = *kernelInfos[rootDeviceIndex];
-    const auto &patchInfo = kernelInfo.patchInfo;
     const auto &kernelDescriptor = kernelInfo.kernelDescriptor;
     size_t preferredWorkGroupSizeMultiple = 0;
     cl_ulong scratchSize;
@@ -644,9 +664,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
         break;

     case CL_KERNEL_LOCAL_MEM_SIZE:
-        localMemorySize = patchInfo.localsurface
-                              ? patchInfo.localsurface->TotalInlineLocalMemorySize
-                              : 0;
+        localMemorySize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize;
         srcSize = sizeof(localMemorySize);
         pSrc = &localMemorySize;
         break;
@@ -661,7 +679,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
         break;

     case CL_KERNEL_SPILL_MEM_SIZE_INTEL:
-        scratchSize = kernelInfo.patchInfo.mediavfestate ? kernelInfo.patchInfo.mediavfestate->PerThreadScratchSpace : 0;
+        scratchSize = kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
         srcSize = sizeof(scratchSize);
         pSrc = &scratchSize;
         break;
@@ -762,7 +780,6 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para
         workGroupSize2.val[0] = workGroupSize;
         workGroupSize2.val[1] = (workGroupSize > 0) ? 1 : 0;
         return changeGetInfoStatusToCLResultType(info.set<size_t2>(workGroupSize2));
-    case 3:
     default:
         struct size_t3 {
             size_t val[3];
@@ -1881,7 +1898,7 @@ void Kernel::createReflectionSurface() {

         ReflectionSurfaceHelper::getCurbeParams(curbeParamsForBlocks[i], tokenMask[i], firstSSHTokenIndex, *pBlockInfo, hwInfo);

-        maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast<size_t>(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize));
+        maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast<size_t>(pBlockInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize));

         samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(hwInfo);
         samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment);
@@ -1956,7 +1973,7 @@ void Kernel::createReflectionSurface() {
             const char *pSrc = pBlockInfo->crossThreadData;
             memcpy_s(pDst, pBlockInfo->getConstantBufferSize(), pSrc, pBlockInfo->getConstantBufferSize());

-            btOffset += pBlockInfo->patchInfo.bindingTableState->Offset;
+            btOffset += pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset;
             kernelDataOffset = newKernelDataOffset;
         }

@@ -2033,14 +2050,14 @@ void Kernel::patchBlocksCurbeWithConstantValues() {
         uint64_t constantMemoryCurbeOffset = ReflectionSurfaceHelper::undefinedOffset;
         uint32_t constantMemoryPatchSize = 0;

-        if (pBlockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization) {
-            globalMemoryCurbeOffset = pBlockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset;
-            globalMemoryPatchSize = pBlockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamSize;
+        if (isValidOffset(pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) {
+            globalMemoryCurbeOffset = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless;
+            globalMemoryPatchSize = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.pointerSize;
         }

-        if (pBlockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization) {
-            constantMemoryCurbeOffset = pBlockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset;
-            constantMemoryPatchSize = pBlockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamSize;
+        if (isValidOffset(pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
+            constantMemoryCurbeOffset = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless;
+            constantMemoryPatchSize = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.pointerSize;
         }

         ReflectionSurfaceHelper::patchBlocksCurbeWithConstantValues(kernelReflectionSurface->getUnderlyingBuffer(), blockID,
@@ -2104,19 +2121,18 @@ void Kernel::ReflectionSurfaceHelper::getCurbeParams(std::vector<IGIL_KernelCurb
             }
             tokenMask |= shiftLeftBy(50);

-            if (kernelInfo.patchInfo.bindingTableState) {
+            if (isValidOffset(kernelInfo.kernelDescriptor.payloadMappings.bindingTable.tableOffset)) {
                 auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
-                const void *ssh = static_cast<const char *>(kernelInfo.heapInfo.pSsh) + kernelInfo.patchInfo.bindingTableState->Offset;
-
-                for (uint32_t i = 0; i < kernelInfo.patchInfo.bindingTableState->Count; i++) {
+                const void *ssh = static_cast<const char *>(kernelInfo.heapInfo.pSsh) + kernelInfo.kernelDescriptor.payloadMappings.bindingTable.tableOffset;
+
+                for (uint32_t i = 0; i < kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries; i++) {
                     uint32_t pointer = hwHelper.getBindingTableStateSurfaceStatePointer(ssh, i);
                     if (pointer == kernelInfo.kernelArgInfo[argNumber].offsetHeap) {
                         bindingTableIndex = i;
                         break;
                     }
                 }
-                DEBUG_BREAK_IF(!((bindingTableIndex != 253) || (kernelInfo.patchInfo.bindingTableState->Count == 0)));
+                DEBUG_BREAK_IF(bindingTableIndex == 253);
             }

         } else if (kernelInfo.kernelArgInfo[argNumber].isSampler) {
@@ -2152,8 +2168,8 @@ void Kernel::ReflectionSurfaceHelper::getCurbeParams(std::vector<IGIL_KernelCurb
         }
     }

-    for (auto param : kernelInfo.patchInfo.dataParameterBuffersKernelArgs) {
-        curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_KERNEL_ARGUMENT, param->DataSize, param->Offset, param->ArgumentNumber});
+    for (auto param : kernelInfo.kernelDescriptor.kernelMetadata.allByValueKernelArguments) {
+        curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_KERNEL_ARGUMENT, param.byValueElement.size, param.byValueElement.offset, param.argNum});
         tokenMask |= shiftLeftBy(DATA_PARAMETER_KERNEL_ARGUMENT);
     }

@@ -2210,8 +2226,8 @@ uint32_t Kernel::ReflectionSurfaceHelper::setKernelData(void *reflectionSurface,
     kernelData->m_numberOfCurbeTokens = static_cast<uint32_t>(curbeParamsIn.size() - kernelInfo.kernelArgInfo.size());
     kernelData->m_numberOfSamplerStates = static_cast<uint32_t>(kernelInfo.getSamplerStateArrayCount());
     kernelData->m_SizeOfSamplerHeap = static_cast<uint32_t>(samplerHeapSize);
-    kernelData->m_SamplerBorderColorStateOffsetOnDSH = kernelInfo.patchInfo.samplerStateArray ? kernelInfo.patchInfo.samplerStateArray->BorderColorOffset : 0;
-    kernelData->m_SamplerStateArrayOffsetOnDSH = kernelInfo.patchInfo.samplerStateArray ? kernelInfo.patchInfo.samplerStateArray->Offset : (uint32_t)-1;
+    kernelData->m_SamplerBorderColorStateOffsetOnDSH = isValidOffset(kernelInfo.kernelDescriptor.payloadMappings.samplerTable.borderColor) ? kernelInfo.kernelDescriptor.payloadMappings.samplerTable.borderColor : 0;
+    kernelData->m_SamplerStateArrayOffsetOnDSH = isValidOffset(kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset) ? kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset : -1;
     kernelData->m_sizeOfConstantBuffer = kernelInfo.getConstantBufferSize();
     kernelData->m_PatchTokensMask = tokenMaskIn;
     kernelData->m_ScratchSpacePatchValue = 0;
@@ -2223,25 +2239,18 @@ uint32_t Kernel::ReflectionSurfaceHelper::setKernelData(void *reflectionSurface,
     kernelData->m_InilineSLMSize = kernelInfo.workloadInfo.slmStaticSize;

     bool localIdRequired = false;
-    if (kernelInfo.patchInfo.threadPayload) {
-        if (kernelInfo.patchInfo.threadPayload->LocalIDFlattenedPresent ||
-            kernelInfo.patchInfo.threadPayload->LocalIDXPresent ||
-            kernelInfo.patchInfo.threadPayload->LocalIDYPresent ||
-            kernelInfo.patchInfo.threadPayload->LocalIDZPresent) {
-            localIdRequired = true;
-        }
-        kernelData->m_PayloadSize = PerThreadDataHelper::getThreadPayloadSize(*kernelInfo.patchInfo.threadPayload, kernelData->m_SIMDSize, hwInfo.capabilityTable.grfSize);
-    }
+    if (kernelInfo.kernelDescriptor.kernelAttributes.flags.usesFlattenedLocalIds || (kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels > 0)) {
+        localIdRequired = true;
+    }
+    kernelData->m_PayloadSize = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, hwInfo.capabilityTable.grfSize);

     kernelData->m_NeedLocalIDS = localIdRequired ? 1 : 0;
     kernelData->m_DisablePreemption = 0u;

     bool concurrentExecAllowed = true;

-    if (kernelInfo.patchInfo.pAllocateStatelessPrivateSurface) {
-        if (kernelInfo.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize > 0) {
-            concurrentExecAllowed = false;
-        }
-    }
+    if (kernelInfo.kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize > 0) {
+        concurrentExecAllowed = false;
+    }
     kernelData->m_CanRunConcurently = concurrentExecAllowed ? 1 : 0;

@@ -2285,7 +2294,7 @@ void Kernel::ReflectionSurfaceHelper::setKernelAddressData(void *reflectionSurfa
     kernelAddressData->m_ConstantBufferOffset = constantBufferOffset;
     kernelAddressData->m_SSHTokensOffset = sshTokensOffset;
     kernelAddressData->m_BTSoffset = btOffset;
-    kernelAddressData->m_BTSize = static_cast<uint32_t>(kernelInfo.patchInfo.bindingTableState ? kernelInfo.patchInfo.bindingTableState->Count * hwHelper.getBindingTableStateSize() : 0);
+    kernelAddressData->m_BTSize = static_cast<uint32_t>(kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries * hwHelper.getBindingTableStateSize());
 }

 template <>
@@ -2428,14 +2437,11 @@ void Kernel::provideInitializationHints() {
                                             kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(),
                                             kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize);
         }
-        const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo;
-        if (patchInfo.mediavfestate) {
-            auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace;
-            scratchSize *= pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize();
-            if (scratchSize > 0) {
-                context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH,
-                                                kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize);
-            }
-        }
+        auto scratchSize = kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelAttributes.perThreadScratchSize[0] *
+                           pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(rootDeviceIndex).getMaxSimdSize();
+        if (scratchSize > 0) {
+            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH,
+                                            kernelInfos[rootDeviceIndex]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize);
+        }
     }
 }
@@ -2487,19 +2493,18 @@ void Kernel::patchBlocksSimdSize(uint32_t rootDeviceIndex) {
 }

 bool Kernel::usesSyncBuffer(uint32_t rootDeviceIndex) {
-    return (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSyncBuffer != nullptr);
+    return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesSyncBuffer;
 }

 void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset) {
     auto rootDeviceIndex = device.getRootDeviceIndex();
-    auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo;
-    auto bufferPatchAddress = ptrOffset(getCrossThreadData(rootDeviceIndex), patchInfo.pAllocateSyncBuffer->DataParamOffset);
-    patchWithRequiredSize(bufferPatchAddress, patchInfo.pAllocateSyncBuffer->DataParamSize,
+    const auto &syncBuffer = kernelInfos[rootDeviceIndex]->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress;
+    auto bufferPatchAddress = ptrOffset(getCrossThreadData(rootDeviceIndex), syncBuffer.stateless);
+    patchWithRequiredSize(bufferPatchAddress, syncBuffer.pointerSize,
                           ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset));

-    if (requiresSshForBuffers(rootDeviceIndex)) {
-        auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
-                                      patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset);
+    if (isValidOffset(syncBuffer.bindful)) {
+        auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)), syncBuffer.bindful);
         auto addressToPatch = gfxAllocation->getUnderlyingBuffer();
         auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize();
         Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0);
@@ -2693,7 +2698,7 @@ uint64_t Kernel::getKernelStartOffset(
     if (kernelInfos[rootDeviceIndex]->getGraphicsAllocation()) {
         kernelStartOffset = kernelInfos[rootDeviceIndex]->getGraphicsAllocation()->getGpuAddressToPatch();
         if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) {
-            kernelStartOffset += kernelInfos[rootDeviceIndex]->patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad;
+            kernelStartOffset += kernelInfos[rootDeviceIndex]->kernelDescriptor.entryPoints.skipPerThreadDataLoad;
         }
     }

@@ -2703,7 +2708,7 @@ uint64_t Kernel::getKernelStartOffset(
     auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

     if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
-        kernelStartOffset += kernelInfos[rootDeviceIndex]->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
+        kernelStartOffset += kernelInfos[rootDeviceIndex]->kernelDescriptor.entryPoints.skipSetFFIDGP;
     }

     return kernelStartOffset;
@@ -217,11 +217,11 @@ class Kernel : public BaseObject<_cl_kernel> {
     Program *getProgram() const { return program; }

     uint32_t getScratchSize(uint32_t rootDeviceIndex) {
-        return getKernelInfo(rootDeviceIndex).patchInfo.mediavfestate ? getKernelInfo(rootDeviceIndex).patchInfo.mediavfestate->PerThreadScratchSpace : 0;
+        return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
     }

     uint32_t getPrivateScratchSize(uint32_t rootDeviceIndex) {
-        return getKernelInfo(rootDeviceIndex).patchInfo.mediaVfeStateSlot1 ? getKernelInfo(rootDeviceIndex).patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace : 0;
+        return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.perThreadScratchSize[1];
     }

     void createReflectionSurface();
@@ -335,9 +335,6 @@ class Kernel : public BaseObject<_cl_kernel> {
     KernelExecutionType getExecutionType() const {
         return executionType;
     }
-    bool isUsingSyncBuffer(uint32_t rootDeviceIndex) const {
-        return (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSyncBuffer != nullptr);
-    }

     bool checkIfIsParentKernelAndBlocksUsesPrintf();

@@ -345,18 +342,8 @@ class Kernel : public BaseObject<_cl_kernel> {
         return getKernelInfo(rootDeviceIndex).gpuPointerSize == 4;
     }

-    int32_t getDebugSurfaceBti(uint32_t rootDeviceIndex) const {
-        if (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface) {
-            return getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->BTI;
-        }
-        return -1;
-    }
-
     size_t getPerThreadSystemThreadSurfaceSize(uint32_t rootDeviceIndex) const {
-        if (getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface) {
-            return getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize;
-        }
-        return 0;
+        return getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize;
     }

     std::vector<PatchInfoData> &getPatchInfoDataList() { return patchInfoDataList; };
@@ -496,6 +483,7 @@ class Kernel : public BaseObject<_cl_kernel> {

     void *patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t rootDeviceIndex);

+    void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const ArgDescPointer &arg);
     // Sets-up both crossThreadData and ssh for given implicit (private/constant, etc.) allocation
     template <typename PatchTokenT>
     void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const Device &device, const PatchTokenT &patch);
@@ -58,11 +58,12 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf

         auto privateSurface = blockManager->getPrivateSurface(i);

-        UNRECOVERABLE_IF(pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface != nullptr && pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize && privateSurface == nullptr);
-
+        UNRECOVERABLE_IF((pBlockInfo->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize > 0U) && privateSurface == nullptr);
         if (privateSurface) {
-            privateSurfaceOffset = pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamOffset;
-            privateSurfacePatchSize = pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamSize;
+            const auto &privateMemory = pBlockInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress;
+            UNRECOVERABLE_IF(false == isValidOffset(privateMemory.stateless));
+            privateSurfaceOffset = privateMemory.stateless;
+            privateSurfacePatchSize = privateMemory.pointerSize;
             privateSurfaceGpuAddress = privateSurface->getGpuAddressToPatch();
         }
@@ -16,7 +16,7 @@ namespace NEO {

 void BlockKernelManager::addBlockKernelInfo(KernelInfo *blockKernelInfo) {
     blockKernelInfoArray.push_back(blockKernelInfo);
-    blockUsesPrintf |= blockKernelInfo->kernelDescriptor.kernelAttributes.flags.usesPrintf;
+    blockUsesPrintf = blockKernelInfo->kernelDescriptor.kernelAttributes.flags.usesPrintf;
 }

 const KernelInfo *BlockKernelManager::getBlockKernelInfo(size_t ordinal) {
@@ -298,45 +298,6 @@ void KernelInfo::storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgum
     storeKernelArgPatchInfo(argNum, pStatelessDeviceQueueKernelArg->DataParamSize, pStatelessDeviceQueueKernelArg->DataParamOffset, 0, pStatelessDeviceQueueKernelArg->SurfaceStateHeapOffset);
 }

-void KernelInfo::storePatchToken(
-    const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg) {
-    usesSsh |= true;
-    patchInfo.pAllocateStatelessPrivateSurface = pStatelessPrivateSurfaceArg;
-}
-
-void KernelInfo::storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg) {
-    usesSsh |= true;
-    patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = pStatelessConstantMemorySurfaceWithInitializationArg;
-}
-
-void KernelInfo::storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg) {
-    usesSsh |= true;
-    patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = pStatelessGlobalMemorySurfaceWithInitializationArg;
-}
-
-void KernelInfo::storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo) {
-    this->patchInfo.pKernelAttributesInfo = pKernelAttributesInfo;
-    attributes = reinterpret_cast<const char *>(pKernelAttributesInfo) + sizeof(SPatchKernelAttributesInfo);
-
-    auto start = attributes.find("intel_reqd_sub_group_size(");
-    if (start != std::string::npos) {
-        start += strlen("intel_reqd_sub_group_size(");
-        auto stop = attributes.find(")", start);
-        std::stringstream requiredSubGroupSizeStr(attributes.substr(start, stop - start));
-        requiredSubGroupSizeStr >> requiredSubGroupSize;
-    }
-}
-
-void KernelInfo::storePatchToken(const SPatchAllocateSystemThreadSurface *pSystemThreadSurface) {
-    usesSsh |= true;
-    patchInfo.pAllocateSystemThreadSurface = pSystemThreadSurface;
-}
-
-void KernelInfo::storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBuffer) {
-    usesSsh |= true;
-    patchInfo.pAllocateSyncBuffer = pAllocateSyncBuffer;
-}
-
 void KernelInfo::storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t dataOffset, uint32_t sourceOffset, uint32_t offsetSSH) {
     resizeKernelArgInfoAndRegisterParameter(argNum);

@@ -350,8 +311,7 @@ void KernelInfo::storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uin
 }

 size_t KernelInfo::getSamplerStateArrayCount() const {
-    size_t count = patchInfo.samplerStateArray ? (size_t)patchInfo.samplerStateArray->Count : 0;
-    return count;
+    return kernelDescriptor.payloadMappings.samplerTable.numSamplers;
 }
 size_t KernelInfo::getSamplerStateArraySize(const HardwareInfo &hwInfo) const {
     size_t samplerStateArraySize = getSamplerStateArrayCount() * Sampler::getSamplerStateSize(hwInfo);
@@ -360,22 +320,22 @@ size_t KernelInfo::getSamplerStateArraySize(const HardwareInfo &hwInfo) const {

 size_t KernelInfo::getBorderColorStateSize() const {
     size_t borderColorSize = 0;
-    if (patchInfo.samplerStateArray) {
-        borderColorSize = patchInfo.samplerStateArray->Offset - patchInfo.samplerStateArray->BorderColorOffset;
+    if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0U) {
+        borderColorSize = kernelDescriptor.payloadMappings.samplerTable.tableOffset - kernelDescriptor.payloadMappings.samplerTable.borderColor;
     }
     return borderColorSize;
 }

 size_t KernelInfo::getBorderColorOffset() const {
     size_t borderColorOffset = 0;
-    if (patchInfo.samplerStateArray) {
-        borderColorOffset = patchInfo.samplerStateArray->BorderColorOffset;
+    if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0U) {
+        borderColorOffset = kernelDescriptor.payloadMappings.samplerTable.borderColor;
     }
     return borderColorOffset;
 }

 uint32_t KernelInfo::getConstantBufferSize() const {
-    return patchInfo.dataParameterStream ? patchInfo.dataParameterStream->DataParameterStreamSize : 0;
+    return kernelDescriptor.kernelAttributes.crossThreadDataSize;
 }

 bool KernelInfo::createKernelAllocation(const Device &device, bool internalIsa) {
@@ -412,12 +372,9 @@ void KernelInfo::apply(const DeviceInfoKernelPayloadConstants &constants) {
         *(uint32_t *)&(this->crossThreadData[localMemoryStatelessWindowSizeOffset]) = constants.slmWindowSize;
     }

-    uint32_t privateMemorySize = 0U;
-    if (patchInfo.pAllocateStatelessPrivateSurface) {
-        auto perHwThreadSize = PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(patchInfo.pAllocateStatelessPrivateSurface, this->getMaxSimdSize());
-        privateMemorySize = static_cast<uint32_t>(KernelHelper::getPrivateSurfaceSize(perHwThreadSize,
-                                                                                      constants.computeUnitsUsedForScratch));
-    }
+    auto perHwThreadSize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize;
+    uint32_t privateMemorySize = static_cast<uint32_t>(KernelHelper::getPrivateSurfaceSize(perHwThreadSize,
+                                                                                           constants.computeUnitsUsedForScratch));

     if (privateMemoryStatelessSizeOffset != WorkloadInfo::undefinedOffset) {
         *(uint32_t *)&(this->crossThreadData[privateMemoryStatelessSizeOffset]) = privateMemorySize;
@@ -112,12 +112,6 @@ struct KernelInfo {
     void storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg);
     void storeKernelArgument(const SPatchSamplerKernelArgument *pSamplerKernelArg);
     void storePatchToken(const SPatchExecutionEnvironment *execEnv);
-    void storePatchToken(const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg);
-    void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg);
-    void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg);
-    void storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo);
-    void storePatchToken(const SPatchAllocateSystemThreadSurface *pSystemThreadSurface);
-    void storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBuffer);
     GraphicsAllocation *getGraphicsAllocation() const { return this->kernelAllocation; }
     void resizeKernelArgInfoAndRegisterParameter(uint32_t argCount) {
         if (kernelArgInfo.size() <= argCount) {
@@ -170,7 +164,6 @@ struct KernelInfo {
     bool createKernelAllocation(const Device &device, bool internalIsa);
     void apply(const DeviceInfoKernelPayloadConstants &constants);

-    std::string attributes;
     HeapInfo heapInfo = {};
     PatchInfo patchInfo = {};
     std::vector<KernelArgInfo> kernelArgInfo;
@@ -182,7 +175,6 @@ struct KernelInfo {
     bool hasIndirectStatelessAccess = false;
     bool isVmeWorkload = false;
     char *crossThreadData = nullptr;
-    size_t requiredSubGroupSize = 0;
     uint32_t gpuPointerSize = 0;
     const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
     uint32_t argumentsToPatchNum = 0;
|
||||
@@ -135,9 +135,6 @@ void populateKernelInfoArg(KernelInfo &dstKernelInfo, KernelArgInfo &dstKernelIn
|
||||
|
||||
for (auto &byValArg : src.byValMap) {
|
||||
dstKernelInfo.storeKernelArgument(byValArg);
|
||||
if (byValArg->Type == DATA_PARAMETER_KERNEL_ARGUMENT) {
|
||||
dstKernelInfo.patchInfo.dataParameterBuffersKernelArgs.push_back(byValArg);
|
||||
}
|
||||
}
|
||||
|
||||
dstKernelInfoArg.offsetObjectId = getOffset(src.objectId);
|
||||
@@ -160,17 +157,8 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
|
||||
dst.heapInfo.pSsh = src.heaps.surfaceState.begin();
|
||||
|
||||
storeTokenIfNotNull(dst, src.tokens.executionEnvironment);
|
||||
dst.patchInfo.samplerStateArray = src.tokens.samplerStateArray;
|
||||
dst.patchInfo.bindingTableState = src.tokens.bindingTableState;
|
||||
dst.usesSsh = src.tokens.bindingTableState && (src.tokens.bindingTableState->Count > 0);
|
||||
dst.patchInfo.localsurface = src.tokens.allocateLocalSurface;
|
||||
dst.workloadInfo.slmStaticSize = src.tokens.allocateLocalSurface ? src.tokens.allocateLocalSurface->TotalInlineLocalMemorySize : 0U;
|
||||
dst.patchInfo.mediavfestate = src.tokens.mediaVfeState[0];
|
||||
dst.patchInfo.mediaVfeStateSlot1 = src.tokens.mediaVfeState[1];
|
||||
dst.patchInfo.interfaceDescriptorDataLoad = src.tokens.mediaInterfaceDescriptorLoad;
|
||||
dst.patchInfo.interfaceDescriptorData = src.tokens.interfaceDescriptorData;
|
||||
dst.patchInfo.threadPayload = src.tokens.threadPayload;
|
||||
dst.patchInfo.dataParameterStream = src.tokens.dataParameterStream;
|
||||
|
||||
dst.kernelArgInfo.resize(src.tokens.kernelArgs.size());
|
||||
|
||||
@@ -180,15 +168,15 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
|
||||
populateKernelInfoArg(dst, kernelInfoArg, decodedKernelArg);
|
||||
}
|
||||
|
||||
storeTokenIfNotNull(dst, src.tokens.kernelAttributesInfo);
|
||||
storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrivateSurface);
|
||||
storeTokenIfNotNull(dst, src.tokens.allocateStatelessConstantMemorySurfaceWithInitialization);
|
||||
storeTokenIfNotNull(dst, src.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization);
|
||||
storeTokenIfNotNull(dst, src.tokens.allocateSyncBuffer);
|
||||
if (nullptr != src.tokens.allocateSyncBuffer) {
|
||||
dst.usesSsh = true;
|
||||
}
|
||||
if (nullptr != src.tokens.allocateSystemThreadSurface) {
|
||||
dst.usesSsh = true;
|
||||
}
|
||||
|
||||
dst.isVmeWorkload = dst.isVmeWorkload || (src.tokens.inlineVmeSamplerInfo != nullptr);
|
||||
dst.systemKernelOffset = src.tokens.stateSip ? src.tokens.stateSip->SystemKernelOffset : 0U;
|
||||
storeTokenIfNotNull(dst, src.tokens.allocateSystemThreadSurface);
|
||||
|
||||
for (uint32_t i = 0; i < 3U; ++i) {
|
||||
dst.workloadInfo.localWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize[i]);
|
||||
@@ -221,10 +209,9 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
|
||||
populateKernelDescriptor(dst.kernelDescriptor, src, gpuPointerSizeInBytes);
|
||||
}
|
||||
|
||||
if (dst.patchInfo.dataParameterStream && dst.patchInfo.dataParameterStream->DataParameterStreamSize) {
|
||||
uint32_t crossThreadDataSize = dst.patchInfo.dataParameterStream->DataParameterStreamSize;
|
||||
dst.crossThreadData = new char[crossThreadDataSize];
|
||||
memset(dst.crossThreadData, 0x00, crossThreadDataSize);
|
||||
if (dst.kernelDescriptor.kernelAttributes.crossThreadDataSize) {
|
||||
dst.crossThreadData = new char[dst.kernelDescriptor.kernelAttributes.crossThreadDataSize];
|
||||
memset(dst.crossThreadData, 0x00, dst.kernelDescriptor.kernelAttributes.crossThreadDataSize);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,26 +48,10 @@ using iOpenCL::SPatchThreadPayload;
using iOpenCL::SProgramBinaryHeader;

struct PatchInfo {
const SPatchMediaInterfaceDescriptorLoad *interfaceDescriptorDataLoad = nullptr;
const SPatchAllocateLocalSurface *localsurface = nullptr;
const SPatchMediaVFEState *mediavfestate = nullptr;
const SPatchMediaVFEState *mediaVfeStateSlot1 = nullptr;
const SPatchInterfaceDescriptorData *interfaceDescriptorData = nullptr;
const SPatchSamplerStateArray *samplerStateArray = nullptr;
const SPatchBindingTableState *bindingTableState = nullptr;
::std::vector<const SPatchDataParameterBuffer *> dataParameterBuffersKernelArgs;
::std::vector<const SPatchStatelessGlobalMemoryObjectKernelArgument *>
statelessGlobalMemObjKernelArgs;
::std::vector<const SPatchImageMemoryObjectKernelArgument *>
imageMemObjKernelArgs;
const SPatchDataParameterStream *dataParameterStream = nullptr;
const SPatchThreadPayload *threadPayload = nullptr;
const SPatchKernelAttributesInfo *pKernelAttributesInfo = nullptr;
const SPatchAllocateStatelessPrivateSurface *pAllocateStatelessPrivateSurface = nullptr;
const SPatchAllocateSyncBuffer *pAllocateSyncBuffer = nullptr;
const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr;
const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr;
const SPatchAllocateSystemThreadSurface *pAllocateSystemThreadSurface = nullptr;
};

} // namespace NEO

@@ -348,16 +348,13 @@ void Program::allocateBlockPrivateSurfaces(const ClDevice &clDevice) {
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *info = blockKernelManager->getBlockKernelInfo(i);

if (info->patchInfo.pAllocateStatelessPrivateSurface) {
auto perHwThreadPrivateMemorySize = PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(info->patchInfo.pAllocateStatelessPrivateSurface, info->getMaxSimdSize());
auto perHwThreadPrivateMemorySize = info->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize;
if (perHwThreadPrivateMemorySize > 0 && blockKernelManager->getPrivateSurface(i) == nullptr) {
auto privateSize = static_cast<size_t>(KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, clDevice.getSharedDeviceInfo().computeUnitsUsedForScratch));

if (perHwThreadPrivateMemorySize > 0 && blockKernelManager->getPrivateSurface(i) == nullptr) {
auto privateSize = static_cast<size_t>(KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, clDevice.getSharedDeviceInfo().computeUnitsUsedForScratch));

auto *privateSurface = this->executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(
{rootDeviceIndex, privateSize, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, clDevice.getDeviceBitfield()});
blockKernelManager->pushPrivateSurface(privateSurface, i);
}
auto *privateSurface = this->executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(
{rootDeviceIndex, privateSize, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, clDevice.getDeviceBitfield()});
blockKernelManager->pushPrivateSurface(privateSurface, i);
}
}
}

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -35,7 +35,7 @@ class SchedulerKernel : public Kernel {

size_t getCurbeSize() {
auto &defaultKernelInfo = getDefaultKernelInfo();
size_t crossTrheadDataSize = defaultKernelInfo.patchInfo.dataParameterStream ? defaultKernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize : 0;
size_t crossTrheadDataSize = defaultKernelInfo.kernelDescriptor.kernelAttributes.crossThreadDataSize;
size_t dshSize = defaultKernelInfo.heapInfo.DynamicStateHeapSize;

crossTrheadDataSize = alignUp(crossTrheadDataSize, 64);

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -135,9 +135,9 @@ TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithAllocateSyncBufferPatchWhenEx
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
SPatchAllocateSyncBuffer patchAllocateSyncBuffer;
pProgram->mockKernelInfo.patchInfo.pAllocateSyncBuffer = &patchAllocateSyncBuffer;
populateKernelDescriptor(pProgram->mockKernelInfo.kernelDescriptor, patchAllocateSyncBuffer);

EXPECT_TRUE(pKernel->isUsingSyncBuffer(testedRootDeviceIndex));
EXPECT_TRUE(pKernel->usesSyncBuffer(testedRootDeviceIndex));

retVal = clEnqueueNDRangeKernel(
pCommandQueue,

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -141,11 +141,11 @@ TEST_F(KernelSubGroupInfoKhrReturnCompileSizeTest, GivenKernelWhenGettingRequire
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));

size_t requiredSubGroupSize = 0;
auto start = pKernel->getKernelInfo(rootDeviceIndex).attributes.find("intel_reqd_sub_group_size(");
auto start = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find("intel_reqd_sub_group_size(");
if (start != std::string::npos) {
start += strlen("intel_reqd_sub_group_size(");
auto stop = pKernel->getKernelInfo(rootDeviceIndex).attributes.find(")", start);
requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).attributes.substr(start, stop - start));
auto stop = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find(")", start);
requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.substr(start, stop - start));
}

EXPECT_EQ(paramValue, requiredSubGroupSize);

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -286,11 +286,11 @@ TEST_F(KernelSubGroupInfoReturnCompileSizeTest, GivenKernelWhenGettingCompileSub
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));

size_t requiredSubGroupSize = 0;
auto start = pKernel->getKernelInfo(rootDeviceIndex).attributes.find("intel_reqd_sub_group_size(");
auto start = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find("intel_reqd_sub_group_size(");
if (start != std::string::npos) {
start += strlen("intel_reqd_sub_group_size(");
auto stop = pKernel->getKernelInfo(rootDeviceIndex).attributes.find(")", start);
requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).attributes.substr(start, stop - start));
auto stop = pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find(")", start);
requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelLanguageAttributes.substr(start, stop - start));
}

EXPECT_EQ(paramValue[0], requiredSubGroupSize);

@@ -5,6 +5,8 @@
*
*/

#include "shared/source/device_binary_format/patchtokens_decoder.h"

#include "opencl/test/unit_test/fixtures/kernel_work_group_info_fixture.h"

using namespace NEO;
@@ -79,7 +81,7 @@ TEST_F(clGetKernelWorkGroupInfoTests, GivenKernelRequiringScratchSpaceWhenGettin
MockKernelWithInternals mockKernel(*pDevice);
SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = 1024; //whatever greater than 0
mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);

cl_ulong scratchSpaceSize = static_cast<cl_ulong>(mockKernel.mockKernel->getScratchSize(testedRootDeviceIndex));
EXPECT_EQ(scratchSpaceSize, 1024u);
@@ -106,7 +108,7 @@ HWTEST2_F(clGetKernelWorkGroupInfoTests, givenKernelHavingPrivateMemoryAllocatio
MockKernelWithInternals mockKernel(*pDevice);
SPatchAllocateStatelessPrivateSurface privateAllocation;
privateAllocation.PerThreadPrivateMemorySize = 1024;
mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrivateSurface = &privateAllocation;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, privateAllocation);

retVal = clGetKernelWorkGroupInfo(
mockKernel,
@@ -116,9 +118,9 @@ HWTEST2_F(clGetKernelWorkGroupInfoTests, givenKernelHavingPrivateMemoryAllocatio
&param_value,
&paramValueSizeRet);

EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(paramValueSizeRet, sizeof(cl_ulong));
EXPECT_EQ(param_value, privateAllocation.PerThreadPrivateMemorySize);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(sizeof(cl_ulong), paramValueSizeRet);
EXPECT_EQ(PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(privateAllocation, mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize), param_value);
}

TEST_F(clGetKernelWorkGroupInfoTests, givenKernelNotHavingPrivateMemoryAllocationWhenAskedForPrivateAllocationSizeThenZeroIsReturned) {

@@ -1046,8 +1046,9 @@ HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsC
std::unique_ptr<MockDebugKernel> kernel(MockKernel::create<MockDebugKernel>(*pDevice, &program));
MockCommandQueue cmdQ(context.get(), pClDevice, 0);

kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset, rootDeviceIndex);
const auto &systemThreadSurfaceAddress = kernel->getAllocatedKernelInfo()->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful;
kernel->getAllocatedKernelInfo()->usesSsh = true;
kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress, rootDeviceIndex);
auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver();

cmdQ.getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize);
@@ -1066,8 +1067,9 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSet
std::unique_ptr<MockDebugKernel> kernel(MockKernel::create<MockDebugKernel>(*pDevice, &program));
MockCommandQueue cmdQ(context.get(), pClDevice, 0);

kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset, rootDeviceIndex);
const auto &systemThreadSurfaceAddress = kernel->getAllocatedKernelInfo()->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful;
kernel->getAllocatedKernelInfo()->usesSsh = true;
kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress, rootDeviceIndex);
auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver();
commandStreamReceiver.allocateDebugSurface(SipKernel::maxDbgSurfaceSize);
auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation();

@@ -46,32 +46,35 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
memset(&kernelHeader, 0, sizeof(kernelHeader));
kernelHeader.KernelHeapSize = sizeof(kernelIsa);

SPatchDataParameterStream dataParameterStream = {};
memset(&dataParameterStream, 0, sizeof(dataParameterStream));
dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData);
populateKernelDescriptor(kernelInfo.kernelDescriptor, dataParameterStream);
populateKernelDescriptor(kernelInfoWithSampler.kernelDescriptor, dataParameterStream);

SPatchThreadPayload threadPayload = {};
memset(&threadPayload, 0, sizeof(threadPayload));
threadPayload.LocalIDXPresent = 1;
threadPayload.LocalIDYPresent = 1;
threadPayload.LocalIDZPresent = 1;
populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload);
populateKernelDescriptor(kernelInfoWithSampler.kernelDescriptor, threadPayload);

SPatchSamplerStateArray samplerArray = {};
samplerArray.BorderColorOffset = 0;
samplerArray.Count = 1;
samplerArray.Offset = 4;
samplerArray.Size = 2;
samplerArray.Token = 0;
populateKernelDescriptor(kernelInfoWithSampler.kernelDescriptor, samplerArray);

kernelInfo.heapInfo.pKernelHeap = kernelIsa;
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
kernelInfo.patchInfo.threadPayload = &threadPayload;

kernelInfoWithSampler.heapInfo.pKernelHeap = kernelIsa;
kernelInfoWithSampler.heapInfo.KernelHeapSize = sizeof(kernelIsa);
kernelInfoWithSampler.patchInfo.dataParameterStream = &dataParameterStream;
kernelInfoWithSampler.kernelDescriptor.kernelAttributes.simdSize = 32;
kernelInfoWithSampler.patchInfo.threadPayload = &threadPayload;
kernelInfoWithSampler.patchInfo.samplerStateArray = &samplerArray;
kernelInfoWithSampler.heapInfo.pDsh = static_cast<const void *>(dsh);
}

@@ -94,9 +97,6 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
std::unique_ptr<MockProgram> program;

SKernelBinaryHeaderCommon kernelHeader = {};
SPatchDataParameterStream dataParameterStream = {};
SPatchThreadPayload threadPayload = {};
SPatchSamplerStateArray samplerArray = {};

KernelInfo kernelInfo;
KernelInfo kernelInfoWithSampler;
@@ -145,10 +145,10 @@ HWTEST_F(DispatchWalkerTest, givenSimd1WhenSetGpgpuWalkerThreadDataThenSimdInWal
size_t numWorkGroups[] = {1, 1, 1};
size_t localWorkSizesIn[] = {32, 1, 1};
uint32_t simd = 1;
iOpenCL::SPatchThreadPayload threadPayload;

KernelDescriptor kd;
GpgpuWalkerHelper<FamilyType>::setGpgpuWalkerThreadData(
computeWalker, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, threadPayload, 5u);
computeWalker, kd, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, 5u);
EXPECT_EQ(computeWalker->getSimdSize(), 32 >> 4);
}

@@ -197,10 +197,12 @@ HWTEST_F(DispatchWalkerTest, WhenDispatchingWalkerThenCommandStreamMemoryIsntCha
}

HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDispatched) {
SPatchThreadPayload threadPayload = {};
threadPayload.LocalIDXPresent = 0;
threadPayload.LocalIDYPresent = 0;
threadPayload.LocalIDZPresent = 0;
threadPayload.UnusedPerThreadConstantPresent = 1;
populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload);

MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex));
ASSERT_EQ(CL_SUCCESS, kernel.initialize());

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -159,7 +159,7 @@ HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelFromProgramWithDebugEnabledWhe
std::unique_ptr<GMockCommandQueueHw<FamilyType>> mockCmdQ(new GMockCommandQueueHw<FamilyType>(context, pClDevice, 0));
mockCmdQ->getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize);

EXPECT_NE(nullptr, kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface);
EXPECT_TRUE(isValidOffset(kernel->getDefaultKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful));

EXPECT_CALL(*mockCmdQ.get(), setupDebugSurface(kernel.get())).Times(1).RetiresOnSaturation();

@@ -175,7 +175,7 @@ HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelWithoutSystemThreadSurfaceWhen
std::unique_ptr<MockKernel> kernel(MockKernel::create<MockKernel>(*pDevice, &program));
kernel->initialize();

EXPECT_EQ(nullptr, kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface);
EXPECT_FALSE(isValidOffset(kernel->getDefaultKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful));

std::unique_ptr<GMockCommandQueueHw<FamilyType>> mockCmdQ(new GMockCommandQueueHw<FamilyType>(context, pClDevice, 0));


@@ -571,8 +571,8 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs
MockKernelWithInternals kernelInternals{*pClDevice, context};
kernelInternals.kernelInfo.usesSsh = true;
kernelInternals.kernelInfo.requiresSshForBuffers = true;
kernelInternals.kernelInfo.patchInfo.pAllocateSyncBuffer = &sPatchAllocateSyncBuffer;
kernelInternals.kernelInfo.patchInfo.bindingTableState = &sPatchBindingTableState;
populateKernelDescriptor(kernelInternals.kernelInfo.kernelDescriptor, sPatchAllocateSyncBuffer);
populateKernelDescriptor(kernelInternals.kernelInfo.kernelDescriptor, sPatchBindingTableState);
kernelInternals.kernelInfo.heapInfo.SurfaceStateHeapSize = sizeof(RENDER_SURFACE_STATE) + sizeof(BINDING_TABLE_STATE);
auto kernel = kernelInternals.mockKernel;
kernel->initialize();

@@ -495,14 +495,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScra
HardwareParse hwParser;
size_t off[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};

SPatchMediaVFEState mediaVFEstate;
uint32_t scratchSize = 4096u;

mediaVFEstate.PerThreadScratchSpace = scratchSize;

MockKernelWithInternals mockKernel(*pClDevice);
mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;

SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = scratchSize;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);

auto sizeToProgram = PreambleHelper<FamilyType>::getScratchSizeValueToProgramMediaVfeState(scratchSize);

@@ -536,14 +535,13 @@ HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPri
csr.getMemoryManager()->setForce32BitAllocations(false);
size_t off[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};

SPatchMediaVFEState mediaVFEstate;
uint32_t privateScratchSize = 4096u;

mediaVFEstate.PerThreadScratchSpace = privateScratchSize;

MockKernelWithInternals mockKernel(*pClDevice);
mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstate;

SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = privateScratchSize;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 1);

pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);


@@ -308,13 +308,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc

EXPECT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());

SPatchMediaVFEState mediaVFEstate;
auto scratchSize = GetParam().scratchSize;

mediaVFEstate.PerThreadScratchSpace = scratchSize;

MockKernelWithInternals mockKernel(*pClDevice);
mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;

SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = scratchSize;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);

uint32_t sizeToProgram = (scratchSize / static_cast<uint32_t>(MemoryConstants::kiloByte));
uint32_t bitValue = 0u;
@@ -370,6 +370,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc
}

mediaVFEstate.PerThreadScratchSpace = scratchSize;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);

auto itorfirstBBEnd = find<typename FamilyType::MI_BATCH_BUFFER_END *>(itorWalker, cmdList.end());
ASSERT_NE(cmdList.end(), itorfirstBBEnd);
@@ -443,13 +444,13 @@ HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWi
auto mockCsr = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
pDevice->resetCommandStreamReceiver(mockCsr);

SPatchMediaVFEState mediaVFEstate;
uint32_t scratchSize = 1024u;

mediaVFEstate.PerThreadScratchSpace = scratchSize;

MockKernelWithInternals mockKernel(*pClDevice);
mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;

SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = scratchSize;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);

uint32_t sizeToProgram = (scratchSize / static_cast<uint32_t>(MemoryConstants::kiloByte));
uint32_t bitValue = 0u;
@@ -486,12 +487,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelWithScratch, givenDeviceForcing32bitAll
auto memoryManager = csr->getMemoryManager();
memoryManager->setForce32BitAllocations(true);

SPatchMediaVFEState mediaVFEstate;
auto scratchSize = 1024;
mediaVFEstate.PerThreadScratchSpace = scratchSize;

MockKernelWithInternals mockKernel(*pClDevice);
mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;

SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = scratchSize;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);

enqueueKernel<FamilyType>(mockKernel);
auto graphicsAllocation = csr->getScratchAllocation();

@@ -64,6 +64,7 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
void SetUpT() {
SyncBufferEnqueueHandlerTest::SetUp();
kernelInternals = std::make_unique<MockKernelWithInternals>(*pClDevice, context);
kernelInternals->kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
kernel = kernelInternals->mockKernel;
kernel->executionType = KernelExecutionType::Concurrent;
commandQueue = reinterpret_cast<MockCommandQueue *>(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));
@@ -81,7 +82,7 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
sPatchAllocateSyncBuffer.SurfaceStateHeapOffset = 0;
sPatchAllocateSyncBuffer.DataParamOffset = 0;
sPatchAllocateSyncBuffer.DataParamSize = sizeof(uint8_t);
kernelInternals->kernelInfo.patchInfo.pAllocateSyncBuffer = &sPatchAllocateSyncBuffer;
populateKernelDescriptor(kernelInternals->kernelInfo.kernelDescriptor, sPatchAllocateSyncBuffer);
}

MockSyncBufferHandler *getSyncBufferHandler() {
@@ -176,6 +177,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBuffer
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
kernelInternals->kernelInfo.usesSsh = true;
kernelInternals->kernelInfo.requiresSshForBuffers = true;
kernelInternals->kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindfulAndStateless;
patchAllocateSyncBuffer();

pClDevice->allocateSyncBufferHandler();

@@ -87,9 +87,9 @@ struct WorkGroupSizeBase {
Math::divideAndRoundUp(workItems[0], workGroupSize[0]),
Math::divideAndRoundUp(workItems[1], workGroupSize[1]),
Math::divideAndRoundUp(workItems[2], workGroupSize[2])};
const iOpenCL::SPatchThreadPayload threadPayload = {};
GpgpuWalkerHelper<FamilyType>::setGpgpuWalkerThreadData(&pCmd, globalOffsets, workGroupsStart, workGroupsNum,
workGroupSize, simdSize, dims, true, false, threadPayload, 0u);
KernelDescriptor kd;
GpgpuWalkerHelper<FamilyType>::setGpgpuWalkerThreadData(&pCmd, kd, globalOffsets, workGroupsStart, workGroupsNum,
workGroupSize, simdSize, dims, true, false, 0u);

//And check if it is programmed correctly
auto numWorkItems = computeWalkerWorkItems<FamilyType>(pCmd);

@@ -592,10 +592,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenTwoConsecu

size_t GWS = 1;
uint32_t scratchSize = 1024;
SPatchMediaVFEState mediaVFEstate;

SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = scratchSize;
kernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
populateKernelDescriptor(kernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);

EXPECT_EQ(false, kernel.mockKernel->isBuiltIn);

@@ -667,6 +667,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenTwoConsecu
//now re-try to see if SBA is not programmed
scratchSize *= 2;
mediaVFEstate.PerThreadScratchSpace = scratchSize;
populateKernelDescriptor(kernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);

commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);

@@ -707,10 +708,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenNDRangeKer

size_t GWS = 1;
uint32_t scratchSize = 1024;
SPatchMediaVFEState mediaVFEstate;

SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = scratchSize;
kernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
populateKernelDescriptor(kernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);

EXPECT_EQ(false, kernel.mockKernel->isBuiltIn);


@@ -825,19 +825,17 @@ TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenConte

auto size = zeroSized ? 0 : 1024;
MockKernelWithInternals mockKernel(context->getDevices(), context);
SPatchMediaVFEState mediaVFEstate;

SPatchMediaVFEState mediaVFEstate;
mediaVFEstate.PerThreadScratchSpace = size;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);

uint32_t computeUnitsForScratch[] = {0x10, 0x20};

for (auto &pClDevice : context->getDevices()) {
auto &deviceInfo = const_cast<DeviceInfo &>(pClDevice->getSharedDeviceInfo());
deviceInfo.computeUnitsUsedForScratch = computeUnitsForScratch[pClDevice->getRootDeviceIndex()];
}

mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;

mockKernel.mockKernel->initialize();

for (auto &pClDevice : context->getDevices()) {
@@ -858,16 +856,15 @@ TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThen
auto size = zeroSized ? 0 : 1024;

MockKernelWithInternals mockKernel(*pDevice, context);
SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateMemorySurface = {};

SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateMemorySurface = {};
allocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = size;
allocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 128;
allocateStatelessPrivateMemorySurface.DataParamOffset = 16;
allocateStatelessPrivateMemorySurface.DataParamSize = 8;

allocateStatelessPrivateMemorySurface.IsSimtThread = isSmitThread;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, allocateStatelessPrivateMemorySurface);

mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrivateSurface = &allocateStatelessPrivateMemorySurface;
size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch;
size *= isSmitThread ? mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize() : 1;


@@ -571,7 +571,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
sPatchPrintfSurface.DataParamOffset = 0;
sPatchPrintfSurface.DataParamSize = 8;
populateKernelDescriptor(kernelInfo->kernelDescriptor, sPatchPrintfSurface);
kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap.insert(std::make_pair(0, testString));
kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap[0].assign(testString);

uint64_t crossThread[10];
pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8);

@@ -68,7 +68,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
kernelIsaAddress += pKernel->getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
kernelIsaAddress += pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.entryPoints.skipSetFFIDGP;
}

pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
@@ -94,15 +94,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);

ASSERT_NE(nullptr, pBlockInfo);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);

auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize;

const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize;
const uint32_t sizeCrossThreadData = pBlockInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize / grfSize;

auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload);
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->getMaxSimdSize(), numChannels);
auto numChannels = pBlockInfo->kernelDescriptor.kernelAttributes.numLocalIdChannels;
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->getMaxSimdSize(), grfSize, numChannels);
uint32_t numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);

@@ -117,7 +115,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
expectedBlockKernelAddress += pBlockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP;
}

EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress);
@@ -133,7 +131,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelWithPrivate

size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) {
if (pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->kernelDescriptor.kernelAttributes.flags.usesPrivateMemory) {
kernelRequiringPrivateSurface = i;
break;
}
@@ -163,7 +161,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlocksWithPrivateMemor

size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) {
if (pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->kernelDescriptor.kernelAttributes.flags.usesPrivateMemory) {
kernelRequiringPrivateSurface = i;
break;
}
@@ -321,18 +319,16 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);

ASSERT_NE(nullptr, pBlockInfo);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);

Kernel *blockKernel = Kernel::create(pKernel->getProgram(), MockKernel::toKernelInfoContainer(*pBlockInfo, rootDeviceIndex), nullptr);
blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
if (blockKernel->getNumberOfBindingTableStates(rootDeviceIndex) > 0) {
ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState);
auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset);
ASSERT_TRUE(isValidOffset(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset));
auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset);
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE);
auto dstBindingTable = reinterpret_cast<BINDING_TABLE_STATE *>(dstBlockBti);

auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset);
auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset);
auto srcBindingTable = reinterpret_cast<const BINDING_TABLE_STATE *>(srcBlockBti);
for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(rootDeviceIndex); ++i) {
uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer();

@@ -175,7 +175,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
size_t sshUsed = blockedCommandsData->ssh->getUsed();

size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates(rootDeviceIndex) * sizeof(RENDER_SURFACE_STATE) +
pKernel->getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) +
pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.bindingTable.numEntries * sizeof(BINDING_TABLE_STATE) +
UnitTestHelper<FamilyType>::getDefaultSshUsage();

if ((pKernel->requiresSshForBuffers(rootDeviceIndex)) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) {

@@ -21,13 +21,13 @@ GEN12LPTEST_F(DeviceQueueHwTest, givenDeviceQueueWhenRunningOnCCsThenFfidSkipOff
KernelInfo *blockInfo = const_cast<KernelInfo *>(mockParentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0));
blockInfo->createKernelAllocation(device->getDevice(), false);
ASSERT_NE(nullptr, blockInfo->getGraphicsAllocation());
const_cast<SPatchThreadPayload *>(blockInfo->patchInfo.threadPayload)->OffsetToSkipSetFFIDGP = 0x1234;
blockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP = 0x1234;

auto &hwInfo = const_cast<HardwareInfo &>(device->getHardwareInfo());
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
hwInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(REVISION_A0, hwInfo);

uint64_t expectedOffset = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch() + blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
uint64_t expectedOffset = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch() + blockInfo->kernelDescriptor.entryPoints.skipSetFFIDGP;
uint64_t offset = MockDeviceQueueHw<FamilyType>::getBlockKernelStartPointer(device->getDevice(), blockInfo, true);
EXPECT_EQ(expectedOffset, offset);


@@ -31,7 +31,7 @@ TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenS
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
auto rootDeviceIndex = device->getRootDeviceIndex();
MockKernelWithInternals mockKernelWithInternals{*device};
mockKernelWithInternals.kernelInfo.patchInfo.threadPayload = &threadPayload;
populateKernelDescriptor(mockKernelWithInternals.kernelInfo.kernelDescriptor, threadPayload);

for (auto isCcsUsed : ::testing::Bool()) {
uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed, rootDeviceIndex);

@@ -36,15 +36,14 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;

pMediaVFEstate = new SPatchMediaVFEState();
pMediaVFEstate->PerThreadScratchSpace = 1024;
pMediaVFEstate->ScratchSpaceOffset = 0;
SPatchMediaVFEState mediaVFEstate = {};
mediaVFEstate.PerThreadScratchSpace = 1024;
mediaVFEstate.ScratchSpaceOffset = 0;
populateKernelDescriptor(pKernelInfo->kernelDescriptor, mediaVFEstate, 0);

SPatchAllocateStatelessPrintfSurface printfSurface = {};
populateKernelDescriptor(pKernelInfo->kernelDescriptor, printfSurface);

pKernelInfo->patchInfo.mediavfestate = pMediaVFEstate;

KernelArgPatchInfo kernelArg1PatchInfo;
KernelArgPatchInfo kernelArg2PatchInfo;
KernelArgPatchInfo kernelArg3PatchInfo;
@@ -76,7 +75,6 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture

void TearDown() override {
delete pKernel;
delete pMediaVFEstate;
delete pProgram;

ContextFixture::TearDown();
@@ -84,7 +82,6 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture
}

std::unique_ptr<KernelInfo> pKernelInfo;
SPatchMediaVFEState *pMediaVFEstate = nullptr;
MockProgram *pProgram = nullptr;
MockKernel *pKernel = nullptr;
char pCrossThreadData[128];

@@ -33,10 +33,10 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture {

pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;

pMediaVFEstate = new SPatchMediaVFEState();
pMediaVFEstate->PerThreadScratchSpace = 1024;
pMediaVFEstate->ScratchSpaceOffset = 0;
pKernelInfo->patchInfo.mediavfestate = pMediaVFEstate;
SPatchMediaVFEState mediaVFEstate = {};
mediaVFEstate.PerThreadScratchSpace = 1024;
mediaVFEstate.ScratchSpaceOffset = 0;
populateKernelDescriptor(pKernelInfo->kernelDescriptor, mediaVFEstate, 0);

SPatchAllocateStatelessPrintfSurface printfSurface = {};
populateKernelDescriptor(pKernelInfo->kernelDescriptor, printfSurface);
@@ -48,7 +48,6 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture {
}
void TearDown() override {
delete pKernel;
delete pMediaVFEstate;
delete pProgram;

ContextFixture::TearDown();
@@ -56,7 +55,6 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture {
}

std::unique_ptr<KernelInfo> pKernelInfo;
SPatchMediaVFEState *pMediaVFEstate = nullptr;
MockProgram *pProgram = nullptr;
MockKernel *pKernel = nullptr;
};

@@ -587,8 +587,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe

constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF);
size_t localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
ASSERT_NE(nullptr, modifiedKernelInfo.patchInfo.threadPayload);
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*modifiedKernelInfo.patchInfo.threadPayload);
auto numChannels = modifiedKernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels;
size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(modifiedKernelInfo.getMaxSimdSize(), grfSize, numChannels, localWorkSize);
ASSERT_LE(expectedIohSize, ioh.getUsed());

@@ -692,26 +691,26 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;

// define patch offsets for global, constant, private, event pool and default device queue surfaces
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
AllocateStatelessGlobalMemorySurfaceWithInitialization.GlobalBufferIndex = 0;
AllocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0;
AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization;
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization allocateStatelessGlobalMemorySurfaceWithInitialization;
allocateStatelessGlobalMemorySurfaceWithInitialization.GlobalBufferIndex = 0;
allocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0;
allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessGlobalMemorySurfaceWithInitialization);

SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization;
AllocateStatelessConstantMemorySurfaceWithInitialization.ConstantBufferIndex = 0;
AllocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 64;
AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 8;
AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization;
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization allocateStatelessConstantMemorySurfaceWithInitialization;
allocateStatelessConstantMemorySurfaceWithInitialization.ConstantBufferIndex = 0;
allocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 64;
allocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 8;
allocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessConstantMemorySurfaceWithInitialization);

SPatchAllocateStatelessPrivateSurface AllocateStatelessPrivateMemorySurface;
AllocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 32;
AllocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 128;
AllocateStatelessPrivateMemorySurface.DataParamOffset = 16;
AllocateStatelessPrivateMemorySurface.DataParamSize = 8;
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &AllocateStatelessPrivateMemorySurface;
SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateMemorySurface;
allocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 32;
allocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 128;
allocateStatelessPrivateMemorySurface.DataParamOffset = 16;
allocateStatelessPrivateMemorySurface.DataParamSize = 8;
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessPrivateMemorySurface);

SPatchAllocateStatelessEventPoolSurface allocateStatelessEventPoolSurface;
allocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 192;
@@ -767,14 +766,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
bindingTableState.Count = 5;
bindingTableState.Offset = btiOffset;
bindingTableState.SurfaceStateOffset = 0;
pKernelInfo->patchInfo.bindingTableState = &bindingTableState;
populateKernelDescriptor(pKernelInfo->kernelDescriptor, bindingTableState);

// setup thread payload
SPatchThreadPayload threadPayload;
SPatchThreadPayload threadPayload = {};
threadPayload.LocalIDXPresent = 1;
threadPayload.LocalIDYPresent = 1;
threadPayload.LocalIDZPresent = 1;
pKernelInfo->patchInfo.threadPayload = &threadPayload;
populateKernelDescriptor(pKernelInfo->kernelDescriptor, threadPayload);

// define stateful path
pKernelInfo->usesSsh = true;
@@ -948,15 +947,87 @@ HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStat
bindingTableState.Count = 0;
bindingTableState.Offset = 64;
bindingTableState.SurfaceStateOffset = 0;
pKernelInfo->patchInfo.bindingTableState = &bindingTableState;
populateKernelDescriptor(pKernelInfo->kernelDescriptor, bindingTableState);

dstBindingTablePointer = pushBindingTableAndSurfaceStates<FamilyType>(ssh, *pKernel);
EXPECT_EQ(0u, dstBindingTablePointer);

pKernelInfo->patchInfo.bindingTableState = nullptr;

delete pKernel;
}
HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerStateArrayWhenSendIndirectStateIsCalledThenInterfaceDescriptorIsNotPopulated) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
CommandQueueHw<FamilyType> cmdQ(pContext, pClDevice, 0, false);

auto &commandStream = cmdQ.getCS(1024);
auto pWalkerCmd = static_cast<GPGPU_WALKER *>(commandStream.getSpace(sizeof(GPGPU_WALKER)));
*pWalkerCmd = FamilyType::cmdInitGpgpuWalker;

auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192);
auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192);
auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192);
const size_t localWorkSize = 256;
const size_t localWorkSizes[3]{localWorkSize, 1, 1};
uint32_t interfaceDescriptorIndex = 0;
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex);

//Undefined Offset, Defined BorderColorOffset
SPatchSamplerStateArray samplerStateArray = {};
samplerStateArray.BorderColorOffset = 0;
samplerStateArray.Count = 2;
samplerStateArray.Offset = undefined<uint16_t>;
samplerStateArray.Size = sizeof(SPatchSamplerStateArray);
samplerStateArray.Token = 1;
populateKernelDescriptor(mockKernelWithInternal->kernelInfo.kernelDescriptor, samplerStateArray);

HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(),
localWorkSizes,
0,
interfaceDescriptorIndex,
pDevice->getPreemptionMode(),
pWalkerCmd,
nullptr,
true,
*pDevice);

auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
EXPECT_EQ(0U, interfaceDescriptor->getSamplerStatePointer());
EXPECT_EQ(0U, interfaceDescriptor->getSamplerCount());

//Defined Offset, Undefined BorderColorOffset
samplerStateArray.Offset = 0;
samplerStateArray.BorderColorOffset = undefined<uint16_t>;
populateKernelDescriptor(mockKernelWithInternal->kernelInfo.kernelDescriptor, samplerStateArray);

HardwareCommandsHelper<FamilyType>::sendIndirectState(
commandStream,
dsh,
ioh,
ssh,
*mockKernelWithInternal->mockKernel,
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex),
mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(),
localWorkSizes,
0,
interfaceDescriptorIndex,
pDevice->getPreemptionMode(),
pWalkerCmd,
nullptr,
true,
*pDevice);

interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
EXPECT_EQ(0U, interfaceDescriptor->getSamplerStatePointer());
EXPECT_EQ(0U, interfaceDescriptor->getSamplerCount());
}

HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenIndirectStateIsProgrammedThenBorderColorIsCorrectlyCopiedToDshAndSamplerStatesAreProgrammedWithPointer) {
typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE;
@@ -979,20 +1050,18 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
const uint32_t borderColorSize = 64;
const uint32_t samplerStateSize = sizeof(SAMPLER_STATE) * 2;

SPatchSamplerStateArray samplerStateArray;
SPatchSamplerStateArray samplerStateArray = {};
samplerStateArray.BorderColorOffset = 0x0;
samplerStateArray.Count = 2;
samplerStateArray.Offset = borderColorSize;
samplerStateArray.Size = samplerStateSize;
samplerStateArray.Token = 1;
populateKernelDescriptor(mockKernelWithInternal->kernelInfo.kernelDescriptor, samplerStateArray);

char *mockDsh = new char[(borderColorSize + samplerStateSize) * 4];

memset(mockDsh, 6, borderColorSize);
memset(mockDsh + borderColorSize, 8, borderColorSize);

mockKernelWithInternal->kernelInfo.heapInfo.pDsh = mockDsh;
mockKernelWithInternal->kernelInfo.patchInfo.samplerStateArray = &samplerStateArray;

uint64_t interfaceDescriptorTableOffset = dsh.getUsed();
dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA));
@@ -1100,7 +1169,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, WhenGettingSizeR
totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);

maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState ? pBlockInfo->patchInfo.bindingTableState->Count : 0);
maxBindingTableCount = std::max(maxBindingTableCount, static_cast<uint32_t>(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries));
}

totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
@@ -1128,22 +1197,21 @@ HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineT
DebugManager.flags.EnablePassInlineData.set(1u);

uint32_t crossThreadData[8];

const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1;
mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true;
mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));

EXPECT_TRUE(HardwareCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex));
}

HWTEST_F(HardwareCommandsTest, givenNoDebugSettingsWhenDefaultModeIsExcercisedThenWeFollowKernelSettingForInlineProgramming) {
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1;
mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true;
EXPECT_TRUE(HardwareCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex));
}

HWTEST_F(HardwareCommandsTest, givenDisabledPassInlineDataWhenKernelAllowsInlineThenReturnFalse) {
DebugManagerStateRestore restore;
DebugManager.flags.EnablePassInlineData.set(0u);
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1;
mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true;
EXPECT_FALSE(HardwareCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex));
}

@@ -1153,41 +1221,19 @@ HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInli

uint32_t crossThreadData[8];

const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 0;
mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = false;
mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));

EXPECT_FALSE(HardwareCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex));
}

HWTEST_F(HardwareCommandsTest, whenLocalIdxInXDimPresentThenExpectLocalIdsInUseIsTrue) {
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 1;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;

HWTEST_F(HardwareCommandsTest, whenNumLocalIdsIsBiggerThanZeroThenExpectLocalIdsInUseIsTrue) {
mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels = 1;
EXPECT_TRUE(HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex));
}

HWTEST_F(HardwareCommandsTest, whenLocalIdxInYDimPresentThenExpectLocalIdsInUseIsTrue) {
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 1;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;

EXPECT_TRUE(HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex));
}

HWTEST_F(HardwareCommandsTest, whenLocalIdxInZDimPresentThenExpectLocalIdsInUseIsTrue) {
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 1;

EXPECT_TRUE(HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex));
}

HWTEST_F(HardwareCommandsTest, whenLocalIdxAreNotPresentThenExpectLocalIdsInUseIsFalse) {
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0;
const_cast<SPatchThreadPayload *>(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0;

HWTEST_F(HardwareCommandsTest, whenNumLocalIdsIsZeroThenExpectLocalIdsInUseIsFalse) {
mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels = 0;
EXPECT_FALSE(HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex));
}


@@ -45,7 +45,7 @@ struct HardwareCommandsTest : ClDeviceFixture,

template <typename GfxFamily>
size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
|
||||
return EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState->Count : 0,
|
||||
return EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(dstHeap, srcKernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.bindingTable.numEntries,
|
||||
srcKernel.getSurfaceStateHeap(rootDeviceIndex), srcKernel.getSurfaceStateHeapSize(rootDeviceIndex),
|
||||
srcKernel.getNumberOfBindingTableStates(rootDeviceIndex), srcKernel.getBindingTableOffset(rootDeviceIndex));
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "opencl/source/helpers/per_thread_data.h"
|
||||
#include "opencl/source/program/kernel_info.h"
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "test.h"
|
||||
|
||||
#include "patch_shared.h"
|
||||
@@ -26,22 +27,22 @@ struct PerThreadDataTests : public ClDeviceFixture,
|
||||
void SetUp() override {
|
||||
ClDeviceFixture::SetUp();
|
||||
|
||||
threadPayload = {};
|
||||
SPatchThreadPayload threadPayload = {};
|
||||
threadPayload.LocalIDXPresent = localIdX ? 1 : 0;
|
||||
threadPayload.LocalIDYPresent = localIdY ? 1 : 0;
|
||||
threadPayload.LocalIDZPresent = localIdZ ? 1 : 0;
|
||||
threadPayload.LocalIDFlattenedPresent = flattenedId;
|
||||
threadPayload.UnusedPerThreadConstantPresent =
|
||||
!(localIdX || localIdY || localIdZ || flattenedId);
|
||||
populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload);
|
||||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
simd = 32;
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = simd;
|
||||
|
||||
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
|
||||
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
|
||||
|
||||
kernelInfo.patchInfo.threadPayload = &threadPayload;
|
||||
|
||||
simd = kernelInfo.getMaxSimdSize();
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = kernelInfo.getMaxSimdSize();
|
||||
numChannels = threadPayload.LocalIDXPresent +
|
||||
threadPayload.LocalIDYPresent +
|
||||
threadPayload.LocalIDZPresent;
|
||||
@@ -66,7 +67,6 @@ struct PerThreadDataTests : public ClDeviceFixture,
|
||||
size_t indirectHeapMemorySize;
|
||||
|
||||
SKernelBinaryHeaderCommon kernelHeader;
|
||||
SPatchThreadPayload threadPayload;
|
||||
KernelInfo kernelInfo;
|
||||
};
|
||||
|
||||
@@ -121,22 +121,20 @@ HWTEST_F(PerThreadDataXYZTests, Given2x4x8WhenSendingPerThreadDataThenCorrectAmo
|
||||
}
|
||||
|
||||
HWTEST_F(PerThreadDataXYZTests, GivenDifferentSimdWhenGettingThreadPayloadSizeThenCorrectSizeIsReturned) {
|
||||
simd = 32;
|
||||
uint32_t size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
uint32_t size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize * 2u * 3u, size);
|
||||
|
||||
simd = 16;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize * 3u, size);
|
||||
|
||||
simd = 16;
|
||||
threadPayload.HeaderPresent = 1;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent = true;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize * 4u, size);
|
||||
|
||||
simd = 16;
|
||||
threadPayload.UnusedPerThreadConstantPresent = 1;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent = true;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize * 5u, size);
|
||||
}
|
||||
|
||||
@@ -181,39 +179,36 @@ HWTEST_F(PerThreadDataNoIdsTests, GivenThreadPaylodDataWithoutLocalIdsWhenSendin
|
||||
}
|
||||
|
||||
HWTEST_F(PerThreadDataNoIdsTests, GivenSimdWhenGettingThreadPayloadSizeThenCorrectValueIsReturned) {
|
||||
simd = 32;
|
||||
uint32_t size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
uint32_t size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize, size);
|
||||
|
||||
simd = 16;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize, size);
|
||||
|
||||
simd = 16;
|
||||
threadPayload.HeaderPresent = 1;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent = true;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize * 2u, size);
|
||||
}
|
||||
|
||||
typedef PerThreadDataTests<false, false, false, true> PerThreadDataFlattenedIdsTests;
|
||||
|
||||
HWTEST_F(PerThreadDataFlattenedIdsTests, GivenSimdWhenGettingThreadPayloadSizeThenCorrectValueIsReturned) {
|
||||
simd = 32;
|
||||
uint32_t size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
uint32_t size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize * 2u, size);
|
||||
|
||||
simd = 16;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize, size);
|
||||
|
||||
simd = 16;
|
||||
threadPayload.HeaderPresent = 1;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent = true;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize * 2u, size);
|
||||
|
||||
simd = 32;
|
||||
threadPayload.HeaderPresent = 1;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize);
|
||||
EXPECT_EQ(grfSize * 3u, size);
|
||||
}
|
||||
|
||||
@@ -226,7 +221,7 @@ TEST(PerThreadDataTest, WhenSettingLocalIdsInPerThreadDataThenIdsAreSetInCorrect
|
||||
const std::array<uint16_t, 3> localWorkSizes = {{24, 1, 1}};
|
||||
const std::array<uint8_t, 3> workgroupWalkOrder = {{0, 1, 2}};
|
||||
|
||||
auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, numChannels, localWorkSize, grfSize);
|
||||
auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize);
|
||||
|
||||
auto sizeOverSizedBuffer = sizePerThreadDataTotal * 4;
|
||||
auto buffer = static_cast<char *>(alignedMalloc(sizeOverSizedBuffer, 16));
|
||||
|
||||
@@ -16,15 +16,6 @@
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetDebugSurfaceBtiIsCalledThenCorrectValueIsReturned) {
|
||||
auto device = std::make_unique<MockClDevice>(new MockDevice);
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
program.enableKernelDebug();
|
||||
std::unique_ptr<MockKernel> kernel(MockKernel::create<MockDebugKernel>(device->getDevice(), &program));
|
||||
|
||||
EXPECT_EQ(0, kernel->getDebugSurfaceBti(device->getRootDeviceIndex()));
|
||||
}
|
||||
|
||||
TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenCorrectValueIsReturned) {
|
||||
auto device = std::make_unique<MockClDevice>(new MockDevice);
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
@@ -54,15 +45,6 @@ TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenQueryingIsKernelDebugEnable
|
||||
EXPECT_FALSE(kernel->isKernelDebugEnabled());
|
||||
}
|
||||
|
||||
TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetDebugSurfaceBtiIsCalledThenInvalidIndexValueIsReturned) {
|
||||
auto device = std::make_unique<MockClDevice>(new MockDevice);
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
program.enableKernelDebug();
|
||||
std::unique_ptr<MockKernel> kernel(MockKernel::create<MockKernel>(device->getDevice(), &program));
|
||||
|
||||
EXPECT_EQ(-1, kernel->getDebugSurfaceBti(device->getRootDeviceIndex()));
|
||||
}
|
||||
|
||||
TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenZeroIsReturned) {
|
||||
auto device = std::make_unique<MockClDevice>(new MockDevice);
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
|
||||
@@ -83,8 +83,8 @@ struct MultiDeviceKernelArgBufferTest : public ::testing::Test {
|
||||
pKernelInfosStorage[i]->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);
|
||||
pKernelInfosStorage[i]->kernelArgInfo[0].isBuffer = true;
|
||||
|
||||
pKernelInfosStorage[i]->patchInfo.dataParameterStream = &dataParameterStream[i];
|
||||
dataParameterStream[i].DataParameterStreamSize = (i + 1) * sizeof(void *);
|
||||
populateKernelDescriptor(pKernelInfosStorage[i]->kernelDescriptor, dataParameterStream[i]);
|
||||
|
||||
pKernelInfosStorage[i]->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = i * sizeof(void *);
|
||||
pKernelInfosStorage[i]->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(void *);
|
||||
|
||||
@@ -193,8 +193,7 @@ HWTEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithSetBindingTableStateAnd
|
||||
SPatchBindingTableState bindingTableStateInfo;
|
||||
bindingTableStateInfo.Offset = 0;
|
||||
bindingTableStateInfo.Count = 4;
|
||||
|
||||
info.patchInfo.bindingTableState = &bindingTableStateInfo;
|
||||
populateKernelDescriptor(info.kernelDescriptor, bindingTableStateInfo);
|
||||
|
||||
BINDING_TABLE_STATE bindingTableState[4];
|
||||
|
||||
@@ -245,8 +244,7 @@ HWTEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBindingTableStateAndIma
|
||||
SPatchBindingTableState bindingTableStateInfo;
|
||||
bindingTableStateInfo.Offset = 0;
|
||||
bindingTableStateInfo.Count = 0;
|
||||
|
||||
info.patchInfo.bindingTableState = &bindingTableStateInfo;
|
||||
populateKernelDescriptor(info.kernelDescriptor, bindingTableStateInfo);
|
||||
|
||||
BINDING_TABLE_STATE bindingTableState[1];
|
||||
|
||||
@@ -347,8 +345,9 @@ TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBufferAndDataParameterBuf
|
||||
dataParameterBuffer.SourceOffset = 0;
|
||||
dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT;
|
||||
|
||||
info.patchInfo.dataParameterBuffersKernelArgs.push_back(&dataParameterBuffer);
|
||||
info.storeKernelArgument(&dataParameterBuffer);
|
||||
info.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
|
||||
populateKernelArgDescriptor(info.kernelDescriptor, 0, dataParameterBuffer);
|
||||
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParams;
|
||||
uint64_t tokenMask = 0;
|
||||
@@ -659,14 +658,14 @@ TEST(KernelReflectionSurfaceTestSingle, GivenNoKernelArgsWhenObtainingKernelRefl
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
dataParameterStream.Size = 0;
|
||||
dataParameterStream.DataParameterStreamSize = 0;
|
||||
info.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
populateKernelDescriptor(info.kernelDescriptor, dataParameterStream);
|
||||
|
||||
SPatchBindingTableState bindingTableState;
|
||||
bindingTableState.Count = 0;
|
||||
bindingTableState.Offset = 0;
|
||||
bindingTableState.Size = 0;
|
||||
bindingTableState.SurfaceStateOffset = 0;
|
||||
info.patchInfo.bindingTableState = &bindingTableState;
|
||||
populateKernelDescriptor(info.kernelDescriptor, bindingTableState);
|
||||
|
||||
KernelInfoContainer kernelInfos;
|
||||
kernelInfos.push_back(&info);
|
||||
@@ -712,14 +711,14 @@ TEST(KernelReflectionSurfaceTestSingle, GivenDeviceQueueKernelArgWhenObtainingKe
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
dataParameterStream.Size = 0;
|
||||
dataParameterStream.DataParameterStreamSize = 0;
|
||||
info.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
populateKernelDescriptor(info.kernelDescriptor, dataParameterStream);
|
||||
|
||||
SPatchBindingTableState bindingTableState;
|
||||
bindingTableState.Count = 0;
|
||||
bindingTableState.Offset = 0;
|
||||
bindingTableState.Size = 0;
|
||||
bindingTableState.SurfaceStateOffset = 0;
|
||||
info.patchInfo.bindingTableState = &bindingTableState;
|
||||
populateKernelDescriptor(info.kernelDescriptor, bindingTableState);
|
||||
|
||||
KernelArgInfo argInfo;
|
||||
argInfo.isDeviceQueue = true;
|
||||
@@ -787,7 +786,7 @@ TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelReflectionSurfaceThenKerne
|
||||
|
||||
blockCurbeParamCounts[i] = curbeParamsForBlock.size();
|
||||
|
||||
maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast<size_t>(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize));
|
||||
maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast<size_t>(pBlockInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize));
|
||||
totalCurbeParamsSize += blockCurbeParamCounts[i];
|
||||
|
||||
size_t samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(pDevice->getHardwareInfo());
|
||||
@@ -1234,21 +1233,21 @@ class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam<s
|
||||
samplerStateArray.Offset = 5;
|
||||
samplerStateArray.Size = 16;
|
||||
samplerStateArray.Token = 1;
|
||||
|
||||
info.patchInfo.samplerStateArray = &samplerStateArray;
|
||||
populateKernelDescriptor(info.kernelDescriptor, samplerStateArray);
|
||||
|
||||
dataParameterStream.DataParameterStreamSize = 60;
|
||||
dataParameterStream.Size = 20;
|
||||
dataParameterStream.Token = 3;
|
||||
|
||||
info.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
populateKernelDescriptor(info.kernelDescriptor, dataParameterStream);
|
||||
|
||||
info.kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
info.kernelDescriptor.kernelAttributes.barrierCount = 1;
|
||||
|
||||
info.patchInfo.threadPayload = &threadPayload;
|
||||
SPatchThreadPayload threadPayload = {};
|
||||
populateKernelDescriptor(info.kernelDescriptor, threadPayload);
|
||||
|
||||
info.patchInfo.pAllocateStatelessPrivateSurface = &privateSurface;
|
||||
SPatchAllocateStatelessPrivateSurface privateSurface;
|
||||
populateKernelDescriptor(info.kernelDescriptor, privateSurface);
|
||||
|
||||
info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 4;
|
||||
info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 8;
|
||||
@@ -1269,8 +1268,6 @@ class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam<s
|
||||
KernelInfo info;
|
||||
SPatchSamplerStateArray samplerStateArray;
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
SPatchThreadPayload threadPayload;
|
||||
SPatchAllocateStatelessPrivateSurface privateSurface;
|
||||
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParams;
|
||||
};
|
||||
@@ -1293,12 +1290,16 @@ TEST_P(ReflectionSurfaceHelperSetKernelDataTest, WhenSettingKernelDataThenDataAn
|
||||
|
||||
std::tie(localIDPresent, privateSurfaceSize) = GetParam();
|
||||
|
||||
SPatchThreadPayload threadPayload = {};
|
||||
threadPayload.LocalIDFlattenedPresent = localIDPresent.flattend;
|
||||
threadPayload.LocalIDXPresent = localIDPresent.x;
|
||||
threadPayload.LocalIDYPresent = localIDPresent.y;
|
||||
threadPayload.LocalIDZPresent = localIDPresent.z;
|
||||
populateKernelDescriptor(info.kernelDescriptor, threadPayload);
|
||||
|
||||
SPatchAllocateStatelessPrivateSurface privateSurface = {};
|
||||
privateSurface.PerThreadPrivateMemorySize = privateSurfaceSize;
|
||||
populateKernelDescriptor(info.kernelDescriptor, privateSurface);
|
||||
|
||||
std::unique_ptr<char> kernelDataMemory(new char[4096]);
|
||||
|
||||
@@ -1351,8 +1352,6 @@ TEST_P(ReflectionSurfaceHelperSetKernelDataTest, WhenSettingKernelDataThenDataAn
|
||||
}
|
||||
|
||||
TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullThreadPayloadWhenSettingKernelDataThenDataAndOffsetsAreCorrect) {
|
||||
info.patchInfo.threadPayload = nullptr;
|
||||
|
||||
std::unique_ptr<char> kernelDataMemory(new char[4096]);
|
||||
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParams;
|
||||
@@ -1380,11 +1379,11 @@ TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullThreadPayloadWhenSetti
|
||||
}
|
||||
|
||||
TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullPrivateSurfaceWhenSettingKernelDataThenDataAndOffsetsAreCorrect) {
|
||||
info.patchInfo.pAllocateStatelessPrivateSurface = nullptr;
|
||||
|
||||
std::unique_ptr<char> kernelDataMemory(new char[4096]);
|
||||
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParams;
|
||||
SPatchAllocateStatelessPrivateSurface patch = {};
|
||||
populateKernelDescriptor(info.kernelDescriptor, patch);
|
||||
|
||||
uint64_t tokenMask = 1 | 2 | 4;
|
||||
|
||||
@@ -1409,7 +1408,8 @@ TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullPrivateSurfaceWhenSett
|
||||
}
|
||||
|
||||
TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullSamplerStateWhenSettingKernelDataThenDataAndOffsetsAreCorrect) {
|
||||
info.patchInfo.samplerStateArray = nullptr;
|
||||
SPatchSamplerStateArray samplerStateArray = {};
|
||||
populateKernelDescriptor(info.kernelDescriptor, samplerStateArray);
|
||||
|
||||
std::unique_ptr<char> kernelDataMemory(new char[4096]);
|
||||
|
||||
@@ -1462,8 +1462,6 @@ TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenDisabledConcurrentExecutio
|
||||
TEST_F(ReflectionSurfaceHelperFixture, GivenNullBindingTableWhenSettingKernelDataThenDataIsCorrectlySet) {
|
||||
KernelInfo info;
|
||||
|
||||
info.patchInfo.bindingTableState = nullptr;
|
||||
|
||||
std::unique_ptr<char> kernelDataMemory(new char[200]);
|
||||
IGIL_KernelAddressData *kernalAddressData = reinterpret_cast<IGIL_KernelAddressData *>(kernelDataMemory.get());
|
||||
MockKernel::ReflectionSurfaceHelperPublic::setKernelAddressData(kernelDataMemory.get(), 0, 1, 2, 3, 4, 5, 6, info, pPlatform->getClDevice(0)->getHardwareInfo());
|
||||
@@ -1482,8 +1480,7 @@ TEST_F(ReflectionSurfaceHelperFixture, GivenSetBindingTableWhenSettingKernelData
|
||||
SPatchBindingTableState bindingTableStateInfo;
|
||||
bindingTableStateInfo.Offset = 0;
|
||||
bindingTableStateInfo.Count = 4;
|
||||
|
||||
info.patchInfo.bindingTableState = &bindingTableStateInfo;
|
||||
populateKernelDescriptor(info.kernelDescriptor, bindingTableStateInfo);
|
||||
|
||||
std::unique_ptr<char> kernelDataMemory(new char[200]);
|
||||
IGIL_KernelAddressData *kernalAddressData = reinterpret_cast<IGIL_KernelAddressData *>(kernelDataMemory.get());
|
||||
@@ -1501,8 +1498,6 @@ TEST_F(ReflectionSurfaceHelperFixture, GivenSetBindingTableWhenSettingKernelData
|
||||
TEST_F(ReflectionSurfaceHelperFixture, WhenPatchingBlocksCurbeThenAddressesAreSetCorrectly) {
|
||||
KernelInfo info;
|
||||
|
||||
info.patchInfo.bindingTableState = nullptr;
|
||||
|
||||
std::unique_ptr<char> refletionSurfaceMemory(new char[4096]);
|
||||
IGIL_KernelDataHeader *header = reinterpret_cast<IGIL_KernelDataHeader *>(refletionSurfaceMemory.get());
|
||||
header->m_numberOfKernels = 2;
|
||||
@@ -1549,8 +1544,6 @@ TEST_F(ReflectionSurfaceHelperFixture, WhenPatchingBlocksCurbeThenAddressesAreSe
|
||||
TEST_F(ReflectionSurfaceHelperFixture, GivenUndefinedOffsetsWhenPatchingBlocksCurbeThenAddressesAreSetCorrectly) {
|
||||
KernelInfo info;
|
||||
|
||||
info.patchInfo.bindingTableState = nullptr;
|
||||
|
||||
std::unique_ptr<char> refletionSurfaceMemory(new char[4096]);
|
||||
IGIL_KernelDataHeader *header = reinterpret_cast<IGIL_KernelDataHeader *>(refletionSurfaceMemory.get());
|
||||
header->m_numberOfKernels = 2;
|
||||
@@ -1948,7 +1941,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryWh
|
||||
|
||||
auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0);
|
||||
|
||||
uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset;
|
||||
uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless;
|
||||
|
||||
uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset);
|
||||
|
||||
@@ -1982,7 +1975,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryAn
|
||||
|
||||
auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0);
|
||||
|
||||
uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset;
|
||||
uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless;
|
||||
uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset);
|
||||
|
||||
EXPECT_EQ(0u, *pCurbe);
|
||||
@@ -2015,7 +2008,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemory
|
||||
|
||||
auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0);
|
||||
|
||||
uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset;
|
||||
uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless;
|
||||
|
||||
uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset);
|
||||
uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset);
|
||||
@@ -2058,7 +2051,7 @@ TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemory
|
||||
|
||||
auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0);
|
||||
|
||||
uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset;
|
||||
uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless;
|
||||
|
||||
uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset);
|
||||
uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset);
|
||||
@@ -2093,14 +2086,14 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenNoKernelArgsWhenObtainingKernelRefl
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
dataParameterStream.Size = 0;
|
||||
dataParameterStream.DataParameterStreamSize = 0;
|
||||
info.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
populateKernelDescriptor(info.kernelDescriptor, dataParameterStream);
|
||||
|
||||
SPatchBindingTableState bindingTableState;
|
||||
bindingTableState.Count = 0;
|
||||
bindingTableState.Offset = 0;
|
||||
bindingTableState.Size = 0;
|
||||
bindingTableState.SurfaceStateOffset = 0;
|
||||
info.patchInfo.bindingTableState = &bindingTableState;
|
||||
populateKernelDescriptor(info.kernelDescriptor, bindingTableState);
|
||||
|
||||
auto rootDeviceIndex = device1->getRootDeviceIndex();
|
||||
KernelInfoContainer kernelInfos;
|
||||
@@ -2148,14 +2141,14 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenDeviceQueueKernelArgWhenObtainingKe
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
dataParameterStream.Size = 0;
|
||||
dataParameterStream.DataParameterStreamSize = 0;
|
||||
info.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
populateKernelDescriptor(info.kernelDescriptor, dataParameterStream);
|
||||
|
||||
SPatchBindingTableState bindingTableState;
|
||||
bindingTableState.Count = 0;
|
||||
bindingTableState.Offset = 0;
|
||||
bindingTableState.Size = 0;
|
||||
bindingTableState.SurfaceStateOffset = 0;
|
||||
info.patchInfo.bindingTableState = &bindingTableState;
|
||||
populateKernelDescriptor(info.kernelDescriptor, bindingTableState);
|
||||
|
||||
KernelArgInfo argInfo;
|
||||
argInfo.isDeviceQueue = true;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -22,21 +22,22 @@ struct KernelSLMAndBarrierTest : public ClDeviceFixture,
|
||||
ClDeviceFixture::SetUp();
|
||||
program = std::make_unique<MockProgram>(toClDeviceVector(*pClDevice));
|
||||
|
||||
SPatchDataParameterStream dataParameterStream = {};
|
||||
memset(&dataParameterStream, 0, sizeof(dataParameterStream));
|
||||
dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData);
|
||||
populateKernelDescriptor(kernelInfo.kernelDescriptor, dataParameterStream);
|
||||
|
||||
SPatchThreadPayload threadPayload;
|
||||
memset(&threadPayload, 0, sizeof(threadPayload));
|
||||
threadPayload.LocalIDXPresent = 1;
|
||||
threadPayload.LocalIDYPresent = 1;
|
||||
threadPayload.LocalIDZPresent = 1;
|
||||
populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload);
|
||||
|
||||
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
|
||||
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
|
||||
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
kernelInfo.patchInfo.threadPayload = &threadPayload;
|
||||
}
|
||||
void TearDown() override {
|
||||
ClDeviceFixture::TearDown();
|
||||
@@ -48,8 +49,6 @@ struct KernelSLMAndBarrierTest : public ClDeviceFixture,
|
||||
std::unique_ptr<MockProgram> program;
|
||||
|
||||
SKernelBinaryHeaderCommon kernelHeader;
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
SPatchThreadPayload threadPayload;
|
||||
KernelInfo kernelInfo;
|
||||
|
||||
uint32_t kernelIsa[32];
|
||||
|
||||
@@ -422,23 +422,6 @@ TEST_F(KernelFromBinaryTests, givenArgumentDeclaredAsConstantWhenKernelIsCreated
|
||||
EXPECT_TRUE(pKernelInfo->kernelArgInfo[0].isReadOnly);
|
||||
}
|
||||
|
||||
TEST(PatchInfo, WhenPatchInfoIsCreatedThenMembersAreNullptr) {
|
||||
PatchInfo patchInfo;
|
||||
EXPECT_EQ(nullptr, patchInfo.interfaceDescriptorDataLoad);
|
||||
EXPECT_EQ(nullptr, patchInfo.localsurface);
|
||||
EXPECT_EQ(nullptr, patchInfo.mediavfestate);
|
||||
EXPECT_EQ(nullptr, patchInfo.mediaVfeStateSlot1);
|
||||
EXPECT_EQ(nullptr, patchInfo.interfaceDescriptorData);
|
||||
EXPECT_EQ(nullptr, patchInfo.samplerStateArray);
|
||||
EXPECT_EQ(nullptr, patchInfo.bindingTableState);
|
||||
EXPECT_EQ(nullptr, patchInfo.dataParameterStream);
|
||||
EXPECT_EQ(nullptr, patchInfo.threadPayload);
|
||||
EXPECT_EQ(nullptr, patchInfo.pKernelAttributesInfo);
|
||||
EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrivateSurface);
|
||||
EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization);
|
||||
EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization);
|
||||
}
|
||||
|
||||
typedef Test<ClDeviceFixture> KernelPrivateSurfaceTest;
|
||||
typedef Test<ClDeviceFixture> KernelGlobalSurfaceTest;
|
||||
typedef Test<ClDeviceFixture> KernelConstantSurfaceTest;
|
||||
@@ -546,11 +529,11 @@ TEST_F(KernelPrivateSurfaceTest, WhenChangingResidencyThenCsrResidencySizeIsUpda
|
||||
tokenSPS.DataParamOffset = 40;
|
||||
tokenSPS.DataParamSize = 8;
|
||||
tokenSPS.PerThreadPrivateMemorySize = 112;
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenSPS);
|
||||
|
||||
SPatchDataParameterStream tokenDPS;
|
||||
tokenDPS.DataParameterStreamSize = 64;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenDPS);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
@@ -583,11 +566,11 @@ TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWh
|
||||
tokenSPS.DataParamOffset = 40;
|
||||
tokenSPS.DataParamSize = 8;
|
||||
tokenSPS.PerThreadPrivateMemorySize = 112;
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenSPS);
|
||||
|
||||
SPatchDataParameterStream tokenDPS;
|
||||
tokenDPS.DataParameterStreamSize = 64;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenDPS);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
@@ -622,11 +605,11 @@ TEST_F(KernelPrivateSurfaceTest, WhenPrivateSurfaceAllocationFailsThenOutOfResou
|
||||
tokenSPS.DataParamOffset = 40;
|
||||
tokenSPS.DataParamSize = 8;
|
||||
tokenSPS.PerThreadPrivateMemorySize = 112;
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenSPS);
|
||||
|
||||
SPatchDataParameterStream tokenDPS;
|
||||
tokenDPS.DataParameterStreamSize = 64;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenDPS);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
@@ -663,11 +646,11 @@ TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateS
|
||||
tokenSPS.DataParamOffset = 40;
|
||||
tokenSPS.DataParamSize = 4;
|
||||
tokenSPS.PerThreadPrivateMemorySize = 112;
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenSPS);
|
||||
|
||||
SPatchDataParameterStream tokenDPS;
|
||||
tokenDPS.DataParameterStreamSize = 64;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenDPS);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
@@ -692,13 +675,12 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup constant memory
|
||||
SPatchAllocateStatelessPrivateSurface AllocateStatelessPrivateMemorySurface;
|
||||
AllocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 0;
|
||||
AllocateStatelessPrivateMemorySurface.DataParamOffset = 0;
|
||||
AllocateStatelessPrivateMemorySurface.DataParamSize = 8;
|
||||
AllocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 16;
|
||||
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &AllocateStatelessPrivateMemorySurface;
|
||||
SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateMemorySurface;
|
||||
allocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 0;
|
||||
allocateStatelessPrivateMemorySurface.DataParamOffset = 0;
|
||||
allocateStatelessPrivateMemorySurface.DataParamSize = 8;
|
||||
allocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 16;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessPrivateMemorySurface);
|
||||
|
||||
MockContext context;
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
@@ -724,7 +706,7 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
||||
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->SurfaceStateHeapOffset));
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful));
|
||||
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
|
||||
|
||||
EXPECT_EQ(bufferAddress, surfaceAddress);
|
||||
@@ -774,17 +756,18 @@ TEST_F(KernelPrivateSurfaceTest, givenNonNullDataParameterStreamWhenGettingConst
|
||||
|
||||
SPatchDataParameterStream tokenDPS;
|
||||
tokenDPS.DataParameterStreamSize = 64;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenDPS);
|
||||
|
||||
EXPECT_EQ(64u, pKernelInfo->getConstantBufferSize());
|
||||
}
|
||||
|
||||
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4ThenReturnOutOfResources) {
|
||||
auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
|
||||
pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
|
||||
|
||||
SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateSurface = {};
|
||||
allocateStatelessPrivateSurface.PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessPrivateSurface);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
MockContext context;
|
||||
@@ -798,11 +781,12 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe
|
||||
}
|
||||
|
||||
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4And32BitAllocationsThenReturnOutOfResources) {
|
||||
auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
|
||||
pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
|
||||
|
||||
SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateSurface = {};
|
||||
allocateStatelessPrivateSurface.PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessPrivateSurface);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
MockContext context;
|
||||
@@ -816,11 +800,12 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe
|
||||
}
|
||||
|
||||
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize8And32BitAllocationsThenReturnOutOfResources) {
|
||||
auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
|
||||
pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
|
||||
|
||||
SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateSurface = {};
|
||||
allocateStatelessPrivateSurface.PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessPrivateSurface);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
MockContext context;
|
||||
@@ -839,15 +824,14 @@ TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalS
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
// setup global memory
|
||||
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
|
||||
pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization allocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessGlobalMemorySurfaceWithInitialization);
|
||||
|
||||
SPatchDataParameterStream tempSPatchDataParameterStream;
|
||||
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tempSPatchDataParameterStream);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
@@ -878,15 +862,14 @@ TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalS
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
// setup global memory
|
||||
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
|
||||
pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization allocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessGlobalMemorySurfaceWithInitialization);
|
||||
|
||||
SPatchDataParameterStream tempSPatchDataParameterStream;
|
||||
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tempSPatchDataParameterStream);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
@@ -917,12 +900,11 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup global memory
|
||||
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
AllocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0;
|
||||
AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
|
||||
pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization allocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
allocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0;
|
||||
allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
allocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessGlobalMemorySurfaceWithInitialization);
|
||||
|
||||
char buffer[16];
|
||||
MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer));
|
||||
@@ -951,7 +933,7 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
||||
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
|
||||
pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->SurfaceStateHeapOffset));
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindful));
|
||||
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
|
||||
|
||||
EXPECT_EQ(bufferAddress, surfaceAddress);
|
||||
@@ -995,15 +977,14 @@ TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConst
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
// setup constant memory
|
||||
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
|
||||
pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization allocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
allocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
allocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessConstantMemorySurfaceWithInitialization);
|
||||
|
||||
SPatchDataParameterStream tempSPatchDataParameterStream;
|
||||
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tempSPatchDataParameterStream);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
@@ -1033,15 +1014,14 @@ TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConst
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
// setup constant memory
|
||||
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
|
||||
pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization allocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
allocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
allocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessConstantMemorySurfaceWithInitialization);
|
||||
|
||||
SPatchDataParameterStream tempSPatchDataParameterStream;
|
||||
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tempSPatchDataParameterStream);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
@@ -1071,12 +1051,11 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup constant memory
|
||||
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
AllocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0;
|
||||
AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
|
||||
pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization allocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
allocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0;
|
||||
allocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0;
|
||||
allocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocateStatelessConstantMemorySurfaceWithInitialization);
|
||||
|
||||
char buffer[16];
|
||||
MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer));
|
||||
@@ -1105,7 +1084,7 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
||||
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
|
||||
pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->SurfaceStateHeapOffset));
|
||||
pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful));
|
||||
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
|
||||
|
||||
EXPECT_EQ(bufferAddress, surfaceAddress);
|
||||
@@ -2461,7 +2440,7 @@ struct KernelCrossThreadTests : Test<ClDeviceFixture> {
|
||||
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
ASSERT_NE(nullptr, pKernelInfo);
|
||||
pKernelInfo->patchInfo.dataParameterStream = &patchDataParameterStream;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, patchDataParameterStream);
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
}
|
||||
|
||||
@@ -2628,12 +2607,11 @@ TEST_F(KernelCrossThreadTests, GivenSlmStatisSizeWhenCreatingKernelThenSlmTotalS
|
||||
delete kernel;
|
||||
}
|
||||
TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCurbeIsPatchedProperly) {
|
||||
|
||||
SPatchAllocateStatelessPrivateSurface allocatePrivate;
|
||||
allocatePrivate.DataParamSize = 8;
|
||||
allocatePrivate.DataParamOffset = 0;
|
||||
allocatePrivate.PerThreadPrivateMemorySize = 1;
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &allocatePrivate;
|
||||
populateKernelDescriptor(pKernelInfo->kernelDescriptor, allocatePrivate);
|
||||
|
||||
MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
|
||||
|
||||
@@ -2694,15 +2672,12 @@ TEST_F(KernelCrossThreadTests, WhenPatchingBlocksSimdSizeThenSimdSizeIsPatchedCo
|
||||
|
||||
TEST(KernelInfoTest, WhenPatchingBorderColorOffsetThenPatchIsAppliedCorrectly) {
|
||||
KernelInfo info;
|
||||
SPatchSamplerStateArray samplerState;
|
||||
samplerState.BorderColorOffset = 3;
|
||||
|
||||
info.patchInfo.samplerStateArray = nullptr;
|
||||
|
||||
EXPECT_EQ(0u, info.getBorderColorOffset());
|
||||
|
||||
info.patchInfo.samplerStateArray = &samplerState;
|
||||
|
||||
SPatchSamplerStateArray samplerState = {};
|
||||
samplerState.BorderColorOffset = 3;
|
||||
samplerState.Count = 1;
|
||||
populateKernelDescriptor(info.kernelDescriptor, samplerState);
|
||||
EXPECT_EQ(3u, info.getBorderColorOffset());
|
||||
}
|
||||
|
||||
@@ -3141,12 +3116,14 @@ TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivat
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
|
||||
MockKernelWithInternals mockKernel(*device);
|
||||
|
||||
SPatchMediaVFEState mediaVFEstate;
|
||||
mediaVFEstate.PerThreadScratchSpace = 512u;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);
|
||||
|
||||
SPatchMediaVFEState mediaVFEstateSlot1;
|
||||
mediaVFEstateSlot1.PerThreadScratchSpace = 1024u;
|
||||
mediaVFEstate.PerThreadScratchSpace = 512u;
|
||||
mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate;
|
||||
mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstateSlot1;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstateSlot1, 1);
|
||||
|
||||
EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize(device->getRootDeviceIndex()));
|
||||
}
|
||||
@@ -3155,7 +3132,6 @@ TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateSc
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
|
||||
MockKernelWithInternals mockKernel(*device);
|
||||
mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = nullptr;
|
||||
|
||||
EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize(device->getRootDeviceIndex()));
|
||||
}
|
||||
@@ -3174,6 +3150,31 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitS
|
||||
EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size());
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelWithPatchInfoCollecitonEnabledAndArgumentWithInvalidCrossThreadDataOffsetWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) {
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockKernelWithInternals kernel(*device);
|
||||
MockGraphicsAllocation mockAllocation;
|
||||
ArgDescPointer arg;
|
||||
uint64_t ptr = 0;
|
||||
kernel.mockKernel->patchWithImplicitSurface(&ptr, mockAllocation, device->getDevice(), arg);
|
||||
EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size());
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledAndValidArgumentWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockKernelWithInternals kernel(*device);
|
||||
MockGraphicsAllocation mockAllocation;
|
||||
ArgDescPointer arg;
|
||||
arg.stateless = 0;
|
||||
uint64_t crossThreadData = 0;
|
||||
EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size());
|
||||
kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, device->getDevice(), arg);
|
||||
EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size());
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelWithPatchInfoCollectionDisabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) {
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockKernelWithInternals kernel(*device);
|
||||
@@ -3234,7 +3235,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse
SPatchThreadPayload threadPayload = {};

threadPayload.OffsetToSkipPerThreadDataLoad = 128u;
mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, threadPayload);

mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
@@ -3253,7 +3254,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet
SPatchThreadPayload threadPayload = {};

threadPayload.OffsetToSkipPerThreadDataLoad = 128u;
mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, threadPayload);

mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
@@ -3272,7 +3273,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe
SPatchThreadPayload threadPayload = {};

threadPayload.OffsetToSkipPerThreadDataLoad = 128u;
mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload;
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, threadPayload);

mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
@@ -3395,7 +3396,7 @@ TEST_F(KernelMultiRootDeviceTest, givenKernelWithPrivateSurfaceWhenInitializeThe
tokenSPS.DataParamOffset = 40;
tokenSPS.DataParamSize = 8;
tokenSPS.PerThreadPrivateMemorySize = 112;
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS;
populateKernelDescriptor(pKernelInfo->kernelDescriptor, tokenSPS);

KernelInfoContainer kernelInfos;
kernelInfos.resize(deviceFactory->rootDevices.size());
@@ -132,50 +132,46 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsA

crossThreadOffsetBlock += 8;

auto privateSurfaceBlock = std::make_unique<SPatchAllocateStatelessPrivateSurface>();
privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock;
privateSurfaceBlock->DataParamSize = 8;
privateSurfaceBlock->Size = 8;
privateSurfaceBlock->SurfaceStateHeapOffset = 0;
privateSurfaceBlock->Token = 0;
privateSurfaceBlock->PerThreadPrivateMemorySize = 1000;
infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock.get();
SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {};
privateSurfaceBlock.DataParamOffset = crossThreadOffsetBlock;
privateSurfaceBlock.DataParamSize = 8;
privateSurfaceBlock.Size = 8;
privateSurfaceBlock.SurfaceStateHeapOffset = 0;
privateSurfaceBlock.Token = 0;
privateSurfaceBlock.PerThreadPrivateMemorySize = 1000;
populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock);

crossThreadOffsetBlock += 8;

SPatchThreadPayload *threadPayloadBlock = new SPatchThreadPayload;
threadPayloadBlock->LocalIDXPresent = 0;
threadPayloadBlock->LocalIDYPresent = 0;
threadPayloadBlock->LocalIDZPresent = 0;
threadPayloadBlock->HeaderPresent = 0;
threadPayloadBlock->Size = 128;

infoBlock->patchInfo.threadPayload = threadPayloadBlock;
SPatchThreadPayload threadPayloadBlock = {};
threadPayloadBlock.LocalIDXPresent = 0;
threadPayloadBlock.LocalIDYPresent = 0;
threadPayloadBlock.LocalIDZPresent = 0;
threadPayloadBlock.HeaderPresent = 0;
threadPayloadBlock.Size = 128;
populateKernelDescriptor(infoBlock->kernelDescriptor, threadPayloadBlock);

infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;

SPatchDataParameterStream *streamBlock = new SPatchDataParameterStream;
streamBlock->DataParameterStreamSize = 0;
streamBlock->Size = 0;
infoBlock->patchInfo.dataParameterStream = streamBlock;
SPatchDataParameterStream streamBlock = {};
streamBlock.DataParameterStreamSize = 0;
streamBlock.Size = 0;
populateKernelDescriptor(infoBlock->kernelDescriptor, streamBlock);

SPatchBindingTableState *bindingTable = new SPatchBindingTableState;
bindingTable->Count = 0;
bindingTable->Offset = 0;
bindingTable->Size = 0;
bindingTable->SurfaceStateOffset = 0;
infoBlock->patchInfo.bindingTableState = bindingTable;
SPatchBindingTableState bindingTable = {};
bindingTable.Count = 0;
bindingTable.Offset = 0;
bindingTable.Size = 0;
bindingTable.SurfaceStateOffset = 0;
populateKernelDescriptor(infoBlock->kernelDescriptor, bindingTable);

SPatchInterfaceDescriptorData *idData = new SPatchInterfaceDescriptorData;
idData->BindingTableOffset = 0;
idData->KernelOffset = 0;
idData->Offset = 0;
idData->SamplerStateOffset = 0;
idData->Size = 0;
infoBlock->patchInfo.interfaceDescriptorData = idData;

infoBlock->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr;
infoBlock->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr;
SPatchInterfaceDescriptorData idData = {};
idData.BindingTableOffset = 0;
idData.KernelOffset = 0;
idData.Offset = 0;
idData.SamplerStateOffset = 0;
idData.Size = 0;
populateKernelDescriptor(infoBlock->kernelDescriptor, idData);

infoBlock->heapInfo.pDsh = (void *)new uint64_t[64];
infoBlock->crossThreadData = new char[crossThreadOffsetBlock];
@@ -25,11 +25,24 @@
#include <cassert>

namespace NEO {
void populateKernelArgDescriptor(KernelDescriptor &dst, size_t argNum, const SPatchDataParameterBuffer &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessPrintfSurface &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnvironment &execEnv);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessEventPoolSurface &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessDefaultDeviceQueueSurface &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchString &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateSystemThreadSurface &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateLocalSurface &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchInterfaceDescriptorData &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchMediaVFEState &token, uint32_t slot);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchBindingTableState &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchThreadPayload &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchDataParameterStream &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateStatelessPrivateSurface &token);
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateSyncBuffer &token);

struct MockKernelObjForAuxTranslation : public KernelObjForAuxTranslation {
MockKernelObjForAuxTranslation(Type type) : KernelObjForAuxTranslation(type, nullptr) {
@@ -147,7 +160,6 @@ class MockKernel : public Kernel {
}

if (kernelInfoAllocated) {
delete kernelInfoAllocated->patchInfo.threadPayload;
delete kernelInfoAllocated;
}
}
@@ -162,14 +174,13 @@ class MockKernel : public Kernel {
auto info = new KernelInfo();
const size_t crossThreadSize = 160;

SPatchThreadPayload *threadPayload = new SPatchThreadPayload;
threadPayload->LocalIDXPresent = 0;
threadPayload->LocalIDYPresent = 0;
threadPayload->LocalIDZPresent = 0;
threadPayload->HeaderPresent = 0;
threadPayload->Size = 128;

info->patchInfo.threadPayload = threadPayload;
SPatchThreadPayload threadPayload = {};
threadPayload.LocalIDXPresent = 0;
threadPayload.LocalIDYPresent = 0;
threadPayload.LocalIDZPresent = 0;
threadPayload.HeaderPresent = 0;
threadPayload.Size = 128;
populateKernelDescriptor(info->kernelDescriptor, threadPayload);

info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = false;
info->kernelDescriptor.kernelAttributes.numGrfRequired = grfNumber;
@@ -214,6 +225,8 @@ class MockKernel : public Kernel {
}

if (newCrossThreadDataSize == 0) {
kernelDeviceInfos[rootDeviceIndex].crossThreadData = nullptr;
kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = 0;
return;
}
kernelDeviceInfos[rootDeviceIndex].crossThreadData = mockCrossThreadDatas[rootDeviceIndex].data();
@@ -300,26 +313,27 @@ class MockKernelWithInternals {
public:
MockKernelWithInternals(const ClDeviceVector &deviceVector, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) {
memset(&kernelHeader, 0, sizeof(SKernelBinaryHeaderCommon));
memset(&threadPayload, 0, sizeof(SPatchThreadPayload));
memset(&dataParameterStream, 0, sizeof(SPatchDataParameterStream));
memset(&mediaVfeState, 0, sizeof(SPatchMediaVFEState));
memset(&mediaVfeStateSlot1, 0, sizeof(SPatchMediaVFEState));
memset(&threadPayload, 0, sizeof(SPatchThreadPayload));
threadPayload.LocalIDXPresent = 1;
threadPayload.LocalIDYPresent = 1;
threadPayload.LocalIDZPresent = 1;

kernelInfo.heapInfo.pKernelHeap = kernelIsa;
kernelInfo.heapInfo.pSsh = sshLocal;
kernelInfo.heapInfo.pDsh = dshLocal;
kernelInfo.heapInfo.SurfaceStateHeapSize = sizeof(sshLocal);
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
populateKernelDescriptor(kernelInfo.kernelDescriptor, dataParameterStream);

populateKernelDescriptor(kernelInfo.kernelDescriptor, execEnv);
kernelInfo.kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;

kernelInfo.patchInfo.threadPayload = &threadPayload;
kernelInfo.patchInfo.mediavfestate = &mediaVfeState;
kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVfeStateSlot1;
populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload);
populateKernelDescriptor(kernelInfo.kernelDescriptor, mediaVfeState, 0);
populateKernelDescriptor(kernelInfo.kernelDescriptor, mediaVfeStateSlot1, 1);

if (context == nullptr) {
mockContext = new MockContext;
@@ -432,14 +446,13 @@ class MockParentKernel : public Kernel {
uint32_t crossThreadOffset = 0;
uint32_t crossThreadOffsetBlock = 0;

SPatchThreadPayload *threadPayload = new SPatchThreadPayload;
threadPayload->LocalIDXPresent = 0;
threadPayload->LocalIDYPresent = 0;
threadPayload->LocalIDZPresent = 0;
threadPayload->HeaderPresent = 0;
threadPayload->Size = 128;

info->patchInfo.threadPayload = threadPayload;
SPatchThreadPayload threadPayload = {};
threadPayload.LocalIDXPresent = 0;
threadPayload.LocalIDYPresent = 0;
threadPayload.LocalIDZPresent = 0;
threadPayload.HeaderPresent = 0;
threadPayload.Size = 128;
populateKernelDescriptor(info->kernelDescriptor, threadPayload);

info->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
@@ -523,63 +536,57 @@ class MockParentKernel : public Kernel {
crossThreadOffsetBlock += 8;
}

infoBlock->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr;
infoBlock->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr;

if (addChildGlobalMemory) {
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *globalMemoryBlock = new SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization;
globalMemoryBlock->DataParamOffset = crossThreadOffsetBlock;
globalMemoryBlock->DataParamSize = 8;
globalMemoryBlock->Size = 8;
globalMemoryBlock->SurfaceStateHeapOffset = 0;
globalMemoryBlock->Token = 0;
infoBlock->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = globalMemoryBlock;
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization globalMemoryBlock = {};
globalMemoryBlock.DataParamOffset = crossThreadOffsetBlock;
globalMemoryBlock.DataParamSize = 8;
globalMemoryBlock.Size = 8;
globalMemoryBlock.SurfaceStateHeapOffset = 0;
populateKernelDescriptor(infoBlock->kernelDescriptor, globalMemoryBlock);
crossThreadOffsetBlock += 8;
}

if (addChildConstantMemory) {
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *constantMemoryBlock = new SPatchAllocateStatelessConstantMemorySurfaceWithInitialization;
constantMemoryBlock->DataParamOffset = crossThreadOffsetBlock;
constantMemoryBlock->DataParamSize = 8;
constantMemoryBlock->Size = 8;
constantMemoryBlock->SurfaceStateHeapOffset = 0;
constantMemoryBlock->Token = 0;
infoBlock->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = constantMemoryBlock;
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization constantMemoryBlock = {};
constantMemoryBlock.DataParamOffset = crossThreadOffsetBlock;
constantMemoryBlock.DataParamSize = 8;
constantMemoryBlock.Size = 8;
constantMemoryBlock.SurfaceStateHeapOffset = 0;
populateKernelDescriptor(infoBlock->kernelDescriptor, constantMemoryBlock);
crossThreadOffsetBlock += 8;
}

SPatchThreadPayload *threadPayloadBlock = new SPatchThreadPayload;
threadPayloadBlock->LocalIDXPresent = 0;
threadPayloadBlock->LocalIDYPresent = 0;
threadPayloadBlock->LocalIDZPresent = 0;
threadPayloadBlock->HeaderPresent = 0;
threadPayloadBlock->Size = 128;

infoBlock->patchInfo.threadPayload = threadPayloadBlock;
SPatchThreadPayload threadPayloadBlock = {};
threadPayloadBlock.LocalIDXPresent = 0;
threadPayloadBlock.LocalIDYPresent = 0;
threadPayloadBlock.LocalIDZPresent = 0;
threadPayloadBlock.HeaderPresent = 0;
threadPayloadBlock.Size = 128;
populateKernelDescriptor(infoBlock->kernelDescriptor, threadPayloadBlock);

infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
infoBlock->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
infoBlock->kernelDescriptor.kernelAttributes.simdSize = 32;

SPatchDataParameterStream *streamBlock = new SPatchDataParameterStream;
streamBlock->DataParameterStreamSize = 0;
streamBlock->Size = 0;
infoBlock->patchInfo.dataParameterStream = streamBlock;
SPatchDataParameterStream streamBlock = {};
streamBlock.DataParameterStreamSize = 0;
streamBlock.Size = 0;
populateKernelDescriptor(infoBlock->kernelDescriptor, streamBlock);

SPatchBindingTableState *bindingTable = new SPatchBindingTableState;
bindingTable->Count = 0;
bindingTable->Offset = 0;
bindingTable->Size = 0;
bindingTable->SurfaceStateOffset = 0;
infoBlock->patchInfo.bindingTableState = bindingTable;
SPatchBindingTableState bindingTable = {};
bindingTable.Count = 0;
bindingTable.Offset = 0;
bindingTable.Size = 0;
bindingTable.SurfaceStateOffset = 0;
populateKernelDescriptor(infoBlock->kernelDescriptor, bindingTable);

SPatchInterfaceDescriptorData *idData = new SPatchInterfaceDescriptorData;
idData->BindingTableOffset = 0;
idData->KernelOffset = 0;
idData->Offset = 0;
idData->SamplerStateOffset = 0;
idData->Size = 0;
infoBlock->patchInfo.interfaceDescriptorData = idData;
SPatchInterfaceDescriptorData idData = {};
idData.BindingTableOffset = 0;
idData.KernelOffset = 0;
idData.Offset = 0;
idData.SamplerStateOffset = 0;
idData.Size = 0;
populateKernelDescriptor(infoBlock->kernelDescriptor, idData);

infoBlock->heapInfo.pDsh = (void *)new uint64_t[64];
infoBlock->crossThreadData = new char[crossThreadOffsetBlock > crossThreadSize ? crossThreadOffsetBlock : crossThreadSize];
@@ -599,18 +606,11 @@ class MockParentKernel : public Kernel {
continue;
}
auto &kernelInfo = *pKernelInfo;
delete kernelInfo.patchInfo.threadPayload;
delete &kernelInfo;
BlockKernelManager *blockManager = program->getBlockKernelManager();

for (uint32_t i = 0; i < blockManager->getCount(); i++) {
const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i);
delete blockInfo->patchInfo.threadPayload;
delete blockInfo->patchInfo.dataParameterStream;
delete blockInfo->patchInfo.bindingTableState;
delete blockInfo->patchInfo.interfaceDescriptorData;
delete blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization;
delete blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization;
delete[](uint64_t *) blockInfo->heapInfo.pDsh;
}
}
@@ -640,28 +640,16 @@ class MockSchedulerKernel : public SchedulerKernel {
class MockDebugKernel : public MockKernel {
public:
MockDebugKernel(Program *program, KernelInfoContainer &kernelInfos) : MockKernel(program, kernelInfos) {
if (!kernelInfos[0]->patchInfo.pAllocateSystemThreadSurface) {
SPatchAllocateSystemThreadSurface *patchToken = new SPatchAllocateSystemThreadSurface;

patchToken->BTI = 0;
patchToken->Offset = 0;
patchToken->PerThreadSystemThreadSurfaceSize = MockDebugKernel::perThreadSystemThreadSurfaceSize;
patchToken->Size = sizeof(SPatchAllocateSystemThreadSurface);
patchToken->Token = iOpenCL::PATCH_TOKEN_ALLOCATE_SIP_SURFACE;

const_cast<KernelInfo *>(kernelInfos[0])->patchInfo.pAllocateSystemThreadSurface = patchToken;

systemThreadSurfaceAllocated = true;
if (!isValidOffset(kernelInfos[0]->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) {
SPatchAllocateSystemThreadSurface allocateSystemThreadSurface = {};
allocateSystemThreadSurface.Offset = 0;
allocateSystemThreadSurface.PerThreadSystemThreadSurfaceSize = MockDebugKernel::perThreadSystemThreadSurfaceSize;
populateKernelDescriptor(const_cast<KernelDescriptor &>(kernelInfos[0]->kernelDescriptor), allocateSystemThreadSurface);
}
}

~MockDebugKernel() override {
if (systemThreadSurfaceAllocated) {
delete kernelInfos[0]->patchInfo.pAllocateSystemThreadSurface;
}
}
~MockDebugKernel() override {}
static const uint32_t perThreadSystemThreadSurfaceSize;
bool systemThreadSurfaceAllocated = false;
};

} // namespace NEO
@@ -41,15 +41,16 @@ struct ProfilingTests : public CommandEnqueueFixture,

kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;

SPatchThreadPayload threadPayload = {};
memset(&threadPayload, 0, sizeof(threadPayload));
threadPayload.LocalIDXPresent = 1;
threadPayload.LocalIDYPresent = 1;
threadPayload.LocalIDZPresent = 1;
populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload);

kernelInfo.heapInfo.pKernelHeap = kernelIsa;
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
kernelInfo.patchInfo.threadPayload = &threadPayload;
populateKernelDescriptor(kernelInfo.kernelDescriptor, dataParameterStream);
}

void TearDown() override {
@@ -60,7 +61,6 @@ struct ProfilingTests : public CommandEnqueueFixture,

SKernelBinaryHeaderCommon kernelHeader = {};
SPatchDataParameterStream dataParameterStream = {};
SPatchThreadPayload threadPayload = {};
KernelInfo kernelInfo;
MockContext ctx;
@@ -5,6 +5,7 @@
*
*/

#include "shared/source/device_binary_format/patchtokens_decoder.h"
#include "shared/source/helpers/string.h"

#include "opencl/source/platform/platform.h"
@@ -38,20 +39,6 @@ TEST_F(KernelDataTest, GivenHeapsWhenBuildingThenProgramIsCorrect) {
buildAndDecode();
}

TEST_F(KernelDataTest, GivenMediaInterfaceDescriptorLoadWhenBuildingThenProgramIsCorrect) {
iOpenCL::SPatchMediaInterfaceDescriptorLoad mediaIdLoad;
mediaIdLoad.Token = PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD;
mediaIdLoad.Size = sizeof(SPatchMediaInterfaceDescriptorLoad);
mediaIdLoad.InterfaceDescriptorDataOffset = 0xabcd;

pPatchList = &mediaIdLoad;
patchListSize = mediaIdLoad.Size;

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD, pKernelInfo->patchInfo.interfaceDescriptorDataLoad->Token);
}

TEST_F(KernelDataTest, GivenAllocateLocalSurfaceWhenBuildingThenProgramIsCorrect) {
iOpenCL::SPatchAllocateLocalSurface allocateLocalSurface;
allocateLocalSurface.Token = PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE;
@@ -64,8 +51,7 @@ TEST_F(KernelDataTest, GivenAllocateLocalSurfaceWhenBuildingThenProgramIsCorrect

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE, pKernelInfo->patchInfo.localsurface->Token);
EXPECT_EQ_VAL(allocateLocalSurface.TotalInlineLocalMemorySize, pKernelInfo->patchInfo.localsurface->TotalInlineLocalMemorySize);
EXPECT_EQ_VAL(allocateLocalSurface.TotalInlineLocalMemorySize, pKernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize);
}

TEST_F(KernelDataTest, GivenAllocateStatelessConstantMemoryWithInitWhenBuildingThenProgramIsCorrect) {
@@ -81,8 +67,7 @@ TEST_F(KernelDataTest, GivenAllocateStatelessConstantMemoryWithInitWhenBuildingT

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION, pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->Token);
EXPECT_EQ_VAL(0xddu, pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->SurfaceStateHeapOffset);
EXPECT_EQ_VAL(0xddu, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful);
}

TEST_F(KernelDataTest, GivenAllocateStatelessGlobalMemoryWithInitWhenBuildingThenProgramIsCorrect) {
@@ -98,8 +83,7 @@ TEST_F(KernelDataTest, GivenAllocateStatelessGlobalMemoryWithInitWhenBuildingThe

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION, pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->Token);
EXPECT_EQ_VAL(0xddu, pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->SurfaceStateHeapOffset);
EXPECT_EQ_VAL(0xddu, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindful);
}

TEST_F(KernelDataTest, GivenPrintfStringWhenBuildingThenProgramIsCorrect) {
@@ -146,9 +130,7 @@ TEST_F(KernelDataTest, GivenMediaVfeStateWhenBuildingThenProgramIsCorrect) {

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_MEDIA_VFE_STATE, pKernelInfo->patchInfo.mediavfestate->Token);
EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->patchInfo.mediavfestate->PerThreadScratchSpace);
EXPECT_EQ_VAL(MediaVFEState.ScratchSpaceOffset, pKernelInfo->patchInfo.mediavfestate->ScratchSpaceOffset);
EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
}

TEST_F(KernelDataTest, WhenMediaVfeStateSlot1TokenIsParsedThenCorrectValuesAreSet) {
@@ -163,9 +145,7 @@ TEST_F(KernelDataTest, WhenMediaVfeStateSlot1TokenIsParsedThenCorrectValuesAreSe

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1, pKernelInfo->patchInfo.mediaVfeStateSlot1->Token);
EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace);
EXPECT_EQ_VAL(MediaVFEState.ScratchSpaceOffset, pKernelInfo->patchInfo.mediaVfeStateSlot1->ScratchSpaceOffset);
EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
}

TEST_F(KernelDataTest, GivenSyncBufferTokenWhenParsingProgramThenTokenIsFound) {
@@ -181,27 +161,10 @@ TEST_F(KernelDataTest, GivenSyncBufferTokenWhenParsingProgramThenTokenIsFound) {

buildAndDecode();

EXPECT_EQ(token.Token, pKernelInfo->patchInfo.pAllocateSyncBuffer->Token);
EXPECT_EQ(token.SurfaceStateHeapOffset, pKernelInfo->patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset);
EXPECT_EQ(token.DataParamOffset, pKernelInfo->patchInfo.pAllocateSyncBuffer->DataParamOffset);
EXPECT_EQ(token.DataParamSize, pKernelInfo->patchInfo.pAllocateSyncBuffer->DataParamSize);
}

TEST_F(KernelDataTest, GivenMediaInterfaceDescriptorDataWhenBuildingThenProgramIsCorrect) {
iOpenCL::SPatchInterfaceDescriptorData idData;
idData.Token = PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA;
idData.Size = sizeof(SPatchInterfaceDescriptorData);
idData.BindingTableOffset = 0xaa;
idData.KernelOffset = 0xbb;
idData.Offset = 0xcc;
idData.SamplerStateOffset = 0xdd;

pPatchList = &idData;
patchListSize = idData.Size;

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA, pKernelInfo->patchInfo.interfaceDescriptorData->Token);
EXPECT_TRUE(pKernelInfo->kernelDescriptor.kernelAttributes.flags.usesSyncBuffer);
EXPECT_EQ(token.SurfaceStateHeapOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindful);
EXPECT_EQ(token.DataParamOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.stateless);
EXPECT_EQ(token.DataParamSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.pointerSize);
}

TEST_F(KernelDataTest, GivenSamplerArgumentWhenBuildingThenProgramIsCorrect) {
@@ -251,45 +214,22 @@ TEST_F(KernelDataTest, GivenBindingTableStateWhenBuildingThenProgramIsCorrect) {

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_BINDING_TABLE_STATE, pKernelInfo->patchInfo.bindingTableState->Token);
EXPECT_EQ_CONST(bindingTableState.Count, pKernelInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries);
EXPECT_EQ_CONST(bindingTableState.Offset, pKernelInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset);
}

TEST_F(KernelDataTest, GivenDataParameterStreamWhenBuildingThenProgramIsCorrect) {
iOpenCL::SPatchDataParameterStream dataParameterStream;
dataParameterStream.Token = PATCH_TOKEN_DATA_PARAMETER_STREAM;
dataParameterStream.Size = sizeof(SPatchDataParameterStream);
dataParameterStream.DataParameterStreamSize = 0x10;
dataParameterStream.DataParameterStreamSize = 64;

pPatchList = &dataParameterStream;
patchListSize = dataParameterStream.Size;

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_STREAM, pKernelInfo->patchInfo.dataParameterStream->Token);
}

TEST_F(KernelDataTest, GivenThreadPayloadWhenBuildingThenProgramIsCorrect) {
iOpenCL::SPatchThreadPayload threadPayload;
threadPayload.Token = PATCH_TOKEN_THREAD_PAYLOAD;
threadPayload.Size = sizeof(SPatchThreadPayload);
threadPayload.GetGlobalOffsetPresent = true;
threadPayload.GetGroupIDPresent = true;
threadPayload.GetLocalIDPresent = true;
threadPayload.HeaderPresent = true;
threadPayload.IndirectPayloadStorage = true;
threadPayload.LocalIDFlattenedPresent = true;
threadPayload.LocalIDXPresent = true;
threadPayload.LocalIDYPresent = true;
threadPayload.LocalIDZPresent = true;
threadPayload.OffsetToSkipPerThreadDataLoad = true;
threadPayload.PassInlineData = true;

pPatchList = &threadPayload;
patchListSize = threadPayload.Size;

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_THREAD_PAYLOAD, pKernelInfo->patchInfo.threadPayload->Token);
EXPECT_EQ_CONST(dataParameterStream.DataParameterStreamSize, pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize);
}

TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingThenProgramIsCorrect) {
@@ -444,27 +384,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentRequiresSubgroupIndependentForwa
EXPECT_EQ_VAL(1u, program->getSubgroupKernelInfoArray(rootDeviceIndex).size());
}

TEST_F(KernelDataTest, GivenKernelAttributesInfoWhenBuildingThenProgramIsCorrect) {
iOpenCL::SPatchKernelAttributesInfo kernelAttributesInfo;
kernelAttributesInfo.Token = PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO;
kernelAttributesInfo.AttributesSize = 0x10;
kernelAttributesInfo.Size = sizeof(SPatchKernelAttributesInfo) + kernelAttributesInfo.AttributesSize;
const std::string attributesValue = "dummy_attribute";

std::vector<char> patchToken(sizeof(iOpenCL::SPatchKernelAttributesInfo) + kernelAttributesInfo.AttributesSize);
memcpy_s(patchToken.data(), patchToken.size(), &kernelAttributesInfo, sizeof(iOpenCL::SPatchKernelAttributesInfo));
memcpy_s(patchToken.data() + sizeof(iOpenCL::SPatchKernelAttributesInfo), kernelAttributesInfo.AttributesSize,
attributesValue.data(), attributesValue.size());

pPatchList = patchToken.data();
patchListSize = static_cast<uint32_t>(patchToken.size());

buildAndDecode();

EXPECT_EQ(attributesValue, pKernelInfo->kernelDescriptor.kernelMetadata.kernelLanguageAttributes);
EXPECT_EQ_CONST(PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO, pKernelInfo->patchInfo.pKernelAttributesInfo->Token);
}

TEST_F(KernelDataTest, WhenDecodingExecutionEnvironmentTokenThenWalkOrderIsForcedToXMajor) {
iOpenCL::SPatchExecutionEnvironment executionEnvironment = {};
executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT;
@@ -541,17 +460,23 @@ TEST_P(DataParameterTest, GivenTokenTypeWhenBuildingThenProgramIsCorrect) {

buildAndDecode();

if (pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size() > 0) {
EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Token);
EXPECT_EQ_VAL(GetParam(), pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Type);
if (pKernelInfo->kernelArgInfo.size() == dataParameterToken.ArgumentNumber + 1) {
if (GetParam() == DATA_PARAMETER_BUFFER_STATEFUL) {
EXPECT_TRUE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess);
} else {
EXPECT_FALSE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess);
}
} // no else - some params are skipped
if (DATA_PARAMETER_KERNEL_ARGUMENT == GetParam()) {
ASSERT_NE(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
EXPECT_EQ(dataParameterToken.ArgumentNumber, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].argNum);
EXPECT_EQ(dataParameterToken.Offset, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].byValueElement.offset);
EXPECT_EQ(dataParameterToken.SourceOffset, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].byValueElement.sourceOffset);
EXPECT_EQ(dataParameterToken.DataSize, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].byValueElement.size);
} else {
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
}

if (pKernelInfo->kernelArgInfo.size() == dataParameterToken.ArgumentNumber + 1) {
if (GetParam() == DATA_PARAMETER_BUFFER_STATEFUL) {
EXPECT_TRUE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess);
} else {
EXPECT_FALSE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess);
}
} // no else - some params are skipped
}

// note that we start at '2' because we test kernel arg tokens elsewhere
@@ -577,7 +502,7 @@ TEST_F(KernelDataParameterTest, GivenDataParameterBufferOffsetWhenBuildingThenPr

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());
EXPECT_EQ_VAL(pKernelInfo->kernelArgInfo[1].offsetBufferOffset, dataParameterToken.Offset);
}
@@ -594,7 +519,7 @@ TEST_F(KernelDataParameterTest, givenDataParameterBufferStatefulWhenDecodingThen

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());
EXPECT_TRUE(pKernelInfo->kernelArgInfo[1].pureStatefulBufferAccess);
}
@@ -616,7 +541,7 @@ TEST_F(KernelDataParameterTest, givenUnknownDataParameterWhenDecodedThenParamete

buildAndDecode();

EXPECT_EQ_VAL(0u, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ_VAL(0u, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
}

TEST_F(KernelDataTest, GivenDataParameterSumOfLocalMemoryObjectArgumentSizesWhenBuildingThenProgramIsCorrect) {
@@ -640,7 +565,7 @@ TEST_F(KernelDataTest, GivenDataParameterSumOfLocalMemoryObjectArgumentSizesWhen

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());
EXPECT_EQ(alignment, pKernelInfo->kernelArgInfo[argumentNumber].slmAlignment);
ASSERT_EQ(1U, pKernelInfo->kernelArgInfo[argumentNumber].kernelArgPatchInfoVector.size());
@@ -668,7 +593,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageWidthWhenBuildingThenProgramIsCorr

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());
EXPECT_EQ(offsetImgWidth, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgWidth);
}
@@ -694,7 +619,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageHeightWhenBuildingThenProgramIsCor

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetImgHeight, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgHeight);
@@ -721,7 +646,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageDepthWhenBuildingThenProgramIsCorr

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetImgDepth, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgDepth);
@@ -748,7 +673,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageNumSamplersWhenBuildingThenProgram

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetNumSamples, pKernelInfo->kernelArgInfo[argumentNumber].offsetNumSamples);
@@ -775,7 +700,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageNumMipLevelsWhenBuildingThenProgra

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetNumMipLevels, pKernelInfo->kernelArgInfo[argumentNumber].offsetNumMipLevels);
@@ -810,7 +735,7 @@ TEST_F(KernelDataTest, givenFlatImageDataParamTokenWhenDecodingThenSetAllOffsets

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());
};

@@ -848,7 +773,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageDataTypeWhenBuildingThenProgramIsC

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetChannelDataType, pKernelInfo->kernelArgInfo[argumentNumber].offsetChannelDataType);
@@ -875,7 +800,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageChannelOrderWhenBuildingThenProgra

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetChannelOrder, pKernelInfo->kernelArgInfo[argumentNumber].offsetChannelOrder);
@@ -902,7 +827,7 @@ TEST_F(KernelDataTest, GivenDataParameterImageArraySizeWhenBuildingThenProgramIs

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetImageArraySize, pKernelInfo->kernelArgInfo[argumentNumber].offsetArraySize);
@@ -929,7 +854,7 @@ TEST_F(KernelDataTest, GivenDataParameterWorkDimensionsWhenBuildingThenProgramIs

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetWorkDim, pKernelInfo->workloadInfo.workDimOffset);
@@ -956,7 +881,7 @@ TEST_F(KernelDataTest, GivenDataParameterSimdSizeWhenBuildingThenProgramIsCorrec

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetSimdSize, pKernelInfo->workloadInfo.simdSizeOffset);
@@ -983,7 +908,7 @@ TEST_F(KernelDataTest, GivenParameterPrivateMemoryStatelessSizeWhenBuildingThenP

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size());
}

@@ -1008,7 +933,7 @@ TEST_F(KernelDataTest, GivenDataParameterLocalMemoryStatelessWindowSizeWhenBuild

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size());
}

@@ -1033,7 +958,7 @@ TEST_F(KernelDataTest, GivenDataParameterLocalMemoryStatelessWindowStartAddressW

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size());
}

@@ -1058,7 +983,7 @@ TEST_F(KernelDataTest, GivenDataParameterNumWorkGroupsWhenBuildingThenProgramIsC

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetNumWorkGroups[argumentNumber], pKernelInfo->workloadInfo.numWorkGroupsOffset[argumentNumber]);
@@ -1085,7 +1010,7 @@ TEST_F(KernelDataTest, GivenDataParameterMaxWorkgroupSizeWhenBuildingThenProgram

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetMaxWorkGroupSize, pKernelInfo->workloadInfo.maxWorkGroupSizeOffset);
@@ -1113,7 +1038,7 @@ TEST_F(KernelDataTest, GivenDataParameterSamplerAddressModeWhenBuildingThenProgr

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(1U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[0].offsetSamplerAddressingMode);
@@ -1141,7 +1066,7 @@ TEST_F(KernelDataTest, GivenDataParameterSamplerCoordinateSnapWaIsRequiredThenKe

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[1].offsetSamplerSnapWa);
@@ -1169,7 +1094,7 @@ TEST_F(KernelDataTest, GivenDataParameterSamplerNormalizedCoordsThenKernelInfoIs

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[1].offsetSamplerNormalizedCoords);
@@ -1207,9 +1132,7 @@ TEST_F(KernelDataTest, GivenDataParameterKernelArgumentWhenBuildingThenProgramIs

buildAndDecode();

EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Token);
EXPECT_EQ_VAL(DATA_PARAMETER_KERNEL_ARGUMENT, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Type);

ASSERT_EQ(2U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(1u, pKernelInfo->kernelArgInfo.size());
ASSERT_EQ(2u, pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.size());
@@ -1262,7 +1185,7 @@ TEST_F(KernelDataTest, GivenPatchTokenSamplerStateArrayWhenBuildingThenProgramIs
token.Size = static_cast<uint32_t>(sizeof(SPatchSamplerStateArray));

token.Offset = 33;
token.Count = 0x1FF0;
token.Count = 0xF0;
token.BorderColorOffset = 0x3FF0;

pPatchList = &token;
@@ -1270,11 +1193,9 @@ TEST_F(KernelDataTest, GivenPatchTokenSamplerStateArrayWhenBuildingThenProgramIs

buildAndDecode();

ASSERT_NE(nullptr, pKernelInfo->patchInfo.samplerStateArray);

EXPECT_EQ_VAL(token.Offset, pKernelInfo->patchInfo.samplerStateArray->Offset);
EXPECT_EQ_VAL(token.Count, pKernelInfo->patchInfo.samplerStateArray->Count);
EXPECT_EQ_VAL(token.BorderColorOffset, pKernelInfo->patchInfo.samplerStateArray->BorderColorOffset);
EXPECT_EQ_VAL(token.Offset, pKernelInfo->kernelDescriptor.payloadMappings.samplerTable.tableOffset);
EXPECT_EQ_VAL(token.Count, pKernelInfo->kernelDescriptor.payloadMappings.samplerTable.numSamplers);
EXPECT_EQ_VAL(token.BorderColorOffset, pKernelInfo->kernelDescriptor.payloadMappings.samplerTable.borderColor);
}

TEST_F(KernelDataTest, GivenPatchTokenAllocateStatelessPrivateMemoryWhenBuildingThenProgramIsCorrect) {
@@ -1292,12 +1213,10 @@ TEST_F(KernelDataTest, GivenPatchTokenAllocateStatelessPrivateMemoryWhenBuilding

buildAndDecode();

ASSERT_NE(nullptr, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface);

EXPECT_EQ_VAL(token.SurfaceStateHeapOffset, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->SurfaceStateHeapOffset);
EXPECT_EQ_VAL(token.DataParamOffset, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamOffset);
EXPECT_EQ_VAL(token.DataParamSize, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamSize);
EXPECT_EQ_VAL(token.PerThreadPrivateMemorySize, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize);
EXPECT_EQ_VAL(token.SurfaceStateHeapOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful);
EXPECT_EQ_VAL(token.DataParamOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.stateless);
EXPECT_EQ_VAL(token.DataParamSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.pointerSize);
EXPECT_EQ_VAL(PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(token, pKernelInfo->kernelDescriptor.kernelAttributes.simdSize), pKernelInfo->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize);
}

TEST_F(KernelDataTest, GivenDataParameterVmeMbBlockTypeWhenBuildingThenProgramIsCorrect) {
@@ -1321,7 +1240,7 @@ TEST_F(KernelDataTest, GivenDataParameterVmeMbBlockTypeWhenBuildingThenProgramIs

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetVmeMbBlockType, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeMbBlockType);
@@ -1348,7 +1267,7 @@ TEST_F(KernelDataTest, GivenDataParameterDataVmeSubpixelModeWhenBuildingThenProg

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetVmeSubpixelMode, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSubpixelMode);
@@ -1375,7 +1294,7 @@ TEST_F(KernelDataTest, GivenDataParameterVmeSadAdjustModeWhenBuildingThenProgram

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetVmeSadAdjustMode, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSadAdjustMode);
@@ -1402,7 +1321,7 @@ TEST_F(KernelDataTest, GivenDataParameterVmeSearchPathTypeWhenBuildingThenProgra

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());

EXPECT_EQ(offsetVmeSearchPathType, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSearchPathType);
@@ -1420,30 +1339,11 @@ TEST_F(KernelDataTest, GivenPatchTokenStateSipWhenBuildingThenProgramIsCorrect)

buildAndDecode();

EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size());
EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size());
EXPECT_EQ_VAL(token.SystemKernelOffset, pKernelInfo->systemKernelOffset);
}

TEST_F(KernelDataTest, GivenPatchTokenAllocateSipSurfaceWhenBuildingThenProgramIsCorrect) {
SPatchAllocateSystemThreadSurface token;
token.Token = PATCH_TOKEN_ALLOCATE_SIP_SURFACE;
token.Size = static_cast<uint32_t>(sizeof(SPatchAllocateSystemThreadSurface));
token.Offset = 32;
token.BTI = 0;
token.PerThreadSystemThreadSurfaceSize = 0x10000;

pPatchList = &token;
patchListSize = token.Size;

buildAndDecode();

EXPECT_EQ(0u, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->BTI);
EXPECT_EQ(token.Offset, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->Offset);
EXPECT_EQ(token.Token, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->Token);
EXPECT_EQ(token.PerThreadSystemThreadSurfaceSize, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize);
}

TEST_F(KernelDataTest, givenSymbolTablePatchTokenThenLinkerInputIsCreated) {
SPatchFunctionTableInfo token;
token.Token = PATCH_TOKEN_PROGRAM_SYMBOL_TABLE;
@@ -1341,7 +1341,6 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident

auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex);

EXPECT_NE(nullptr, pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization);
ASSERT_NE(nullptr, pProgram->getConstantSurface(pClDevice->getRootDeviceIndex()));

uint32_t expected_values[] = {0xabcd5432u, 0xaabb5533u};
@@ -1377,7 +1376,7 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident

auto crossThreadData = pKernel->getCrossThreadData(rootDeviceIndex);
uint32_t *constBuffGpuAddr = reinterpret_cast<uint32_t *>(pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddressToPatch());
uintptr_t *pDst = reinterpret_cast<uintptr_t *>(crossThreadData + pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset);
uintptr_t *pDst = reinterpret_cast<uintptr_t *>(crossThreadData + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless);

EXPECT_EQ(*pDst, reinterpret_cast<uintptr_t>(constBuffGpuAddr));

@@ -1406,7 +1405,6 @@ TEST_F(PatchTokenTests, WhenBuildingProgramThenGwsIsSet) {

auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex);

ASSERT_NE(nullptr, pKernelInfo->patchInfo.dataParameterStream);
ASSERT_NE(static_cast<uint32_t>(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[0]);
ASSERT_NE(static_cast<uint32_t>(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[1]);
ASSERT_NE(static_cast<uint32_t>(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[2]);
@@ -1425,14 +1423,12 @@ TEST_F(PatchTokenTests, WhenBuildingProgramThenLwsIsSet) {

auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex);

ASSERT_NE(nullptr, pKernelInfo->patchInfo.dataParameterStream);
ASSERT_NE(static_cast<uint32_t>(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[0]);
ASSERT_NE(static_cast<uint32_t>(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[1]);
ASSERT_NE(static_cast<uint32_t>(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[2]);

pKernelInfo = pProgram->getKernelInfo("test_get_local_size", rootDeviceIndex);

ASSERT_NE(nullptr, pKernelInfo->patchInfo.dataParameterStream);
ASSERT_NE(static_cast<uint32_t>(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[0]);
ASSERT_NE(static_cast<uint32_t>(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[1]);
ASSERT_NE(static_cast<uint32_t>(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[2]);
@@ -2072,14 +2068,14 @@ TEST_F(ProgramTests, GivenZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfaces
|
||||
|
||||
KernelInfo *infoBlock = new KernelInfo;
|
||||
|
||||
SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface;
|
||||
privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock;
|
||||
privateSurfaceBlock->DataParamSize = 8;
|
||||
privateSurfaceBlock->Size = 8;
|
||||
privateSurfaceBlock->SurfaceStateHeapOffset = 0;
|
||||
privateSurfaceBlock->Token = 0;
|
||||
privateSurfaceBlock->PerThreadPrivateMemorySize = 0;
|
||||
infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock;
|
||||
SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {};
|
||||
privateSurfaceBlock.DataParamOffset = crossThreadOffsetBlock;
|
||||
privateSurfaceBlock.DataParamSize = 8;
|
||||
privateSurfaceBlock.Size = 8;
|
||||
privateSurfaceBlock.SurfaceStateHeapOffset = 0;
|
||||
privateSurfaceBlock.Token = 0;
|
||||
privateSurfaceBlock.PerThreadPrivateMemorySize = 0;
|
||||
populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock);
|
||||
|
||||
program->blockKernelManager->addBlockKernelInfo(infoBlock);
|
||||
|
||||
@@ -2087,7 +2083,6 @@ TEST_F(ProgramTests, GivenZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfaces
|
||||
|
||||
EXPECT_EQ(nullptr, program->getBlockKernelManager()->getPrivateSurface(0));
|
||||
|
||||
delete privateSurfaceBlock;
|
||||
delete program;
|
||||
}

@@ -2098,14 +2093,14 @@ TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfa

KernelInfo *infoBlock = new KernelInfo;

SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface;
privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock;
privateSurfaceBlock->DataParamSize = 8;
privateSurfaceBlock->Size = 8;
privateSurfaceBlock->SurfaceStateHeapOffset = 0;
privateSurfaceBlock->Token = 0;
privateSurfaceBlock->PerThreadPrivateMemorySize = 1000;
infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock;
SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {};
privateSurfaceBlock.DataParamOffset = crossThreadOffsetBlock;
privateSurfaceBlock.DataParamSize = 8;
privateSurfaceBlock.Size = 8;
privateSurfaceBlock.SurfaceStateHeapOffset = 0;
privateSurfaceBlock.Token = 0;
privateSurfaceBlock.PerThreadPrivateMemorySize = 1000;
populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock);

program->blockKernelManager->addBlockKernelInfo(infoBlock);

@@ -2113,7 +2108,6 @@ TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfa

EXPECT_NE(nullptr, program->getBlockKernelManager()->getPrivateSurface(0));

delete privateSurfaceBlock;
delete program;
}

@@ -2124,14 +2118,14 @@ TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfa

KernelInfo *infoBlock = new KernelInfo;

SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface;
privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock;
privateSurfaceBlock->DataParamSize = 8;
privateSurfaceBlock->Size = 8;
privateSurfaceBlock->SurfaceStateHeapOffset = 0;
privateSurfaceBlock->Token = 0;
privateSurfaceBlock->PerThreadPrivateMemorySize = 1000;
infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock;
SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {};
privateSurfaceBlock.DataParamOffset = crossThreadOffsetBlock;
privateSurfaceBlock.DataParamSize = 8;
privateSurfaceBlock.Size = 8;
privateSurfaceBlock.SurfaceStateHeapOffset = 0;
privateSurfaceBlock.Token = 0;
privateSurfaceBlock.PerThreadPrivateMemorySize = 1000;
populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock);

program->blockKernelManager->addBlockKernelInfo(infoBlock);

@@ -2147,7 +2141,6 @@ TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfa

EXPECT_EQ(privateSurface, privateSurface2);

delete privateSurfaceBlock;
delete program;
}

@@ -2158,14 +2151,14 @@ TEST_F(ProgramTests, givenProgramWithBlockKernelsWhenfreeBlockResourcesisCalledT

KernelInfo *infoBlock = new KernelInfo;

SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface;
privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock;
privateSurfaceBlock->DataParamSize = 8;
privateSurfaceBlock->Size = 8;
privateSurfaceBlock->SurfaceStateHeapOffset = 0;
privateSurfaceBlock->Token = 0;
privateSurfaceBlock->PerThreadPrivateMemorySize = 1000;
infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock;
SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {};
privateSurfaceBlock.DataParamOffset = crossThreadOffsetBlock;
privateSurfaceBlock.DataParamSize = 8;
privateSurfaceBlock.Size = 8;
privateSurfaceBlock.SurfaceStateHeapOffset = 0;
privateSurfaceBlock.Token = 0;
privateSurfaceBlock.PerThreadPrivateMemorySize = 1000;
populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock);

program->blockKernelManager->addBlockKernelInfo(infoBlock);

@@ -2176,7 +2169,6 @@ TEST_F(ProgramTests, givenProgramWithBlockKernelsWhenfreeBlockResourcesisCalledT

program->freeBlockResources();

delete privateSurfaceBlock;
delete program;
}

@@ -2964,17 +2956,16 @@ using ProgramMultiRootDeviceTests = MultiRootDeviceFixture;

TEST_F(ProgramMultiRootDeviceTests, WhenPrivateSurfaceIsCreatedThenItHasCorrectRootDeviceIndex) {
auto program = std::make_unique<MockProgram>(context.get(), false, toClDeviceVector(*device1));

auto privateSurfaceBlock = std::make_unique<SPatchAllocateStatelessPrivateSurface>();
privateSurfaceBlock->DataParamOffset = 0;
privateSurfaceBlock->DataParamSize = 8;
privateSurfaceBlock->Size = 8;
privateSurfaceBlock->SurfaceStateHeapOffset = 0;
privateSurfaceBlock->Token = 0;
privateSurfaceBlock->PerThreadPrivateMemorySize = 1000;

auto infoBlock = std::make_unique<KernelInfo>();
infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock.get();

SPatchAllocateStatelessPrivateSurface privateSurfaceBlock = {};
privateSurfaceBlock.DataParamOffset = 0;
privateSurfaceBlock.DataParamSize = 8;
privateSurfaceBlock.Size = 8;
privateSurfaceBlock.SurfaceStateHeapOffset = 0;
privateSurfaceBlock.Token = 0;
privateSurfaceBlock.PerThreadPrivateMemorySize = 1000;
populateKernelDescriptor(infoBlock->kernelDescriptor, privateSurfaceBlock);

program->blockKernelManager->addBlockKernelInfo(infoBlock.release());
program->allocateBlockPrivateSurfaces(*device1);
@@ -257,7 +257,7 @@ TEST_F(ProgramWithKernelDebuggingTest, givenProgramWithKernelDebugEnabledWhenBui
EXPECT_EQ(CL_SUCCESS, retVal);

auto kernelInfo = pProgram->getKernelInfo("CopyBuffer", pDevice->getRootDeviceIndex());
EXPECT_NE(0u, kernelInfo->patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize);
EXPECT_NE(0u, kernelInfo->kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize);
}
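
With the patch token gone, the same property is read straight from the descriptor. A minimal sketch, assuming only the KernelInfo type and the descriptor field shown in the hunk above; the helper name is hypothetical.

// Returns true when the kernel reserves a per-thread system thread (debug) surface.
bool hasSystemThreadSurface(const KernelInfo &kernelInfo) {
    return kernelInfo.kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize != 0u;
}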

TEST_F(ProgramWithKernelDebuggingTest, givenKernelDebugEnabledWhenProgramIsBuiltThenDebugDataIsStored) {

@@ -23,6 +23,9 @@
#include <memory>

using namespace NEO;
namespace NEO {
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchDataParameterStream &token);
}
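
The overload declared above is what the scheduler-kernel tests below rely on. A minimal usage sketch, assuming only that declaration; the size value is illustrative, and the fact that it feeds the kernel's cross-thread data (curbe) size is inferred from the curbe-size test further down.

KernelInfo kernelInfo;
SPatchDataParameterStream dataParameterStream = {};
dataParameterStream.DataParameterStreamSize = 64; // illustrative cross-thread data size in bytes
populateKernelDescriptor(kernelInfo.kernelDescriptor, dataParameterStream);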

class MockSchedulerKernel : public SchedulerKernel {
public:
@@ -31,14 +34,14 @@ class MockSchedulerKernel : public SchedulerKernel {

static MockSchedulerKernel *create(Program &program, KernelInfo *&info) {
info = new KernelInfo;
SPatchDataParameterStream dataParametrStream;
dataParametrStream.DataParameterStreamSize = 8;
dataParametrStream.Size = 8;
SPatchDataParameterStream dataParameterStream;
dataParameterStream.DataParameterStreamSize = 8;
dataParameterStream.Size = 8;
populateKernelDescriptor(info->kernelDescriptor, dataParameterStream);

info->kernelDescriptor.kernelAttributes.simdSize = 32;
info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = false;

info->patchInfo.dataParameterStream = &dataParametrStream;
KernelArgInfo bufferArg;
bufferArg.isBuffer = true;

@@ -114,8 +117,8 @@ TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenCurbeSizeIsCorrect) {

SPatchDataParameterStream dataParameterStream;
dataParameterStream.DataParameterStreamSize = crossTrheadDataSize;
populateKernelDescriptor(info.kernelDescriptor, dataParameterStream);

info.patchInfo.dataParameterStream = &dataParameterStream;
info.heapInfo.DynamicStateHeapSize = dshSize;

KernelInfoContainer kernelInfos;
@@ -279,8 +282,6 @@ TEST(SchedulerKernelTest, GivenNullKernelInfoWhenGettingCurbeSizeThenSizeIsCorre
MockProgram program(toClDeviceVector(*device));
KernelInfo info;

info.patchInfo.dataParameterStream = nullptr;

KernelInfoContainer kernelInfos;
kernelInfos.push_back(&info);
MockSchedulerKernel kernel(&program, kernelInfos);