mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
Remove executionEnvironment from KernelInfo
Related-To: NEO-3739
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
96bc6b2e01
commit
4948c39d39
@@ -327,7 +327,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
|
||||
uint32_t numThreadsPerSubSlice = (uint32_t)deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU;
|
||||
uint32_t localMemSize = (uint32_t)deviceInfo.localMemSize;
|
||||
|
||||
NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.flags.usesBarriers, simd, this->getSlmTotalSize(),
|
||||
NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(),
|
||||
coreFamily, numThreadsPerSubSlice, localMemSize,
|
||||
usesImages, false);
|
||||
NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim);
|
||||
@@ -366,7 +366,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
|
||||
descriptor.kernelAttributes.numGrfRequired,
|
||||
hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount);
|
||||
|
||||
auto usesBarriers = descriptor.kernelAttributes.flags.usesBarriers;
|
||||
auto barrierCount = descriptor.kernelAttributes.barrierCount;
|
||||
const uint32_t workDim = 3;
|
||||
const size_t localWorkSize[] = {groupSize[0], groupSize[1], groupSize[2]};
|
||||
*totalGroupCount = NEO::KernelHelper::getMaxWorkGroupCount(descriptor.kernelAttributes.simdSize,
|
||||
@@ -375,7 +375,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
|
||||
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
|
||||
hwHelper.alignSlmSize(slmArgsTotalSize + descriptor.kernelAttributes.slmInlineSize),
|
||||
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
|
||||
hwHelper.getBarriersCountFromHasBarriers(usesBarriers),
|
||||
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
|
||||
workDim,
|
||||
localWorkSize);
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
||||
@@ -686,7 +686,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
kernel->makeResident(getGpgpuCommandStreamReceiver());
|
||||
requiresCoherency |= kernel->requiresCoherency();
|
||||
mediaSamplerRequired |= kernel->isVmeKernel();
|
||||
auto numGrfRequiredByKernel = kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->NumGRFRequired;
|
||||
auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.numGrfRequired);
|
||||
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
|
||||
specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode();
|
||||
if (kernel->hasUncacheableStatelessArgs()) {
|
||||
|
||||
@@ -208,7 +208,7 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
|
||||
pIDDestination[blockIndex + i].setKernelStartPointer(static_cast<uint32_t>(blockKernelStartPointer));
|
||||
pIDDestination[blockIndex + i].setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL);
|
||||
EncodeDispatchKernel<GfxFamily>::programBarrierEnable(pIDDestination[blockIndex + i],
|
||||
pBlockInfo->patchInfo.executionEnvironment->HasBarriers,
|
||||
pBlockInfo->kernelDescriptor.kernelAttributes.barrierCount,
|
||||
device->getHardwareInfo());
|
||||
|
||||
// Set offset to sampler states, block's DHSOffset is added by scheduler
|
||||
|
||||
@@ -15,12 +15,12 @@ namespace NEO {
|
||||
template <>
|
||||
void GpgpuWalkerHelper<BDWFamily>::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) {
|
||||
if (disablePerfMode) {
|
||||
if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
// Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
|
||||
GpgpuWalkerHelper<BDWFamily>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS);
|
||||
}
|
||||
} else {
|
||||
if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
// Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work
|
||||
typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
auto pCmd = reinterpret_cast<PIPE_CONTROL *>(pCommandStream->getSpace(sizeof(PIPE_CONTROL)));
|
||||
@@ -40,7 +40,7 @@ size_t GpgpuWalkerHelper<BDWFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
|
||||
typedef typename BDWFamily::MI_MATH MI_MATH;
|
||||
typedef typename BDWFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
|
||||
size_t n = 0;
|
||||
if (pKernel->getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
if (pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
n += sizeof(PIPE_CONTROL) +
|
||||
(2 * sizeof(MI_LOAD_REGISTER_REG) +
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
|
||||
@@ -15,12 +15,12 @@ namespace NEO {
|
||||
template <>
|
||||
void GpgpuWalkerHelper<SKLFamily>::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) {
|
||||
if (disablePerfMode) {
|
||||
if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
// Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
|
||||
GpgpuWalkerHelper<SKLFamily>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS);
|
||||
}
|
||||
} else {
|
||||
if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
// Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work
|
||||
typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL;
|
||||
auto pCmd = reinterpret_cast<PIPE_CONTROL *>(pCommandStream->getSpace(sizeof(PIPE_CONTROL)));
|
||||
@@ -40,7 +40,7 @@ size_t GpgpuWalkerHelper<SKLFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
|
||||
typedef typename SKLFamily::MI_MATH MI_MATH;
|
||||
typedef typename SKLFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
|
||||
size_t n = 0;
|
||||
if (pKernel->getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
if (pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
n += sizeof(PIPE_CONTROL) +
|
||||
(2 * sizeof(MI_LOAD_REGISTER_REG) +
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
|
||||
@@ -194,7 +194,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
|
||||
interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize);
|
||||
EncodeDispatchKernel<GfxFamily>::programBarrierEnable(interfaceDescriptor,
|
||||
kernel.getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->HasBarriers,
|
||||
kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.barrierCount,
|
||||
hardwareInfo);
|
||||
|
||||
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(&interfaceDescriptor, preemptionMode);
|
||||
@@ -279,7 +279,6 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
rootDeviceIndex);
|
||||
|
||||
uint64_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable + interfaceDescriptorIndex * sizeof(INTERFACE_DESCRIPTOR_DATA);
|
||||
DEBUG_BREAK_IF(patchInfo.executionEnvironment == nullptr);
|
||||
|
||||
auto bindingTablePrefetchSize = std::min(31u, static_cast<uint32_t>(kernel.getNumberOfBindingTableStates(rootDeviceIndex)));
|
||||
if (resetBindingTablePrefetch(kernel)) {
|
||||
|
||||
@@ -210,7 +210,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
}
|
||||
|
||||
auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex();
|
||||
const auto &kernelInfo = kernel->getKernelInfo(rootDeviceIndex);
|
||||
const auto &kernelDescriptor = kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor;
|
||||
|
||||
DispatchFlags dispatchFlags(
|
||||
{}, //csrDependencies
|
||||
@@ -219,7 +219,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
commandQueue.flushStamp->getStampReference(), //flushStampReference
|
||||
commandQueue.getThrottle(), //throttle
|
||||
preemptionMode, //preemptionMode
|
||||
kernelInfo.patchInfo.executionEnvironment->NumGRFRequired, //numGrfRequired
|
||||
kernelDescriptor.kernelAttributes.numGrfRequired, //numGrfRequired
|
||||
L3CachingSettings::l3CacheOn, //l3CacheSettings
|
||||
kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy
|
||||
kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo
|
||||
|
||||
@@ -66,9 +66,7 @@ uint32_t Kernel::dummyPatchLocation = 0xbaddf00d;
|
||||
|
||||
Kernel::Kernel(Program *programArg, const KernelInfoContainer &kernelInfosArg, bool schedulerKernel)
|
||||
: slmTotalSize(kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->workloadInfo.slmStaticSize),
|
||||
isParentKernel((kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->patchInfo.executionEnvironment != nullptr)
|
||||
? (kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->patchInfo.executionEnvironment->HasDeviceEnqueue != 0)
|
||||
: false),
|
||||
isParentKernel(kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue),
|
||||
isSchedulerKernel(schedulerKernel),
|
||||
executionEnvironment(programArg->getExecutionEnvironment()),
|
||||
program(programArg),
|
||||
@@ -354,10 +352,8 @@ cl_int Kernel::initialize() {
|
||||
}
|
||||
|
||||
setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy());
|
||||
if (kernelInfo.patchInfo.executionEnvironment) {
|
||||
if (!kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired) {
|
||||
setThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased);
|
||||
}
|
||||
if (false == kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress) {
|
||||
setThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased);
|
||||
}
|
||||
patchBlocksSimdSize(rootDeviceIndex);
|
||||
|
||||
@@ -617,6 +613,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
auto &kernelInfo = *kernelInfos[rootDeviceIndex];
|
||||
const auto &patchInfo = kernelInfo.patchInfo;
|
||||
const auto &kernelDescriptor = kernelInfo.kernelDescriptor;
|
||||
size_t preferredWorkGroupSizeMultiple = 0;
|
||||
cl_ulong scratchSize;
|
||||
cl_ulong privateMemSize;
|
||||
@@ -629,7 +626,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
|
||||
case CL_KERNEL_WORK_GROUP_SIZE:
|
||||
maxWorkgroupSize = kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize;
|
||||
if (DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get()) {
|
||||
auto divisionSize = CommonConstants::maximalSimdSize / patchInfo.executionEnvironment->LargestCompiledSIMDSize;
|
||||
auto divisionSize = CommonConstants::maximalSimdSize / kernelInfo.getMaxSimdSize();
|
||||
maxWorkgroupSize /= divisionSize;
|
||||
}
|
||||
srcSize = sizeof(maxWorkgroupSize);
|
||||
@@ -637,10 +634,9 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
|
||||
break;
|
||||
|
||||
case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
|
||||
DEBUG_BREAK_IF(!patchInfo.executionEnvironment);
|
||||
requiredWorkGroupSize.val[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
requiredWorkGroupSize.val[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
requiredWorkGroupSize.val[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
requiredWorkGroupSize.val[0] = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
requiredWorkGroupSize.val[1] = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
requiredWorkGroupSize.val[2] = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
srcSize = sizeof(requiredWorkGroupSize);
|
||||
pSrc = &requiredWorkGroupSize;
|
||||
break;
|
||||
@@ -654,8 +650,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
|
||||
break;
|
||||
|
||||
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
|
||||
DEBUG_BREAK_IF(!patchInfo.executionEnvironment);
|
||||
preferredWorkGroupSizeMultiple = patchInfo.executionEnvironment->LargestCompiledSIMDSize;
|
||||
preferredWorkGroupSizeMultiple = kernelInfo.getMaxSimdSize();
|
||||
if (hwHelper.isFusedEuDispatchEnabled(hwInfo)) {
|
||||
preferredWorkGroupSizeMultiple *= 2;
|
||||
}
|
||||
@@ -695,7 +690,7 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para
|
||||
const auto &kernelInfo = getKernelInfo(rootDeviceIndex);
|
||||
auto maxSimdSize = static_cast<size_t>(kernelInfo.getMaxSimdSize());
|
||||
auto maxRequiredWorkGroupSize = static_cast<size_t>(kernelInfo.getMaxRequiredWorkGroupSize(getMaxKernelWorkGroupSize(rootDeviceIndex)));
|
||||
auto largestCompiledSIMDSize = static_cast<size_t>(kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize);
|
||||
auto largestCompiledSIMDSize = static_cast<size_t>(kernelInfo.getMaxSimdSize());
|
||||
|
||||
GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet);
|
||||
|
||||
@@ -781,10 +776,10 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para
|
||||
return changeGetInfoStatusToCLResultType(info.set<size_t>(Math::divideAndRoundUp(maxRequiredWorkGroupSize, largestCompiledSIMDSize)));
|
||||
}
|
||||
case CL_KERNEL_COMPILE_NUM_SUB_GROUPS: {
|
||||
return changeGetInfoStatusToCLResultType(info.set<size_t>(static_cast<size_t>(kernelInfo.patchInfo.executionEnvironment->CompiledSubGroupsNumber)));
|
||||
return changeGetInfoStatusToCLResultType(info.set<size_t>(static_cast<size_t>(kernelInfo.kernelDescriptor.kernelMetadata.compiledSubGroupsNumber)));
|
||||
}
|
||||
case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: {
|
||||
return changeGetInfoStatusToCLResultType(info.set<size_t>(kernelInfo.requiredSubGroupSize));
|
||||
return changeGetInfoStatusToCLResultType(info.set<size_t>(kernelInfo.kernelDescriptor.kernelMetadata.requiredSubGroupSize));
|
||||
}
|
||||
default:
|
||||
return CL_INVALID_VALUE;
|
||||
@@ -1101,24 +1096,24 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto executionEnvironment = getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment;
|
||||
const auto &kernelDescriptor = getKernelInfo(rootDeviceIndex).kernelDescriptor;
|
||||
auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;
|
||||
if (dssCount == 0) {
|
||||
dssCount = hardwareInfo.gtSystemInfo.SubSliceCount;
|
||||
}
|
||||
auto availableThreadCount = hwHelper.calculateAvailableThreadCount(
|
||||
hardwareInfo.platform.eProductFamily,
|
||||
((executionEnvironment != nullptr) ? executionEnvironment->NumGRFRequired : GrfConfig::DefaultGrfNumber),
|
||||
kernelDescriptor.kernelAttributes.numGrfRequired,
|
||||
hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount);
|
||||
|
||||
auto hasBarriers = ((executionEnvironment != nullptr) ? executionEnvironment->HasBarriers : 0u);
|
||||
auto barrierCount = kernelDescriptor.kernelAttributes.barrierCount;
|
||||
return KernelHelper::getMaxWorkGroupCount(kernelInfos[rootDeviceIndex]->getMaxSimdSize(),
|
||||
availableThreadCount,
|
||||
dssCount,
|
||||
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
|
||||
hwHelper.alignSlmSize(slmTotalSize),
|
||||
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
|
||||
hwHelper.getBarriersCountFromHasBarriers(hasBarriers),
|
||||
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
|
||||
workDim,
|
||||
localWorkSize);
|
||||
}
|
||||
@@ -2084,8 +2079,8 @@ uint32_t Kernel::ReflectionSurfaceHelper::setKernelData(void *reflectionSurface,
|
||||
kernelData->m_sizeOfConstantBuffer = kernelInfo.getConstantBufferSize();
|
||||
kernelData->m_PatchTokensMask = tokenMaskIn;
|
||||
kernelData->m_ScratchSpacePatchValue = 0;
|
||||
kernelData->m_SIMDSize = kernelInfo.patchInfo.executionEnvironment ? kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize : 0;
|
||||
kernelData->m_HasBarriers = kernelInfo.patchInfo.executionEnvironment ? kernelInfo.patchInfo.executionEnvironment->HasBarriers : 0;
|
||||
kernelData->m_SIMDSize = kernelInfo.getMaxSimdSize();
|
||||
kernelData->m_HasBarriers = kernelInfo.kernelDescriptor.kernelAttributes.barrierCount;
|
||||
kernelData->m_RequiredWkgSizes[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
kernelData->m_RequiredWkgSizes[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
kernelData->m_RequiredWkgSizes[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
|
||||
@@ -136,8 +136,7 @@ WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) {
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(rootDeviceIndex);
|
||||
this->maxWorkGroupSize = dispatchInfo.getKernel()->getMaxKernelWorkGroupSize(rootDeviceIndex);
|
||||
auto pExecutionEnvironment = kernelInfo.patchInfo.executionEnvironment;
|
||||
this->hasBarriers = (pExecutionEnvironment != nullptr) && (pExecutionEnvironment->HasBarriers);
|
||||
this->hasBarriers = kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers();
|
||||
this->simdSize = static_cast<uint32_t>(kernelInfo.getMaxSimdSize());
|
||||
this->slmTotalSize = static_cast<uint32_t>(dispatchInfo.getKernel()->slmTotalSize);
|
||||
this->coreFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
|
||||
@@ -185,7 +184,6 @@ KernelInfo::~KernelInfo() {
|
||||
}
|
||||
|
||||
void KernelInfo::storePatchToken(const SPatchExecutionEnvironment *execEnv) {
|
||||
this->patchInfo.executionEnvironment = execEnv;
|
||||
if (execEnv->CompiledForGreaterThan4GBBuffers == false) {
|
||||
this->requiresSshForBuffers = true;
|
||||
}
|
||||
|
||||
@@ -140,26 +140,13 @@ struct KernelInfo {
|
||||
size_t getBorderColorStateSize() const;
|
||||
size_t getBorderColorOffset() const;
|
||||
unsigned int getMaxSimdSize() const {
|
||||
const auto executionEnvironment = patchInfo.executionEnvironment;
|
||||
if (executionEnvironment == nullptr || executionEnvironment->LargestCompiledSIMDSize == 1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (executionEnvironment->CompiledSIMD32) {
|
||||
return 32;
|
||||
}
|
||||
|
||||
if (executionEnvironment->CompiledSIMD16) {
|
||||
return 16;
|
||||
}
|
||||
|
||||
return 8;
|
||||
return kernelDescriptor.kernelAttributes.simdSize;
|
||||
}
|
||||
bool hasDeviceEnqueue() const {
|
||||
return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->HasDeviceEnqueue : false;
|
||||
return kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue;
|
||||
}
|
||||
bool requiresSubgroupIndependentForwardProgress() const {
|
||||
return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired : false;
|
||||
return kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress;
|
||||
}
|
||||
size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
|
||||
auto requiredWorkGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
|
||||
@@ -62,7 +62,6 @@ struct PatchInfo {
|
||||
imageMemObjKernelArgs;
|
||||
const SPatchDataParameterStream *dataParameterStream = nullptr;
|
||||
const SPatchThreadPayload *threadPayload = nullptr;
|
||||
const SPatchExecutionEnvironment *executionEnvironment = nullptr;
|
||||
const SPatchKernelAttributesInfo *pKernelAttributesInfo = nullptr;
|
||||
const SPatchAllocateStatelessPrivateSurface *pAllocateStatelessPrivateSurface = nullptr;
|
||||
const SPatchAllocateSyncBuffer *pAllocateSyncBuffer = nullptr;
|
||||
|
||||
@@ -30,6 +30,8 @@ class MediaImageSetArgTest : public ClDeviceFixture,
|
||||
ClDeviceFixture::SetUp();
|
||||
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
program = std::make_unique<MockProgram>(toClDeviceVector(*pClDevice));
|
||||
|
||||
pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/kernel/grf_config.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
|
||||
@@ -61,6 +63,8 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting
|
||||
size_t globalWorkOffset[] = {0, 0, 0};
|
||||
size_t localWorkSize[] = {8, 8, 8};
|
||||
size_t maxConcurrentWorkGroupCount = 0;
|
||||
const_cast<KernelInfo &>(pKernel->getKernelInfo(pDevice->getRootDeviceIndex())).kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
|
||||
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, localWorkSize,
|
||||
&maxConcurrentWorkGroupCount);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
@@ -22,7 +22,7 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture<HelloWorldFixtureFactory> {
|
||||
ASSERT_EQ(3u, maxWorkDim);
|
||||
maxWorkGroupSize = static_cast<size_t>(pKernel->kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize);
|
||||
ASSERT_GE(1024u, maxWorkGroupSize);
|
||||
largestCompiledSIMDSize = static_cast<size_t>(pKernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->LargestCompiledSIMDSize);
|
||||
largestCompiledSIMDSize = static_cast<size_t>(pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize());
|
||||
ASSERT_EQ(32u, largestCompiledSIMDSize);
|
||||
|
||||
auto requiredWorkGroupSizeX = static_cast<size_t>(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
|
||||
@@ -263,7 +263,7 @@ TEST_F(KernelSubGroupInfoReturnCompileNumberTest, GivenKernelWhenGettingCompileN
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
|
||||
EXPECT_EQ(paramValue[0], static_cast<size_t>(pKernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledSubGroupsNumber));
|
||||
EXPECT_EQ(paramValue[0], static_cast<size_t>(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.compiledSubGroupsNumber));
|
||||
}
|
||||
|
||||
typedef KernelSubGroupInfoParamFixture<WorkSizeParam> KernelSubGroupInfoReturnCompileSizeTest;
|
||||
|
||||
@@ -24,6 +24,7 @@ class KernelArgSvmFixture : public ApiFixture<> {
|
||||
|
||||
// define kernel info
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
// setup kernel arg offsets
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ class KernelExecInfoFixture : public ApiFixture<> {
|
||||
REQUIRE_SVM_OR_SKIP(defaultHwInfo);
|
||||
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
pMockKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex));
|
||||
ASSERT_EQ(CL_SUCCESS, pMockKernel->initialize());
|
||||
|
||||
@@ -503,7 +503,7 @@ HWTEST_F(AUBSimpleKernelStatelessTest, givenSimpleKernelWhenStatelessPathIsUsedT
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_FALSE(this->kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
EXPECT_TRUE(this->kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_TRUE(this->kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
|
||||
this->pCmdQ->flush();
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(pBuffer->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()),
|
||||
|
||||
@@ -763,7 +763,7 @@ HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStateles
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
}
|
||||
|
||||
@@ -797,7 +797,7 @@ HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStateles
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
}
|
||||
|
||||
@@ -1004,8 +1004,7 @@ TEST_F(VmeBuiltInTests, GivenVmeBuilderAndInvalidParamsWhenGettingDispatchInfoTh
|
||||
|
||||
TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorrect) {
|
||||
MockKernelWithInternals mockKernel{*pClDevice};
|
||||
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD32 = 0;
|
||||
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD16 = 1;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;
|
||||
@@ -1054,8 +1053,7 @@ TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorre
|
||||
|
||||
TEST_F(VmeBuiltInTests, GivenAdvancedVmeBuilderWhenGettingDispatchInfoThenParamsAreCorrect) {
|
||||
MockKernelWithInternals mockKernel{*pClDevice};
|
||||
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD32 = 0;
|
||||
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD16 = 1;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;
|
||||
|
||||
@@ -101,7 +101,8 @@ struct CommandQueueStateless : public CommandQueueHw<FamilyType> {
|
||||
void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override {
|
||||
auto kernel = dispatchInfo.begin()->getKernel();
|
||||
auto rootDeviceIndex = this->device->getRootDeviceIndex();
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
}
|
||||
};
|
||||
@@ -115,12 +116,12 @@ struct CommandQueueStateful : public CommandQueueHw<FamilyType> {
|
||||
auto &device = dispatchInfo.begin()->getClDevice();
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
if (!device.areSharedSystemAllocationsAllowed()) {
|
||||
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
if (device.getHardwareCapabilities().isStatelesToStatefullWithOffsetSupported) {
|
||||
EXPECT_TRUE(kernel->allBufferArgsStateful);
|
||||
}
|
||||
} else {
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,11 +49,6 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
|
||||
memset(&dataParameterStream, 0, sizeof(dataParameterStream));
|
||||
dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData);
|
||||
|
||||
executionEnvironment = {};
|
||||
memset(&executionEnvironment, 0, sizeof(executionEnvironment));
|
||||
executionEnvironment.CompiledSIMD32 = 1;
|
||||
executionEnvironment.LargestCompiledSIMDSize = 32;
|
||||
|
||||
memset(&threadPayload, 0, sizeof(threadPayload));
|
||||
threadPayload.LocalIDXPresent = 1;
|
||||
threadPayload.LocalIDYPresent = 1;
|
||||
@@ -68,13 +63,13 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
|
||||
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
|
||||
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
|
||||
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
kernelInfo.patchInfo.threadPayload = &threadPayload;
|
||||
|
||||
kernelInfoWithSampler.heapInfo.pKernelHeap = kernelIsa;
|
||||
kernelInfoWithSampler.heapInfo.KernelHeapSize = sizeof(kernelIsa);
|
||||
kernelInfoWithSampler.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
kernelInfoWithSampler.patchInfo.executionEnvironment = &executionEnvironment;
|
||||
kernelInfoWithSampler.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
kernelInfoWithSampler.patchInfo.threadPayload = &threadPayload;
|
||||
kernelInfoWithSampler.patchInfo.samplerStateArray = &samplerArray;
|
||||
kernelInfoWithSampler.heapInfo.pDsh = static_cast<const void *>(dsh);
|
||||
@@ -100,7 +95,6 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
|
||||
|
||||
SKernelBinaryHeaderCommon kernelHeader = {};
|
||||
SPatchDataParameterStream dataParameterStream = {};
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
SPatchThreadPayload threadPayload = {};
|
||||
SPatchSamplerStateArray samplerArray = {};
|
||||
|
||||
|
||||
@@ -211,7 +211,7 @@ HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessK
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
}
|
||||
|
||||
|
||||
@@ -230,7 +230,7 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsU
|
||||
EXPECT_NE(0u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
}
|
||||
|
||||
|
||||
@@ -363,7 +363,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKerne
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
|
||||
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
|
||||
|
||||
@@ -713,12 +713,14 @@ HWTEST_F(EnqueueKernelTests, whenEnqueueingKernelThenCsrCorrectlySetsRequiredThr
|
||||
|
||||
UltClDeviceFactory clDeviceFactory{1, 0};
|
||||
MockContext context{clDeviceFactory.rootDevices[0]};
|
||||
SPatchExecutionEnvironment sPatchExecutionEnvironment = {};
|
||||
|
||||
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = true;
|
||||
MockKernelWithInternals mockKernelWithInternalsWithIfpRequired{*clDeviceFactory.rootDevices[0], sPatchExecutionEnvironment};
|
||||
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = false;
|
||||
MockKernelWithInternals mockKernelWithInternalsWithIfpNotRequired{*clDeviceFactory.rootDevices[0], sPatchExecutionEnvironment};
|
||||
SPatchExecutionEnvironment sPatchExecEnv = {};
|
||||
|
||||
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true;
|
||||
MockKernelWithInternals mockKernelWithInternalsWithIfpRequired{*clDeviceFactory.rootDevices[0], sPatchExecEnv};
|
||||
|
||||
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = false;
|
||||
MockKernelWithInternals mockKernelWithInternalsWithIfpNotRequired{*clDeviceFactory.rootDevices[0], sPatchExecEnv};
|
||||
|
||||
cl_int retVal;
|
||||
std::unique_ptr<CommandQueue> pCommandQueue{CommandQueue::create(&context, clDeviceFactory.rootDevices[0], nullptr, true, retVal)};
|
||||
|
||||
@@ -125,12 +125,10 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB
|
||||
|
||||
EXPECT_NE(nullptr, kernel);
|
||||
// This kernel does not operate on OpenCL 2.0 Read and Write images
|
||||
EXPECT_EQ(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages, (uint32_t) false);
|
||||
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages);
|
||||
// Simulate that the kernel actually operates on OpenCL 2.0 Read and Write images.
|
||||
// Such kernel may require special WA DisableLSQCROPERFforOCL during construction of Command Buffer
|
||||
struct SPatchExecutionEnvironment *pExecEnv = (struct SPatchExecutionEnvironment *)kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment;
|
||||
pExecEnv->UsesFencesForReadWriteImages = (uint32_t) true;
|
||||
EXPECT_EQ(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages, (uint32_t) true);
|
||||
const_cast<KernelDescriptor &>(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor).kernelAttributes.flags.usesFencesForReadWriteImages = true;
|
||||
|
||||
// Enqueue kernel that may require special WA DisableLSQCROPERFforOCL
|
||||
auto retVal = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, kernel.get());
|
||||
@@ -150,7 +148,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB
|
||||
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
|
||||
expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize);
|
||||
|
||||
pExecEnv->UsesFencesForReadWriteImages = (uint32_t) false;
|
||||
const_cast<KernelDescriptor &>(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor).kernelAttributes.flags.usesFencesForReadWriteImages = false;
|
||||
|
||||
EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS);
|
||||
EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH);
|
||||
|
||||
@@ -694,14 +694,10 @@ TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenHasBarrier
|
||||
dispatchInfo.setClDevice(&device);
|
||||
dispatchInfo.setKernel(kernel.mockKernel);
|
||||
|
||||
kernel.kernelInfo.patchInfo.executionEnvironment = nullptr;
|
||||
kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 0;
|
||||
EXPECT_FALSE(WorkSizeInfo{dispatchInfo}.hasBarriers);
|
||||
|
||||
kernel.executionEnvironment.HasBarriers = 0;
|
||||
kernel.kernelInfo.patchInfo.executionEnvironment = &kernel.executionEnvironment;
|
||||
EXPECT_FALSE(WorkSizeInfo{dispatchInfo}.hasBarriers);
|
||||
|
||||
kernel.executionEnvironment.HasBarriers = 1;
|
||||
kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1;
|
||||
EXPECT_TRUE(WorkSizeInfo{dispatchInfo}.hasBarriers);
|
||||
}
|
||||
|
||||
|
||||
@@ -326,8 +326,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbThenCleanupSectionIsC
|
||||
// 4 pages padding expected after cleanup section
|
||||
EXPECT_LE(4 * MemoryConstants::pageSize, slbMax - slbUsed);
|
||||
|
||||
if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
|
||||
|
||||
if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
cleanupSectionOffsetToParse += GpgpuWalkerHelper<FamilyType>::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) / 2;
|
||||
}
|
||||
|
||||
@@ -402,7 +401,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, GivenProfilingWhenBuildingSlbThenEmC
|
||||
|
||||
auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
|
||||
if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages && GpgpuWalkerHelper<FamilyType>::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) {
|
||||
if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages && GpgpuWalkerHelper<FamilyType>::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) {
|
||||
auto loadRegImmItor = find<MI_LOAD_REGISTER_IMM *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
EXPECT_NE(hwParser.cmdList.end(), loadRegImmItor);
|
||||
|
||||
@@ -628,7 +627,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCall
|
||||
auto blockManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto iddCount = blockManager->getCount();
|
||||
for (uint32_t i = 0; i < iddCount; i++) {
|
||||
((SPatchExecutionEnvironment *)blockManager->getBlockKernelInfo(i)->patchInfo.executionEnvironment)->HasBarriers = 1u;
|
||||
const_cast<KernelDescriptor &>(blockManager->getBlockKernelInfo(i)->kernelDescriptor).kernelAttributes.barrierCount = 1U;
|
||||
}
|
||||
|
||||
auto surfaceStateHeapSize =
|
||||
|
||||
@@ -95,7 +95,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
|
||||
|
||||
ASSERT_NE(nullptr, pBlockInfo);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
|
||||
|
||||
auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize;
|
||||
@@ -103,7 +102,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
|
||||
const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize;
|
||||
|
||||
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload);
|
||||
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->patchInfo.executionEnvironment->LargestCompiledSIMDSize, numChannels);
|
||||
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->getMaxSimdSize(), numChannels);
|
||||
uint32_t numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
|
||||
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
|
||||
|
||||
@@ -323,7 +322,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
|
||||
|
||||
ASSERT_NE(nullptr, pBlockInfo);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
|
||||
|
||||
Kernel *blockKernel = Kernel::create(pKernel->getProgram(), MockKernel::toKernelInfoContainer(*pBlockInfo, rootDeviceIndex), nullptr);
|
||||
|
||||
@@ -39,9 +39,6 @@ void DevicePreemptionTests::SetUp() {
|
||||
device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr, rootDeviceIndex));
|
||||
context.reset(new MockContext(device.get()));
|
||||
cmdQ.reset(new MockCommandQueue(context.get(), device.get(), properties));
|
||||
executionEnvironment.reset(new SPatchExecutionEnvironment);
|
||||
memset(executionEnvironment.get(), 0, sizeof(SPatchExecutionEnvironment));
|
||||
kernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
|
||||
program = std::make_unique<MockProgram>(toClDeviceVector(*device));
|
||||
kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex)));
|
||||
dispatchInfo.reset(new DispatchInfo(device.get(), kernel.get(), 1, Vec3<size_t>(1, 1, 1), Vec3<size_t>(1, 1, 1), Vec3<size_t>(0, 0, 0)));
|
||||
|
||||
@@ -16,10 +16,6 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace iOpenCL {
|
||||
struct SPatchExecutionEnvironment;
|
||||
}
|
||||
|
||||
namespace NEO {
|
||||
class DispatchInfo;
|
||||
class MockCommandQueue;
|
||||
@@ -50,7 +46,6 @@ class DevicePreemptionTests : public ::testing::Test {
|
||||
std::unique_ptr<NEO::MockClDevice> device;
|
||||
std::unique_ptr<NEO::MockContext> context;
|
||||
std::unique_ptr<DebugManagerStateRestore> dbgRestore;
|
||||
std::unique_ptr<iOpenCL::SPatchExecutionEnvironment> executionEnvironment;
|
||||
std::unique_ptr<NEO::MockProgram> program;
|
||||
std::unique_ptr<NEO::KernelInfo> kernelInfo;
|
||||
const uint32_t rootDeviceIndex = 0u;
|
||||
|
||||
@@ -22,6 +22,8 @@ KernelImageArgTest::~KernelImageArgTest() = default;
|
||||
|
||||
void KernelImageArgTest::SetUp() {
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
|
||||
|
||||
@@ -21,9 +21,10 @@ using Gen11EnqueueTest = Test<ClDeviceFixture>;
|
||||
GEN11TEST_F(Gen11EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenDefaultPolicyIsProgrammed) {
|
||||
MockContext mc;
|
||||
CommandQueueHw<FamilyType> cmdQ{&mc, pClDevice, 0, false};
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
executionEnvironment.SubgroupIndependentForwardProgressRequired = true;
|
||||
MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment);
|
||||
|
||||
SPatchExecutionEnvironment sPatchExecEnv = {};
|
||||
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true;
|
||||
MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv);
|
||||
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr);
|
||||
|
||||
@@ -39,9 +40,10 @@ GEN11TEST_F(Gen11EnqueueTest, givenKernelRequiringIndependentForwardProgressWhen
|
||||
GEN11TEST_F(Gen11EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) {
|
||||
MockContext mc;
|
||||
CommandQueueHw<FamilyType> cmdQ{&mc, pClDevice, 0, false};
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
executionEnvironment.SubgroupIndependentForwardProgressRequired = false;
|
||||
MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment);
|
||||
|
||||
SPatchExecutionEnvironment sPatchExecEnv = {};
|
||||
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = false;
|
||||
MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv);
|
||||
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr);
|
||||
|
||||
|
||||
@@ -21,9 +21,10 @@ using Gen9EnqueueTest = Test<ClDeviceFixture>;
|
||||
GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenRoundRobinPolicyIsProgrammed) {
|
||||
MockContext mc;
|
||||
CommandQueueHw<SKLFamily> cmdQ{&mc, pClDevice, 0, false};
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
executionEnvironment.SubgroupIndependentForwardProgressRequired = true;
|
||||
MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment);
|
||||
|
||||
SPatchExecutionEnvironment sPatchExecEnv = {};
|
||||
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true;
|
||||
MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv);
|
||||
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr);
|
||||
|
||||
@@ -39,9 +40,10 @@ GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKe
|
||||
GEN9TEST_F(Gen9EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) {
|
||||
MockContext mc;
|
||||
CommandQueueHw<SKLFamily> cmdQ{&mc, pClDevice, 0, false};
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
executionEnvironment.SubgroupIndependentForwardProgressRequired = false;
|
||||
MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment);
|
||||
|
||||
SPatchExecutionEnvironment sPatchExecEnv = {};
|
||||
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = false;
|
||||
MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv);
|
||||
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr);
|
||||
|
||||
|
||||
@@ -1134,9 +1134,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GTPinTests, givenInitializedGTPinInterfaceWhenKernel
|
||||
pPatch1->CompiledSubGroupsNumber = 0;
|
||||
pPatch1->HasBarriers = 0;
|
||||
pPatch1->DisableMidThreadPreemption = 0;
|
||||
pPatch1->CompiledSIMD8 = 0;
|
||||
pPatch1->CompiledSIMD16 = 0;
|
||||
pPatch1->CompiledSIMD32 = 1;
|
||||
pPatch1->HasDeviceEnqueue = 1;
|
||||
pPatch1->MayAccessUndeclaredResource = 0;
|
||||
pPatch1->UsesFencesForReadWriteImages = 0;
|
||||
|
||||
@@ -31,20 +31,16 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture
|
||||
cl_device_id device = pClDevice;
|
||||
ContextFixture::SetUp(1, &device);
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
|
||||
pMediaVFEstate = new SPatchMediaVFEState();
|
||||
pMediaVFEstate->PerThreadScratchSpace = 1024;
|
||||
pMediaVFEstate->ScratchSpaceOffset = 0;
|
||||
|
||||
pExecutionEnvironment = new SPatchExecutionEnvironment();
|
||||
pExecutionEnvironment->CompiledSIMD32 = 1;
|
||||
pExecutionEnvironment->LargestCompiledSIMDSize = 32;
|
||||
pExecutionEnvironment->NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
|
||||
pPrintfSurface = new SPatchAllocateStatelessPrintfSurface();
|
||||
|
||||
pKernelInfo->patchInfo.mediavfestate = pMediaVFEstate;
|
||||
pKernelInfo->patchInfo.executionEnvironment = pExecutionEnvironment;
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface = pPrintfSurface;
|
||||
|
||||
KernelArgPatchInfo kernelArg1PatchInfo;
|
||||
@@ -79,7 +75,6 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture
|
||||
void TearDown() override {
|
||||
delete pKernel;
|
||||
delete pPrintfSurface;
|
||||
delete pExecutionEnvironment;
|
||||
delete pMediaVFEstate;
|
||||
delete pProgram;
|
||||
|
||||
@@ -89,7 +84,6 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture
|
||||
|
||||
std::unique_ptr<KernelInfo> pKernelInfo;
|
||||
SPatchMediaVFEState *pMediaVFEstate = nullptr;
|
||||
SPatchExecutionEnvironment *pExecutionEnvironment;
|
||||
SPatchAllocateStatelessPrintfSurface *pPrintfSurface = nullptr;
|
||||
MockProgram *pProgram = nullptr;
|
||||
MockKernel *pKernel = nullptr;
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "opencl/test/unit_test/fixtures/image_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_allocation_properties.h"
|
||||
|
||||
#include <iostream>
|
||||
using namespace NEO;
|
||||
|
||||
void HardwareCommandsTest::SetUp() {
|
||||
@@ -560,6 +561,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
|
||||
modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1;
|
||||
modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 0;
|
||||
KernelInfoContainer kernelInfos;
|
||||
modifiedKernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
kernelInfos.push_back(&modifiedKernelInfo);
|
||||
MockKernel mockKernel(kernel->getProgram(), kernelInfos, false);
|
||||
uint32_t interfaceDescriptorIndex = 0;
|
||||
@@ -583,18 +585,21 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
|
||||
true,
|
||||
*pDevice);
|
||||
|
||||
size_t numThreads = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
|
||||
numThreads = Math::divideAndRoundUp(numThreads, modifiedKernelInfo.getMaxSimdSize());
|
||||
size_t expectedIohSize = ((modifiedKernelInfo.getMaxSimdSize() == 32) ? 32 : 16) * 3 * numThreads * sizeof(uint16_t);
|
||||
constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF);
|
||||
size_t localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
|
||||
ASSERT_NE(nullptr, modifiedKernelInfo.patchInfo.threadPayload);
|
||||
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*modifiedKernelInfo.patchInfo.threadPayload);
|
||||
size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(modifiedKernelInfo.getMaxSimdSize(), grfSize, numChannels, localWorkSize);
|
||||
ASSERT_LE(expectedIohSize, ioh.getUsed());
|
||||
|
||||
auto expectedLocalIds = alignedMalloc(expectedIohSize, 64);
|
||||
uint32_t grfSize = sizeof(typename FamilyType::GRF);
|
||||
generateLocalIDs(expectedLocalIds, modifiedKernelInfo.getMaxSimdSize(),
|
||||
std::array<uint16_t, 3>{{localWorkSizeX, localWorkSizeY, localWorkSizeZ}},
|
||||
std::array<uint8_t, 3>{{modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0],
|
||||
modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1],
|
||||
modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}},
|
||||
false, grfSize);
|
||||
|
||||
EXPECT_EQ(0, memcmp(expectedLocalIds, ioh.getCpuBase(), expectedIohSize));
|
||||
alignedFree(expectedLocalIds);
|
||||
}
|
||||
@@ -684,12 +689,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// define patch offsets for global, constant, private, event pool and default device queue surfaces
|
||||
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
@@ -854,6 +854,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
|
||||
HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTableStatesForKernelThenSshIsNotUsed) {
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// create program with valid context
|
||||
MockContext context;
|
||||
@@ -909,6 +910,7 @@ HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTabl
|
||||
HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStatesThenPointerIsZero) {
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// create program with valid context
|
||||
MockContext context;
|
||||
|
||||
@@ -34,16 +34,14 @@ struct PerThreadDataTests : public ClDeviceFixture,
|
||||
threadPayload.UnusedPerThreadConstantPresent =
|
||||
!(localIdX || localIdY || localIdZ || flattenedId);
|
||||
|
||||
executionEnvironment = {};
|
||||
executionEnvironment.CompiledSIMD32 = 1;
|
||||
executionEnvironment.LargestCompiledSIMDSize = 32;
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
|
||||
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
|
||||
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
|
||||
|
||||
kernelInfo.patchInfo.threadPayload = &threadPayload;
|
||||
|
||||
simd = executionEnvironment.LargestCompiledSIMDSize;
|
||||
simd = kernelInfo.getMaxSimdSize();
|
||||
numChannels = threadPayload.LocalIDXPresent +
|
||||
threadPayload.LocalIDYPresent +
|
||||
threadPayload.LocalIDZPresent;
|
||||
@@ -69,7 +67,6 @@ struct PerThreadDataTests : public ClDeviceFixture,
|
||||
|
||||
SKernelBinaryHeaderCommon kernelHeader;
|
||||
SPatchThreadPayload threadPayload;
|
||||
SPatchExecutionEnvironment executionEnvironment;
|
||||
KernelInfo kernelInfo;
|
||||
};
|
||||
|
||||
|
||||
@@ -240,7 +240,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD
|
||||
EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference);
|
||||
EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle);
|
||||
EXPECT_EQ(preemptionMode, mockCsr->passedDispatchFlags.preemptionMode);
|
||||
EXPECT_EQ(kernel.mockKernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->NumGRFRequired, mockCsr->passedDispatchFlags.numGrfRequired);
|
||||
EXPECT_EQ(kernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.numGrfRequired, mockCsr->passedDispatchFlags.numGrfRequired);
|
||||
EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings);
|
||||
EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
|
||||
EXPECT_EQ(flushDC, mockCsr->passedDispatchFlags.dcFlush);
|
||||
|
||||
@@ -46,6 +46,7 @@ class CloneKernelFixture : public ContextFixture, public ClDeviceFixture {
|
||||
|
||||
// define kernel info
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// setup kernel arg offsets
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
@@ -45,6 +45,8 @@ class KernelArgAcceleratorFixture : public ContextFixture, public ClDeviceFixtur
|
||||
ContextFixture::SetUp(1, &device);
|
||||
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
pKernelInfo->kernelArgInfo.resize(1);
|
||||
|
||||
@@ -33,6 +33,7 @@ void KernelArgBufferFixture::SetUp() {
|
||||
|
||||
// define kernel info
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// setup kernel arg offsets
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
@@ -23,6 +23,8 @@ struct KernelArgDevQueueTest : public DeviceHostQueueFixture<DeviceQueue> {
|
||||
pDeviceQueue = createQueueObject();
|
||||
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
pKernelInfo->kernelArgInfo.resize(1);
|
||||
pKernelInfo->kernelArgInfo[0].isDeviceQueue = true;
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@ class KernelArgPipeFixture : public ContextFixture, public ClDeviceFixture {
|
||||
|
||||
// define kernel info
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// setup kernel arg offsets
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
@@ -38,6 +38,7 @@ class KernelArgSvmFixture_ : public ContextFixture, public ClDeviceFixture {
|
||||
|
||||
// define kernel info
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// setup kernel arg offsets
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
@@ -28,6 +28,7 @@ class KernelArgImmediateTest : public Test<ClDeviceFixture> {
|
||||
|
||||
// define kernel info
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// setup kernel arg offsets
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
@@ -654,9 +654,7 @@ TEST(KernelReflectionSurfaceTestSingle, GivenNoKernelArgsWhenObtainingKernelRefl
|
||||
cl_queue_properties properties[1] = {0};
|
||||
DeviceQueue devQueue(&context, device.get(), properties[0]);
|
||||
|
||||
SPatchExecutionEnvironment environment = {};
|
||||
environment.HasDeviceEnqueue = 1;
|
||||
info.patchInfo.executionEnvironment = &environment;
|
||||
info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
|
||||
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
dataParameterStream.Size = 0;
|
||||
@@ -709,9 +707,7 @@ TEST(KernelReflectionSurfaceTestSingle, GivenDeviceQueueKernelArgWhenObtainingKe
|
||||
uint32_t devQueueCurbeOffset = 16;
|
||||
uint32_t devQueueCurbeSize = 4;
|
||||
|
||||
SPatchExecutionEnvironment environment = {};
|
||||
environment.HasDeviceEnqueue = 1;
|
||||
info.patchInfo.executionEnvironment = &environment;
|
||||
info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
|
||||
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
dataParameterStream.Size = 0;
|
||||
@@ -1245,11 +1241,8 @@ class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam<s
|
||||
|
||||
info.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
|
||||
executionEnvironment = {};
|
||||
executionEnvironment.LargestCompiledSIMDSize = 16;
|
||||
executionEnvironment.HasBarriers = 1;
|
||||
|
||||
info.patchInfo.executionEnvironment = &executionEnvironment;
|
||||
info.kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
info.kernelDescriptor.kernelAttributes.barrierCount = 1;
|
||||
|
||||
info.patchInfo.threadPayload = &threadPayload;
|
||||
|
||||
@@ -1274,7 +1267,6 @@ class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam<s
|
||||
KernelInfo info;
|
||||
SPatchSamplerStateArray samplerStateArray;
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
SPatchExecutionEnvironment executionEnvironment;
|
||||
SPatchThreadPayload threadPayload;
|
||||
SPatchAllocateStatelessPrivateSurface privateSurface;
|
||||
|
||||
@@ -1330,8 +1322,8 @@ TEST_P(ReflectionSurfaceHelperSetKernelDataTest, WhenSettingKernelDataThenDataAn
|
||||
EXPECT_EQ(dataParameterStream.DataParameterStreamSize, kernelData->m_sizeOfConstantBuffer);
|
||||
EXPECT_EQ(tokenMask, kernelData->m_PatchTokensMask);
|
||||
EXPECT_EQ(0u, kernelData->m_ScratchSpacePatchValue);
|
||||
EXPECT_EQ(executionEnvironment.LargestCompiledSIMDSize, kernelData->m_SIMDSize);
|
||||
EXPECT_EQ(executionEnvironment.HasBarriers, kernelData->m_HasBarriers);
|
||||
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.simdSize, kernelData->m_SIMDSize);
|
||||
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.barrierCount, kernelData->m_HasBarriers);
|
||||
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], kernelData->m_RequiredWkgSizes[0]);
|
||||
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1], kernelData->m_RequiredWkgSizes[1]);
|
||||
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2], kernelData->m_RequiredWkgSizes[2]);
|
||||
@@ -1356,36 +1348,6 @@ TEST_P(ReflectionSurfaceHelperSetKernelDataTest, WhenSettingKernelDataThenDataAn
|
||||
EXPECT_EQ(expectedOffset, offset);
|
||||
}
|
||||
|
||||
TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullExecutionEnvironmentWhenSettingKernelDataThenDataAndOffsetsAreCorrect) {
|
||||
info.patchInfo.executionEnvironment = nullptr;
|
||||
|
||||
std::unique_ptr<char> kernelDataMemory(new char[4096]);
|
||||
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParams;
|
||||
|
||||
uint64_t tokenMask = 1 | 2 | 4;
|
||||
|
||||
size_t maxConstantBufferSize = 32;
|
||||
size_t samplerCount = 1;
|
||||
size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getClDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize();
|
||||
|
||||
uint32_t offsetInKernelDataMemory = 0;
|
||||
uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory,
|
||||
curbeParams, tokenMask, maxConstantBufferSize, samplerCount,
|
||||
info, pPlatform->getClDevice(0)->getHardwareInfo());
|
||||
|
||||
IGIL_KernelData *kernelData = reinterpret_cast<IGIL_KernelData *>(kernelDataMemory.get() + offsetInKernelDataMemory);
|
||||
|
||||
EXPECT_EQ(0u, kernelData->m_SIMDSize);
|
||||
EXPECT_EQ(0u, kernelData->m_HasBarriers);
|
||||
|
||||
size_t expectedOffset = offsetInKernelDataMemory;
|
||||
expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *));
|
||||
expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams);
|
||||
|
||||
EXPECT_EQ(expectedOffset, offset);
|
||||
}
|
||||
|
||||
TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullThreadPayloadWhenSettingKernelDataThenDataAndOffsetsAreCorrect) {
|
||||
info.patchInfo.threadPayload = nullptr;
|
||||
|
||||
@@ -2128,9 +2090,7 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenNoKernelArgsWhenObtainingKernelRefl
|
||||
cl_queue_properties properties[1] = {0};
|
||||
DeviceQueue devQueue(context.get(), device1, properties[0]);
|
||||
|
||||
SPatchExecutionEnvironment environment = {};
|
||||
environment.HasDeviceEnqueue = 1;
|
||||
info.patchInfo.executionEnvironment = &environment;
|
||||
info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
|
||||
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
dataParameterStream.Size = 0;
|
||||
@@ -2185,9 +2145,7 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenDeviceQueueKernelArgWhenObtainingKe
|
||||
uint32_t devQueueCurbeOffset = 16;
|
||||
uint32_t devQueueCurbeSize = 4;
|
||||
|
||||
SPatchExecutionEnvironment environment = {};
|
||||
environment.HasDeviceEnqueue = 1;
|
||||
info.patchInfo.executionEnvironment = &environment;
|
||||
info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
|
||||
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
dataParameterStream.Size = 0;
|
||||
|
||||
@@ -24,6 +24,8 @@ class KernelSlmArgTest : public Test<ClDeviceFixture> {
|
||||
void SetUp() override {
|
||||
ClDeviceFixture::SetUp();
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
pKernelInfo->kernelArgInfo.resize(3);
|
||||
|
||||
@@ -25,11 +25,6 @@ struct KernelSLMAndBarrierTest : public ClDeviceFixture,
|
||||
memset(&dataParameterStream, 0, sizeof(dataParameterStream));
|
||||
dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData);
|
||||
|
||||
executionEnvironment = {};
|
||||
memset(&executionEnvironment, 0, sizeof(executionEnvironment));
|
||||
executionEnvironment.CompiledSIMD32 = 1;
|
||||
executionEnvironment.LargestCompiledSIMDSize = 32;
|
||||
|
||||
memset(&threadPayload, 0, sizeof(threadPayload));
|
||||
threadPayload.LocalIDXPresent = 1;
|
||||
threadPayload.LocalIDYPresent = 1;
|
||||
@@ -38,7 +33,9 @@ struct KernelSLMAndBarrierTest : public ClDeviceFixture,
|
||||
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
|
||||
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
|
||||
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
|
||||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
kernelInfo.patchInfo.threadPayload = &threadPayload;
|
||||
}
|
||||
void TearDown() override {
|
||||
@@ -52,7 +49,6 @@ struct KernelSLMAndBarrierTest : public ClDeviceFixture,
|
||||
|
||||
SKernelBinaryHeaderCommon kernelHeader;
|
||||
SPatchDataParameterStream dataParameterStream;
|
||||
SPatchExecutionEnvironment executionEnvironment;
|
||||
SPatchThreadPayload threadPayload;
|
||||
KernelInfo kernelInfo;
|
||||
|
||||
@@ -69,7 +65,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgr
|
||||
typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
// define kernel info
|
||||
executionEnvironment.HasBarriers = 1;
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1;
|
||||
kernelInfo.workloadInfo.slmStaticSize = GetParam() * KB;
|
||||
|
||||
MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex));
|
||||
@@ -135,7 +131,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgr
|
||||
}
|
||||
ASSERT_GT(ExpectedSLMSize, 0u);
|
||||
EXPECT_EQ(ExpectedSLMSize, pSrcIDData->getSharedLocalMemorySize());
|
||||
EXPECT_EQ(!!executionEnvironment.HasBarriers, pSrcIDData->getBarrierEnable());
|
||||
EXPECT_EQ(kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers(), pSrcIDData->getBarrierEnable());
|
||||
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, pSrcIDData->getDenormMode());
|
||||
|
||||
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
|
||||
|
||||
@@ -430,7 +430,6 @@ TEST(PatchInfo, WhenPatchInfoIsCreatedThenMembersAreNullptr) {
|
||||
EXPECT_EQ(nullptr, patchInfo.bindingTableState);
|
||||
EXPECT_EQ(nullptr, patchInfo.dataParameterStream);
|
||||
EXPECT_EQ(nullptr, patchInfo.threadPayload);
|
||||
EXPECT_EQ(nullptr, patchInfo.executionEnvironment);
|
||||
EXPECT_EQ(nullptr, patchInfo.pKernelAttributesInfo);
|
||||
EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrivateSurface);
|
||||
EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization);
|
||||
@@ -549,11 +548,7 @@ TEST_F(KernelPrivateSurfaceTest, WhenChangingResidencyThenCsrResidencySizeIsUpda
|
||||
tokenDPS.DataParameterStreamSize = 64;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// create kernel
|
||||
MockContext context;
|
||||
@@ -590,9 +585,7 @@ TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWh
|
||||
tokenDPS.DataParameterStreamSize = 64;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
MockContext context;
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
@@ -631,11 +624,7 @@ TEST_F(KernelPrivateSurfaceTest, WhenPrivateSurfaceAllocationFailsThenOutOfResou
|
||||
tokenDPS.DataParameterStreamSize = 64;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// create kernel
|
||||
MockContext context;
|
||||
@@ -676,11 +665,7 @@ TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateS
|
||||
tokenDPS.DataParameterStreamSize = 64;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// create kernel
|
||||
MockContext context;
|
||||
@@ -700,11 +685,7 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup constant memory
|
||||
SPatchAllocateStatelessPrivateSurface AllocateStatelessPrivateMemorySurface;
|
||||
@@ -752,11 +733,7 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup global memory
|
||||
char buffer[16];
|
||||
@@ -801,12 +778,11 @@ TEST_F(KernelPrivateSurfaceTest, givenNonNullDataParameterStreamWhenGettingConst
|
||||
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4ThenReturnOutOfResources) {
|
||||
auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
|
||||
pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
|
||||
auto executionEnvironment = std::unique_ptr<SPatchExecutionEnvironment>(new SPatchExecutionEnvironment());
|
||||
*executionEnvironment = {};
|
||||
executionEnvironment->CompiledSIMD32 = 32;
|
||||
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
|
||||
pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
MockContext context;
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)));
|
||||
@@ -820,12 +796,11 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe
|
||||
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4And32BitAllocationsThenReturnOutOfResources) {
|
||||
auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
|
||||
pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
|
||||
auto executionEnvironment = std::unique_ptr<SPatchExecutionEnvironment>(new SPatchExecutionEnvironment());
|
||||
*executionEnvironment = {};
|
||||
executionEnvironment->CompiledSIMD32 = 32;
|
||||
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
|
||||
pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
MockContext context;
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)));
|
||||
@@ -839,12 +814,11 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe
|
||||
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize8And32BitAllocationsThenReturnOutOfResources) {
|
||||
auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
|
||||
pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
|
||||
auto executionEnvironment = std::unique_ptr<SPatchExecutionEnvironment>(new SPatchExecutionEnvironment());
|
||||
*executionEnvironment = {};
|
||||
executionEnvironment->CompiledSIMD32 = 32;
|
||||
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
|
||||
pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
MockContext context;
|
||||
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
|
||||
std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)));
|
||||
@@ -871,11 +845,7 @@ TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalS
|
||||
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
char buffer[16];
|
||||
|
||||
@@ -914,11 +884,7 @@ TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalS
|
||||
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
char buffer[16];
|
||||
|
||||
@@ -944,11 +910,7 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup global memory
|
||||
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
|
||||
@@ -998,12 +960,7 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup global memory
|
||||
char buffer[16];
|
||||
@@ -1044,11 +1001,7 @@ TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConst
|
||||
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
char buffer[16];
|
||||
|
||||
@@ -1086,11 +1039,7 @@ TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConst
|
||||
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
|
||||
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
char buffer[16];
|
||||
|
||||
@@ -1115,12 +1064,7 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup constant memory
|
||||
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization;
|
||||
@@ -1170,12 +1114,7 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup global memory
|
||||
char buffer[16];
|
||||
@@ -1204,12 +1143,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenK
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup event pool surface
|
||||
SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
|
||||
@@ -1253,12 +1187,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenE
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup event pool surface
|
||||
SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
|
||||
@@ -1302,13 +1231,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenKernelWithNullEvent
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = nullptr;
|
||||
|
||||
// create kernel
|
||||
@@ -1334,12 +1257,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup event pool surface
|
||||
SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
|
||||
@@ -1370,12 +1288,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup event pool surface
|
||||
SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
|
||||
@@ -1408,12 +1321,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup default device queue surface
|
||||
SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
|
||||
@@ -1457,12 +1365,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup default device queue surface
|
||||
SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
|
||||
@@ -1508,12 +1411,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup default device queue surface
|
||||
SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
|
||||
@@ -1542,13 +1440,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenKernelWith
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = nullptr;
|
||||
|
||||
// create kernel
|
||||
@@ -1574,12 +1466,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
SPatchExecutionEnvironment tokenEE = {};
|
||||
tokenEE.CompiledSIMD8 = false;
|
||||
tokenEE.CompiledSIMD16 = false;
|
||||
tokenEE.CompiledSIMD32 = true;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
// setup default device queue surface
|
||||
SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
|
||||
@@ -1616,6 +1503,8 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIs
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
|
||||
@@ -1651,6 +1540,8 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIs
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFunctionsIsaAllocationIsMadeResident) {
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
|
||||
@@ -1689,6 +1580,8 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFun
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBufferIsMadeResident) {
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
|
||||
@@ -2030,6 +1923,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemor
|
||||
|
||||
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
pKernelInfo->kernelArgInfo.resize(3);
|
||||
pKernelInfo->kernelArgInfo[2].isImage = true;
|
||||
pKernelInfo->kernelArgInfo[1].isMediaBlockImage = true;
|
||||
@@ -2047,6 +1941,7 @@ TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasIma
|
||||
|
||||
TEST(KernelImageDetectionTests, givenKernelWithImagesAndBuffersWhenItIsAskedIfItHasImagesOnlyThenFalseIsReturned) {
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
pKernelInfo->kernelArgInfo.resize(3);
|
||||
pKernelInfo->kernelArgInfo[2].isImage = true;
|
||||
pKernelInfo->kernelArgInfo[1].isBuffer = true;
|
||||
@@ -2064,6 +1959,7 @@ TEST(KernelImageDetectionTests, givenKernelWithImagesAndBuffersWhenItIsAskedIfIt
|
||||
|
||||
TEST(KernelImageDetectionTests, givenKernelWithNoImagesWhenItIsAskedIfItHasImagesOnlyThenFalseIsReturned) {
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
pKernelInfo->kernelArgInfo.resize(1);
|
||||
pKernelInfo->kernelArgInfo[0].isBuffer = true;
|
||||
|
||||
@@ -2118,6 +2014,8 @@ HWTEST_F(KernelResidencyTest, WhenMakingArgsResidentThenImageFromImageCheckIsCor
|
||||
EXPECT_EQ(imageY->getMediaPlaneType(), 0u);
|
||||
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
KernelArgInfo kernelArgInfo;
|
||||
kernelArgInfo.isImage = true;
|
||||
|
||||
@@ -2144,8 +2042,7 @@ struct KernelExecutionEnvironmentTest : public Test<ClDeviceFixture> {
|
||||
|
||||
program = std::make_unique<MockProgram>(toClDeviceVector(*pClDevice));
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
executionEnvironment.CompiledSIMD32 = 1;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
|
||||
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
|
||||
@@ -2163,65 +2060,6 @@ struct KernelExecutionEnvironmentTest : public Test<ClDeviceFixture> {
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
};
|
||||
|
||||
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledSimd32TrueWhenGettingMaxSimdSizeThen32IsReturned) {
|
||||
|
||||
executionEnvironment.CompiledSIMD32 = true;
|
||||
executionEnvironment.CompiledSIMD16 = true;
|
||||
executionEnvironment.CompiledSIMD8 = true;
|
||||
|
||||
EXPECT_EQ(32u, this->pKernelInfo->getMaxSimdSize());
|
||||
}
|
||||
|
||||
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledSimd32FalseAndCompiledSimd16TrueWhenGettingMaxSimdSizeThen16IsReturned) {
|
||||
|
||||
executionEnvironment.CompiledSIMD32 = false;
|
||||
executionEnvironment.CompiledSIMD16 = true;
|
||||
executionEnvironment.CompiledSIMD8 = true;
|
||||
|
||||
EXPECT_EQ(16u, this->pKernelInfo->getMaxSimdSize());
|
||||
}
|
||||
|
||||
TEST_F(KernelExecutionEnvironmentTest, GivenOnlyCompiledSimd8TrueWhenGettingMaxSimdSizeThen8IsReturned) {
|
||||
|
||||
executionEnvironment.CompiledSIMD32 = false;
|
||||
executionEnvironment.CompiledSIMD16 = false;
|
||||
executionEnvironment.CompiledSIMD8 = true;
|
||||
|
||||
EXPECT_EQ(8u, this->pKernelInfo->getMaxSimdSize());
|
||||
}
|
||||
|
||||
TEST_F(KernelExecutionEnvironmentTest, GivenAllCompiledSimdFalseWhenGettingMaxSimdSizeThen8IsReturned) {
|
||||
|
||||
executionEnvironment.CompiledSIMD32 = false;
|
||||
executionEnvironment.CompiledSIMD16 = false;
|
||||
executionEnvironment.CompiledSIMD8 = false;
|
||||
|
||||
EXPECT_EQ(8u, this->pKernelInfo->getMaxSimdSize());
|
||||
}
|
||||
|
||||
TEST_F(KernelExecutionEnvironmentTest, GivenExecutionEnvironmentNotAvailableWhenGettingMaxSimdSizeThen1IsReturned) {
|
||||
|
||||
executionEnvironment.CompiledSIMD32 = false;
|
||||
executionEnvironment.CompiledSIMD16 = false;
|
||||
executionEnvironment.CompiledSIMD8 = false;
|
||||
|
||||
auto oldExcEnv = this->pKernelInfo->patchInfo.executionEnvironment;
|
||||
|
||||
this->pKernelInfo->patchInfo.executionEnvironment = nullptr;
|
||||
EXPECT_EQ(1U, this->pKernelInfo->getMaxSimdSize());
|
||||
this->pKernelInfo->patchInfo.executionEnvironment = oldExcEnv;
|
||||
}
|
||||
|
||||
TEST_F(KernelExecutionEnvironmentTest, GivenLargestCompiledSimdSizeEqualOneWhenGettingMaxSimdSizeThen1IsReturned) {
|
||||
|
||||
executionEnvironment.LargestCompiledSIMDSize = 1;
|
||||
|
||||
auto oldExcEnv = this->pKernelInfo->patchInfo.executionEnvironment;
|
||||
|
||||
EXPECT_EQ(1U, this->pKernelInfo->getMaxSimdSize());
|
||||
this->pKernelInfo->patchInfo.executionEnvironment = oldExcEnv;
|
||||
}
|
||||
|
||||
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeIsZeroWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) {
|
||||
auto maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
|
||||
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
@@ -2285,8 +2123,7 @@ struct KernelCrossThreadTests : Test<ClDeviceFixture> {
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
ASSERT_NE(nullptr, pKernelInfo);
|
||||
pKernelInfo->patchInfo.dataParameterStream = &patchDataParameterStream;
|
||||
executionEnvironment.CompiledSIMD32 = 1;
|
||||
pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
@@ -2394,7 +2231,6 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeI
|
||||
}
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSizeIsCorrect) {
|
||||
|
||||
pKernelInfo->workloadInfo.maxWorkGroupSizeOffset = 12;
|
||||
|
||||
MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
|
||||
@@ -2408,12 +2244,9 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSi
|
||||
}
|
||||
|
||||
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenDataParameterSimdSizeIsCorrect) {
|
||||
|
||||
pKernelInfo->workloadInfo.simdSizeOffset = 16;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
|
||||
executionEnvironment.CompiledSIMD32 = false;
|
||||
executionEnvironment.CompiledSIMD16 = true;
|
||||
executionEnvironment.CompiledSIMD8 = true;
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].dataParameterSimdSize);
|
||||
@@ -2504,10 +2337,7 @@ TEST_F(KernelCrossThreadTests, WhenPatchingBlocksSimdSizeThenSimdSizeIsPatchedCo
|
||||
|
||||
// add a new block kernel to program
|
||||
auto infoBlock = new KernelInfo();
|
||||
kernel->executionEnvironmentBlock.CompiledSIMD8 = 0;
|
||||
kernel->executionEnvironmentBlock.CompiledSIMD16 = 1;
|
||||
kernel->executionEnvironmentBlock.CompiledSIMD32 = 0;
|
||||
infoBlock->patchInfo.executionEnvironment = &kernel->executionEnvironmentBlock;
|
||||
infoBlock->kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
kernel->mockProgram->blockKernelManager->addBlockKernelInfo(infoBlock);
|
||||
|
||||
// patch block's simd size
|
||||
@@ -2649,15 +2479,18 @@ TEST(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedThenMa
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
|
||||
|
||||
MockKernelWithInternals kernel(*device);
|
||||
kernel.executionEnvironment.LargestCompiledSIMDSize = CommonConstants::maximalSimdSize;
|
||||
|
||||
size_t maxKernelWkgSize;
|
||||
|
||||
kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
|
||||
EXPECT_EQ(1024u, maxKernelWkgSize);
|
||||
kernel.executionEnvironment.LargestCompiledSIMDSize = 16;
|
||||
|
||||
kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
|
||||
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
|
||||
EXPECT_EQ(512u, maxKernelWkgSize);
|
||||
kernel.executionEnvironment.LargestCompiledSIMDSize = 8;
|
||||
|
||||
kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 8;
|
||||
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
|
||||
EXPECT_EQ(256u, maxKernelWkgSize);
|
||||
}
|
||||
@@ -2823,9 +2656,7 @@ TEST(KernelTest, givenKernelCompiledWithSimdSizeLowerThanExpectedWhenInitializin
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
auto minSimd = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily).getMinimalSIMDSize();
|
||||
MockKernelWithInternals kernel(*device);
|
||||
kernel.executionEnvironment.CompiledSIMD32 = 0;
|
||||
kernel.executionEnvironment.CompiledSIMD16 = 0;
|
||||
kernel.executionEnvironment.CompiledSIMD8 = 1;
|
||||
kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 8;
|
||||
|
||||
cl_int retVal = kernel.mockKernel->initialize();
|
||||
|
||||
@@ -2839,10 +2670,7 @@ TEST(KernelTest, givenKernelCompiledWithSimdSizeLowerThanExpectedWhenInitializin
|
||||
TEST(KernelTest, givenKernelCompiledWithSimdOneWhenInitializingThenReturnError) {
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
MockKernelWithInternals kernel(*device);
|
||||
kernel.executionEnvironment.CompiledSIMD32 = 0;
|
||||
kernel.executionEnvironment.CompiledSIMD16 = 0;
|
||||
kernel.executionEnvironment.CompiledSIMD8 = 0;
|
||||
kernel.executionEnvironment.LargestCompiledSIMDSize = 1;
|
||||
kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
cl_int retVal = kernel.mockKernel->initialize();
|
||||
|
||||
@@ -3133,10 +2961,11 @@ TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsNotSetThenKe
|
||||
}
|
||||
|
||||
TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaultValue) {
|
||||
SPatchExecutionEnvironment sPatchExecutionEnvironment = {};
|
||||
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = true;
|
||||
UltClDeviceFactory deviceFactory{1, 0};
|
||||
MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0], sPatchExecutionEnvironment};
|
||||
|
||||
SPatchExecutionEnvironment sPatchExecEnv = {};
|
||||
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true;
|
||||
MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0], sPatchExecEnv};
|
||||
|
||||
auto &mockKernel = *mockKernelWithInternals.mockKernel;
|
||||
auto &hwHelper = HwHelper::get(deviceFactory.rootDevices[0]->getHardwareInfo().platform.eRenderCoreFamily);
|
||||
@@ -3144,10 +2973,9 @@ TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaul
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelWhenSettingAdditinalKernelExecInfoThenCorrectValueIsSet) {
|
||||
SPatchExecutionEnvironment sPatchExecutionEnvironment = {};
|
||||
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = true;
|
||||
UltClDeviceFactory deviceFactory{1, 0};
|
||||
MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0], sPatchExecutionEnvironment};
|
||||
MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0]};
|
||||
mockKernelWithInternals.kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress = true;
|
||||
|
||||
auto &mockKernel = *mockKernelWithInternals.mockKernel;
|
||||
|
||||
@@ -3219,6 +3047,7 @@ using KernelMultiRootDeviceTest = MultiRootDeviceFixture;
|
||||
|
||||
TEST_F(KernelMultiRootDeviceTest, givenKernelWithPrivateSurfaceWhenInitializeThenPrivateSurfacesHaveCorrectRootDeviceIndex) {
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// setup private memory
|
||||
SPatchAllocateStatelessPrivateSurface tokenSPS;
|
||||
|
||||
@@ -23,6 +23,8 @@ class KernelTransformableTest : public ::testing::Test {
|
||||
void SetUp() override {
|
||||
context = std::make_unique<MockContext>(deviceFactory.rootDevices[rootDeviceIndex]);
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
KernelArgPatchInfo kernelArgPatchInfo;
|
||||
|
||||
pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
|
||||
|
||||
@@ -34,13 +34,10 @@ class MockKernelWithArgumentAccess : public Kernel {
|
||||
};
|
||||
|
||||
TEST(ParentKernelTest, WhenArgsAddedThenObjectCountsAreIncremented) {
|
||||
KernelInfo info;
|
||||
MockClDevice *device = new MockClDevice{new MockDevice};
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
SPatchExecutionEnvironment environment = {};
|
||||
environment.HasDeviceEnqueue = 1;
|
||||
|
||||
info.patchInfo.executionEnvironment = &environment;
|
||||
KernelInfo info;
|
||||
info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
|
||||
|
||||
MockKernelWithArgumentAccess kernel(&program, MockKernel::toKernelInfoContainer(info, device->getRootDeviceIndex()));
|
||||
|
||||
@@ -155,10 +152,7 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsA
|
||||
|
||||
infoBlock->patchInfo.threadPayload = threadPayloadBlock;
|
||||
|
||||
SPatchExecutionEnvironment *executionEnvironmentBlock = new SPatchExecutionEnvironment;
|
||||
*executionEnvironmentBlock = {};
|
||||
executionEnvironmentBlock->HasDeviceEnqueue = 1;
|
||||
infoBlock->patchInfo.executionEnvironment = executionEnvironmentBlock;
|
||||
infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
|
||||
|
||||
SPatchDataParameterStream *streamBlock = new SPatchDataParameterStream;
|
||||
streamBlock->DataParameterStreamSize = 0;
|
||||
|
||||
@@ -32,10 +32,7 @@ class BufferSetArgTest : public ContextFixture,
|
||||
using ContextFixture::SetUp;
|
||||
|
||||
public:
|
||||
BufferSetArgTest()
|
||||
|
||||
{
|
||||
}
|
||||
BufferSetArgTest() {}
|
||||
|
||||
protected:
|
||||
void SetUp() override {
|
||||
@@ -43,7 +40,7 @@ class BufferSetArgTest : public ContextFixture,
|
||||
cl_device_id device = pClDevice;
|
||||
ContextFixture::SetUp(1, &device);
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
ASSERT_NE(nullptr, pKernelInfo);
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// define kernel info
|
||||
// setup kernel arg offsets
|
||||
|
||||
@@ -64,6 +64,7 @@ class ImageSetArgTest : public ClDeviceFixture,
|
||||
void SetUp() override {
|
||||
ClDeviceFixture::SetUp();
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// define kernel info
|
||||
pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
|
||||
@@ -918,6 +919,7 @@ class ImageMediaBlockSetArgTest : public ImageSetArgTest {
|
||||
void SetUp() override {
|
||||
ClDeviceFixture::SetUp();
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// define kernel info
|
||||
pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
#include <cassert>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnvironment &execEnv);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Kernel - Core implementation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -107,7 +107,6 @@ class MockKernel : public Kernel {
|
||||
}
|
||||
|
||||
if (kernelInfoAllocated) {
|
||||
delete kernelInfoAllocated->patchInfo.executionEnvironment;
|
||||
delete kernelInfoAllocated->patchInfo.threadPayload;
|
||||
delete kernelInfoAllocated;
|
||||
}
|
||||
@@ -132,12 +131,9 @@ class MockKernel : public Kernel {
|
||||
|
||||
info->patchInfo.threadPayload = threadPayload;
|
||||
|
||||
SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment;
|
||||
memset(executionEnvironment, 0, sizeof(SPatchExecutionEnvironment));
|
||||
executionEnvironment->HasDeviceEnqueue = 0;
|
||||
executionEnvironment->NumGRFRequired = grfNumber;
|
||||
executionEnvironment->CompiledSIMD32 = 1;
|
||||
info->patchInfo.executionEnvironment = executionEnvironment;
|
||||
info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = false;
|
||||
info->kernelDescriptor.kernelAttributes.numGrfRequired = grfNumber;
|
||||
info->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
info->crossThreadData = new char[crossThreadSize];
|
||||
|
||||
@@ -258,17 +254,12 @@ class MockKernel : public Kernel {
|
||||
//class below have enough internals to service Enqueue operation.
|
||||
class MockKernelWithInternals {
|
||||
public:
|
||||
MockKernelWithInternals(ClDevice &deviceArg, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment newExecutionEnvironment = {}) {
|
||||
MockKernelWithInternals(ClDevice &deviceArg, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) {
|
||||
memset(&kernelHeader, 0, sizeof(SKernelBinaryHeaderCommon));
|
||||
memset(&threadPayload, 0, sizeof(SPatchThreadPayload));
|
||||
memcpy(&executionEnvironment, &newExecutionEnvironment, sizeof(SPatchExecutionEnvironment));
|
||||
memset(&executionEnvironmentBlock, 0, sizeof(SPatchExecutionEnvironment));
|
||||
memset(&dataParameterStream, 0, sizeof(SPatchDataParameterStream));
|
||||
memset(&mediaVfeState, 0, sizeof(SPatchMediaVFEState));
|
||||
memset(&mediaVfeStateSlot1, 0, sizeof(SPatchMediaVFEState));
|
||||
executionEnvironment.NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
executionEnvironmentBlock.NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
executionEnvironment.CompiledSIMD32 = 1;
|
||||
threadPayload.LocalIDXPresent = 1;
|
||||
threadPayload.LocalIDYPresent = 1;
|
||||
threadPayload.LocalIDZPresent = 1;
|
||||
@@ -277,7 +268,11 @@ class MockKernelWithInternals {
|
||||
kernelInfo.heapInfo.pDsh = dshLocal;
|
||||
kernelInfo.heapInfo.SurfaceStateHeapSize = sizeof(sshLocal);
|
||||
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
|
||||
|
||||
populateKernelDescriptor(kernelInfo.kernelDescriptor, execEnv);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
kernelInfo.patchInfo.threadPayload = &threadPayload;
|
||||
kernelInfo.patchInfo.mediavfestate = &mediaVfeState;
|
||||
kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVfeStateSlot1;
|
||||
@@ -328,8 +323,7 @@ class MockKernelWithInternals {
|
||||
kernelInfo.kernelArgInfo[0].offsetHeap = 64;
|
||||
}
|
||||
}
|
||||
|
||||
MockKernelWithInternals(ClDevice &deviceArg, SPatchExecutionEnvironment newExecutionEnvironment) : MockKernelWithInternals(deviceArg, nullptr, false, newExecutionEnvironment) {
|
||||
MockKernelWithInternals(ClDevice &deviceArg, SPatchExecutionEnvironment execEnv) : MockKernelWithInternals(deviceArg, nullptr, false, execEnv) {
|
||||
mockKernel->initialize();
|
||||
}
|
||||
|
||||
@@ -353,8 +347,6 @@ class MockKernelWithInternals {
|
||||
SPatchMediaVFEState mediaVfeState = {};
|
||||
SPatchMediaVFEState mediaVfeStateSlot1 = {};
|
||||
SPatchDataParameterStream dataParameterStream = {};
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
SPatchExecutionEnvironment executionEnvironmentBlock = {};
|
||||
uint32_t kernelIsa[32];
|
||||
char crossThreadData[256];
|
||||
char sshLocal[128];
|
||||
@@ -390,12 +382,9 @@ class MockParentKernel : public Kernel {
|
||||
|
||||
info->patchInfo.threadPayload = threadPayload;
|
||||
|
||||
SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment;
|
||||
*executionEnvironment = {};
|
||||
executionEnvironment->HasDeviceEnqueue = 1;
|
||||
executionEnvironment->NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
executionEnvironment->CompiledSIMD32 = 1;
|
||||
info->patchInfo.executionEnvironment = executionEnvironment;
|
||||
info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
|
||||
info->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
info->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueue = new SPatchAllocateStatelessDefaultDeviceQueueSurface;
|
||||
allocateDeviceQueue->DataParamOffset = crossThreadOffset;
|
||||
@@ -509,11 +498,9 @@ class MockParentKernel : public Kernel {
|
||||
|
||||
infoBlock->patchInfo.threadPayload = threadPayloadBlock;
|
||||
|
||||
SPatchExecutionEnvironment *executionEnvironmentBlock = new SPatchExecutionEnvironment;
|
||||
executionEnvironmentBlock->HasDeviceEnqueue = 1;
|
||||
executionEnvironmentBlock->NumGRFRequired = GrfConfig::DefaultGrfNumber;
|
||||
executionEnvironmentBlock->CompiledSIMD32 = 1;
|
||||
infoBlock->patchInfo.executionEnvironment = executionEnvironmentBlock;
|
||||
infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
|
||||
infoBlock->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
infoBlock->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
SPatchDataParameterStream *streamBlock = new SPatchDataParameterStream;
|
||||
streamBlock->DataParameterStreamSize = 0;
|
||||
@@ -553,7 +540,6 @@ class MockParentKernel : public Kernel {
|
||||
continue;
|
||||
}
|
||||
auto &kernelInfo = *pKernelInfo;
|
||||
delete kernelInfo.patchInfo.executionEnvironment;
|
||||
delete kernelInfo.patchInfo.pAllocateStatelessDefaultDeviceQueueSurface;
|
||||
delete kernelInfo.patchInfo.pAllocateStatelessEventPoolSurface;
|
||||
delete kernelInfo.patchInfo.pAllocateStatelessPrintfSurface;
|
||||
@@ -567,7 +553,6 @@ class MockParentKernel : public Kernel {
|
||||
delete blockInfo->patchInfo.pAllocateStatelessEventPoolSurface;
|
||||
delete blockInfo->patchInfo.pAllocateStatelessPrintfSurface;
|
||||
delete blockInfo->patchInfo.threadPayload;
|
||||
delete blockInfo->patchInfo.executionEnvironment;
|
||||
delete blockInfo->patchInfo.dataParameterStream;
|
||||
delete blockInfo->patchInfo.bindingTableState;
|
||||
delete blockInfo->patchInfo.interfaceDescriptorData;
|
||||
|
||||
@@ -6,13 +6,13 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h"
|
||||
|
||||
#include "opencl/test/unit_test/fixtures/cl_preemption_fixture.h"
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
class ThreadGroupPreemptionTests : public DevicePreemptionTests {
|
||||
void SetUp() override {
|
||||
dbgRestore.reset(new DebugManagerStateRestore());
|
||||
@@ -50,7 +50,7 @@ TEST_F(ThreadGroupPreemptionTests, disallowByDevice) {
|
||||
|
||||
TEST_F(ThreadGroupPreemptionTests, disallowByReadWriteFencesWA) {
|
||||
PreemptionFlags flags = {};
|
||||
executionEnvironment->UsesFencesForReadWriteImages = 1u;
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages = true;
|
||||
waTable->waDisableLSQCROPERFforOCL = 1;
|
||||
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
|
||||
EXPECT_FALSE(PreemptionHelper::allowThreadGroupPreemption(flags));
|
||||
@@ -87,18 +87,9 @@ TEST_F(ThreadGroupPreemptionTests, allowDefaultModeForNonKernelRequest) {
|
||||
EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags));
|
||||
}
|
||||
|
||||
TEST_F(ThreadGroupPreemptionTests, givenKernelWithNoEnvironmentPatchSetWhenLSQCWaIsTurnedOnThenThreadGroupPreemptionIsBeingSelected) {
|
||||
PreemptionFlags flags = {};
|
||||
kernelInfo.get()->patchInfo.executionEnvironment = nullptr;
|
||||
waTable->waDisableLSQCROPERFforOCL = 1;
|
||||
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
|
||||
EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags));
|
||||
EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags));
|
||||
}
|
||||
|
||||
TEST_F(ThreadGroupPreemptionTests, givenKernelWithEnvironmentPatchSetWhenLSQCWaIsTurnedOnThenThreadGroupPreemptionIsBeingSelected) {
|
||||
PreemptionFlags flags = {};
|
||||
executionEnvironment.get()->UsesFencesForReadWriteImages = 0;
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages = false;
|
||||
waTable->waDisableLSQCROPERFforOCL = 1;
|
||||
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
|
||||
EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags));
|
||||
@@ -107,7 +98,7 @@ TEST_F(ThreadGroupPreemptionTests, givenKernelWithEnvironmentPatchSetWhenLSQCWaI
|
||||
|
||||
TEST_F(ThreadGroupPreemptionTests, givenKernelWithEnvironmentPatchSetWhenLSQCWaIsTurnedOffThenThreadGroupPreemptionIsBeingSelected) {
|
||||
PreemptionFlags flags = {};
|
||||
executionEnvironment.get()->UsesFencesForReadWriteImages = 1;
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages = true;
|
||||
waTable->waDisableLSQCROPERFforOCL = 0;
|
||||
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
|
||||
EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags));
|
||||
@@ -167,7 +158,7 @@ TEST_F(ThreadGroupPreemptionTests, disallowDefaultDeviceModeWhenAtLeastOneInvali
|
||||
TEST_F(MidThreadPreemptionTests, allowMidThreadPreemption) {
|
||||
PreemptionFlags flags = {};
|
||||
device->setPreemptionMode(PreemptionMode::MidThread);
|
||||
executionEnvironment->DisableMidThreadPreemption = 0;
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false;
|
||||
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
|
||||
EXPECT_TRUE(PreemptionHelper::allowMidThreadPreemption(flags));
|
||||
}
|
||||
@@ -192,7 +183,7 @@ TEST_F(MidThreadPreemptionTests, allowMidThreadPreemptionDeviceSupportPreemption
|
||||
TEST_F(MidThreadPreemptionTests, disallowMidThreadPreemptionByDevice) {
|
||||
PreemptionFlags flags = {};
|
||||
device->setPreemptionMode(PreemptionMode::ThreadGroup);
|
||||
executionEnvironment->DisableMidThreadPreemption = 0;
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false;
|
||||
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
|
||||
EXPECT_TRUE(PreemptionHelper::allowMidThreadPreemption(flags));
|
||||
EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags));
|
||||
@@ -201,7 +192,7 @@ TEST_F(MidThreadPreemptionTests, disallowMidThreadPreemptionByDevice) {
|
||||
TEST_F(MidThreadPreemptionTests, disallowMidThreadPreemptionByKernel) {
|
||||
PreemptionFlags flags = {};
|
||||
device->setPreemptionMode(PreemptionMode::MidThread);
|
||||
executionEnvironment->DisableMidThreadPreemption = 1;
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = true;
|
||||
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
|
||||
EXPECT_FALSE(PreemptionHelper::allowMidThreadPreemption(flags));
|
||||
}
|
||||
@@ -218,7 +209,7 @@ TEST_F(MidThreadPreemptionTests, disallowMidThreadPreemptionByVmeKernel) {
|
||||
|
||||
TEST_F(MidThreadPreemptionTests, taskPreemptionDisallowMidThreadByDevice) {
|
||||
PreemptionFlags flags = {};
|
||||
executionEnvironment->DisableMidThreadPreemption = 0;
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false;
|
||||
device->setPreemptionMode(PreemptionMode::ThreadGroup);
|
||||
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
|
||||
PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags);
|
||||
@@ -227,7 +218,7 @@ TEST_F(MidThreadPreemptionTests, taskPreemptionDisallowMidThreadByDevice) {
|
||||
|
||||
TEST_F(MidThreadPreemptionTests, taskPreemptionDisallowMidThreadByKernel) {
|
||||
PreemptionFlags flags = {};
|
||||
executionEnvironment->DisableMidThreadPreemption = 1;
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = true;
|
||||
device->setPreemptionMode(PreemptionMode::MidThread);
|
||||
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
|
||||
PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags);
|
||||
@@ -248,7 +239,7 @@ TEST_F(MidThreadPreemptionTests, taskPreemptionDisallowMidThreadByVmeKernel) {
|
||||
|
||||
TEST_F(MidThreadPreemptionTests, taskPreemptionAllow) {
|
||||
PreemptionFlags flags = {};
|
||||
executionEnvironment->DisableMidThreadPreemption = 0;
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false;
|
||||
device->setPreemptionMode(PreemptionMode::MidThread);
|
||||
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
|
||||
PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags);
|
||||
@@ -257,7 +248,7 @@ TEST_F(MidThreadPreemptionTests, taskPreemptionAllow) {
|
||||
|
||||
TEST_F(MidThreadPreemptionTests, taskPreemptionAllowDeviceSupportsPreemptionOnVmeKernel) {
|
||||
PreemptionFlags flags = {};
|
||||
executionEnvironment->DisableMidThreadPreemption = 0;
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false;
|
||||
kernelInfo->isVmeWorkload = true;
|
||||
kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex)));
|
||||
device->sharedDeviceInfo.vmeAvcSupportsPreemption = true;
|
||||
|
||||
@@ -39,10 +39,7 @@ struct ProfilingTests : public CommandEnqueueFixture,
|
||||
memset(&dataParameterStream, 0, sizeof(dataParameterStream));
|
||||
dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData);
|
||||
|
||||
executionEnvironment = {};
|
||||
memset(&executionEnvironment, 0, sizeof(executionEnvironment));
|
||||
executionEnvironment.CompiledSIMD32 = 1;
|
||||
executionEnvironment.LargestCompiledSIMDSize = 32;
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
memset(&threadPayload, 0, sizeof(threadPayload));
|
||||
threadPayload.LocalIDXPresent = 1;
|
||||
@@ -52,7 +49,6 @@ struct ProfilingTests : public CommandEnqueueFixture,
|
||||
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
|
||||
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
|
||||
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
|
||||
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
|
||||
kernelInfo.patchInfo.threadPayload = &threadPayload;
|
||||
}
|
||||
|
||||
@@ -64,7 +60,6 @@ struct ProfilingTests : public CommandEnqueueFixture,
|
||||
|
||||
SKernelBinaryHeaderCommon kernelHeader = {};
|
||||
SPatchDataParameterStream dataParameterStream = {};
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
SPatchThreadPayload threadPayload = {};
|
||||
KernelInfo kernelInfo;
|
||||
MockContext ctx;
|
||||
|
||||
@@ -303,9 +303,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingT
|
||||
executionEnvironment.CompiledSubGroupsNumber = 0xaa;
|
||||
executionEnvironment.HasBarriers = false;
|
||||
executionEnvironment.DisableMidThreadPreemption = true;
|
||||
executionEnvironment.CompiledSIMD16 = false;
|
||||
executionEnvironment.CompiledSIMD32 = true;
|
||||
executionEnvironment.CompiledSIMD8 = false;
|
||||
executionEnvironment.HasDeviceEnqueue = false;
|
||||
executionEnvironment.MayAccessUndeclaredResource = false;
|
||||
executionEnvironment.UsesFencesForReadWriteImages = false;
|
||||
@@ -322,7 +319,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingT
|
||||
|
||||
buildAndDecode();
|
||||
|
||||
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
|
||||
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
|
||||
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
|
||||
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
|
||||
@@ -340,9 +336,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect
|
||||
executionEnvironment.CompiledSubGroupsNumber = 0xaa;
|
||||
executionEnvironment.HasBarriers = false;
|
||||
executionEnvironment.DisableMidThreadPreemption = true;
|
||||
executionEnvironment.CompiledSIMD16 = false;
|
||||
executionEnvironment.CompiledSIMD32 = true;
|
||||
executionEnvironment.CompiledSIMD8 = false;
|
||||
executionEnvironment.HasDeviceEnqueue = false;
|
||||
executionEnvironment.MayAccessUndeclaredResource = false;
|
||||
executionEnvironment.UsesFencesForReadWriteImages = false;
|
||||
@@ -359,7 +352,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect
|
||||
|
||||
buildAndDecode();
|
||||
|
||||
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
|
||||
EXPECT_EQ(32u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
|
||||
EXPECT_EQ(16u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
|
||||
EXPECT_EQ(8u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
|
||||
@@ -378,9 +370,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentCompiledForGreaterThan4gbBuffers
|
||||
executionEnvironment.CompiledSubGroupsNumber = 0xaa;
|
||||
executionEnvironment.HasBarriers = false;
|
||||
executionEnvironment.DisableMidThreadPreemption = true;
|
||||
executionEnvironment.CompiledSIMD16 = false;
|
||||
executionEnvironment.CompiledSIMD32 = true;
|
||||
executionEnvironment.CompiledSIMD8 = false;
|
||||
executionEnvironment.HasDeviceEnqueue = false;
|
||||
executionEnvironment.MayAccessUndeclaredResource = false;
|
||||
executionEnvironment.UsesFencesForReadWriteImages = false;
|
||||
@@ -396,7 +385,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentCompiledForGreaterThan4gbBuffers
|
||||
|
||||
buildAndDecode();
|
||||
|
||||
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
|
||||
EXPECT_FALSE(pKernelInfo->requiresSshForBuffers);
|
||||
}
|
||||
|
||||
@@ -411,7 +399,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentDoesntHaveDeviceEnqueueWhenBuild
|
||||
|
||||
buildAndDecode();
|
||||
|
||||
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
|
||||
EXPECT_EQ_VAL(0u, program->getParentKernelInfoArray(rootDeviceIndex).size());
|
||||
}
|
||||
|
||||
@@ -426,7 +413,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentHasDeviceEnqueueWhenBuildingThen
|
||||
|
||||
buildAndDecode();
|
||||
|
||||
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
|
||||
EXPECT_EQ_VAL(1u, program->getParentKernelInfoArray(rootDeviceIndex).size());
|
||||
}
|
||||
|
||||
@@ -441,7 +427,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentDoesntRequireSubgroupIndependent
|
||||
|
||||
buildAndDecode();
|
||||
|
||||
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
|
||||
EXPECT_EQ_VAL(0u, program->getSubgroupKernelInfoArray(rootDeviceIndex).size());
|
||||
}
|
||||
|
||||
@@ -456,7 +441,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentRequiresSubgroupIndependentForwa
|
||||
|
||||
buildAndDecode();
|
||||
|
||||
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
|
||||
EXPECT_EQ_VAL(1u, program->getSubgroupKernelInfoArray(rootDeviceIndex).size());
|
||||
}
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ class SamplerSetArgFixture : public ClDeviceFixture {
|
||||
void SetUp() {
|
||||
ClDeviceFixture::SetUp();
|
||||
pKernelInfo = std::make_unique<KernelInfo>();
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
// define kernel info
|
||||
pKernelInfo->heapInfo.pDsh = samplerStateHeap;
|
||||
|
||||
@@ -35,12 +35,10 @@ class MockSchedulerKernel : public SchedulerKernel {
|
||||
dataParametrStream.DataParameterStreamSize = 8;
|
||||
dataParametrStream.Size = 8;
|
||||
|
||||
SPatchExecutionEnvironment executionEnvironment = {};
|
||||
executionEnvironment.CompiledSIMD32 = 1;
|
||||
executionEnvironment.HasDeviceEnqueue = 0;
|
||||
info->kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = false;
|
||||
|
||||
info->patchInfo.dataParameterStream = &dataParametrStream;
|
||||
info->patchInfo.executionEnvironment = &executionEnvironment;
|
||||
KernelArgInfo bufferArg;
|
||||
bufferArg.isBuffer = true;
|
||||
|
||||
|
||||
@@ -346,6 +346,7 @@ TEST(FileLogger, GivenNullMdiWhenDumpingKernelsThenFileIsNotCreated) {
|
||||
|
||||
TEST(FileLogger, GivenDebugFunctionalityWhenDebugFlagIsDisabledThenDoNotDumpKernelArgsForMdi) {
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
|
||||
@@ -381,6 +382,7 @@ TEST(FileLogger, GivenDebugFunctionalityWhenDebugFlagIsDisabledThenDoNotDumpKern
|
||||
|
||||
TEST(FileLogger, GivenMdiWhenDumpingKernelArgsThenFileIsCreated) {
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
|
||||
@@ -426,6 +428,7 @@ TEST(FileLogger, GivenNullWhenDumpingKernelArgsThenFileIsNotCreated) {
|
||||
|
||||
TEST(FileLogger, GivenEmptyKernelWhenDumpingKernelArgsThenFileIsNotCreated) {
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
|
||||
@@ -442,6 +445,8 @@ TEST(FileLogger, GivenEmptyKernelWhenDumpingKernelArgsThenFileIsNotCreated) {
|
||||
|
||||
TEST(FileLogger, GivenImmediateWhenDumpingKernelArgsThenFileIsCreated) {
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
|
||||
@@ -474,8 +479,8 @@ TEST(FileLogger, GivenImmediateWhenDumpingKernelArgsThenFileIsCreated) {
|
||||
}
|
||||
|
||||
TEST(FileLogger, GivenImmediateZeroSizeWhenDumpingKernelArgsThenFileIsNotCreated) {
|
||||
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
|
||||
@@ -504,8 +509,8 @@ TEST(FileLogger, GivenImmediateZeroSizeWhenDumpingKernelArgsThenFileIsNotCreated
|
||||
}
|
||||
|
||||
TEST(FileLogger, GivenLocalBufferWhenDumpingKernelArgsThenFileIsNotCreated) {
|
||||
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
MockProgram program(toClDeviceVector(*device));
|
||||
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
|
||||
@@ -528,6 +533,7 @@ TEST(FileLogger, GivenLocalBufferWhenDumpingKernelArgsThenFileIsNotCreated) {
|
||||
|
||||
TEST(FileLogger, GivenBufferNotSetWhenDumpingKernelArgsThenFileIsNotCreated) {
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
auto context = clUniquePtr(new MockContext(device.get()));
|
||||
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
|
||||
@@ -564,6 +570,7 @@ TEST(FileLogger, GivenBufferWhenDumpingKernelArgsThenFileIsCreated) {
|
||||
cl_mem clObj = buffer;
|
||||
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
|
||||
auto kernel = clUniquePtr(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
|
||||
|
||||
@@ -601,6 +608,7 @@ TEST(FileLogger, GivenBufferWhenDumpingKernelArgsThenFileIsCreated) {
|
||||
|
||||
TEST(FileLogger, GivenSamplerWhenDumpingKernelArgsThenFileIsNotCreated) {
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
auto context = clUniquePtr(new MockContext(device.get()));
|
||||
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
|
||||
@@ -627,8 +635,8 @@ TEST(FileLogger, GivenSamplerWhenDumpingKernelArgsThenFileIsNotCreated) {
|
||||
}
|
||||
|
||||
TEST(FileLogger, GivenImageNotSetWhenDumpingKernelArgsThenFileIsNotCreated) {
|
||||
|
||||
auto kernelInfo = std::make_unique<KernelInfo>();
|
||||
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
auto context = clUniquePtr(new MockContext(device.get()));
|
||||
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
|
||||
|
||||
@@ -69,7 +69,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);
|
||||
|
||||
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
|
||||
kernelDescriptor.kernelAttributes.hasBarriers,
|
||||
kernelDescriptor.kernelAttributes.barrierCount,
|
||||
hwInfo);
|
||||
auto slmSize = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(
|
||||
HwHelperHw<Family>::get().computeSlmValues(hwInfo, dispatchInterface->getSlmTotalSize()));
|
||||
|
||||
@@ -60,14 +60,10 @@ PreemptionMode PreemptionHelper::taskPreemptionMode(PreemptionMode devicePreempt
|
||||
|
||||
void PreemptionHelper::setPreemptionLevelFlags(PreemptionFlags &flags, Device &device, Kernel *kernel) {
|
||||
if (kernel) {
|
||||
const auto &kernelInfo = kernel->getKernelInfo(device.getRootDeviceIndex());
|
||||
flags.flags.disabledMidThreadPreemptionKernel =
|
||||
kernelInfo.patchInfo.executionEnvironment &&
|
||||
kernelInfo.patchInfo.executionEnvironment->DisableMidThreadPreemption;
|
||||
const auto &kernelDescriptor = kernel->getKernelInfo(device.getRootDeviceIndex()).kernelDescriptor;
|
||||
flags.flags.disabledMidThreadPreemptionKernel = kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption;
|
||||
flags.flags.vmeKernel = kernel->isVmeKernel();
|
||||
flags.flags.usesFencesForReadWriteImages =
|
||||
kernelInfo.patchInfo.executionEnvironment &&
|
||||
kernelInfo.patchInfo.executionEnvironment->UsesFencesForReadWriteImages;
|
||||
flags.flags.usesFencesForReadWriteImages = kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages;
|
||||
flags.flags.schedulerKernel = kernel->isSchedulerKernel;
|
||||
}
|
||||
flags.flags.deviceSupportsVmePreemption = device.getDeviceInfo().vmeAvcSupportsPreemption;
|
||||
|
||||
@@ -798,8 +798,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<N
|
||||
}
|
||||
}
|
||||
|
||||
kernelDescriptor.kernelAttributes.hasBarriers = execEnv.barrierCount;
|
||||
kernelDescriptor.kernelAttributes.flags.usesBarriers = (kernelDescriptor.kernelAttributes.hasBarriers > 0U);
|
||||
kernelDescriptor.kernelAttributes.barrierCount = execEnv.barrierCount;
|
||||
kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = execEnv.disableMidThreadPreemption;
|
||||
kernelDescriptor.kernelAttributes.numGrfRequired = execEnv.grfCount;
|
||||
if (execEnv.has4GBBuffers) {
|
||||
|
||||
@@ -17,6 +17,8 @@ set(NEO_CORE_KERNEL
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/read_extended_info.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/read_extended_info.cpp
|
||||
)
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_KERNEL ${NEO_CORE_KERNEL})
|
||||
|
||||
@@ -26,6 +26,10 @@ namespace NEO {
|
||||
using StringMap = std::unordered_map<uint32_t, std::string>;
|
||||
using InstructionsSegmentOffset = uint16_t;
|
||||
|
||||
// Polymorphic base type for optional, extended kernel information.
// KernelDescriptor holds it as std::unique_ptr<ExtendedInfoBase>, so the
// destructor is virtual to let derived objects be destroyed correctly
// through a pointer to this base.
struct ExtendedInfoBase {
    virtual ~ExtendedInfoBase() = default;
};
|
||||
|
||||
struct KernelDescriptor final {
|
||||
enum AddressingMode : uint8_t {
|
||||
AddrNone,
|
||||
@@ -46,12 +50,13 @@ struct KernelDescriptor final {
|
||||
uint32_t perThreadScratchSize[2] = {0U, 0U};
|
||||
uint32_t perHwThreadPrivateMemorySize = 0U;
|
||||
uint32_t perThreadSystemThreadSurfaceSize = 0U;
|
||||
uint32_t hasBarriers = 0u;
|
||||
uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U};
|
||||
uint16_t crossThreadDataSize = 0U;
|
||||
uint16_t perThreadDataSize = 0U;
|
||||
uint16_t numArgsToPatch = 0U;
|
||||
uint16_t numGrfRequired = 0U;
|
||||
uint8_t barrierCount = 0u;
|
||||
|
||||
AddressingMode bufferAddressingMode = BindfulAndStateless;
|
||||
AddressingMode imageAddressingMode = Bindful;
|
||||
AddressingMode samplerAddressingMode = Bindful;
|
||||
@@ -67,10 +72,13 @@ struct KernelDescriptor final {
|
||||
return Stateless == bufferAddressingMode;
|
||||
}
|
||||
|
||||
bool usesBarriers() const {
|
||||
return 0 != barrierCount;
|
||||
}
|
||||
|
||||
union {
|
||||
struct {
|
||||
bool usesPrintf : 1;
|
||||
bool usesBarriers : 1;
|
||||
bool usesFencesForReadWriteImages : 1;
|
||||
bool usesFlattenedLocalIds;
|
||||
bool usesPrivateMemory : 1;
|
||||
@@ -170,6 +178,7 @@ struct KernelDescriptor final {
|
||||
} external;
|
||||
|
||||
std::vector<uint8_t> generatedHeaps;
|
||||
std::unique_ptr<ExtendedInfoBase> extendedInfo;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h"
|
||||
#include "shared/source/kernel/kernel_arg_descriptor_extended_vme.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/kernel/read_extended_info.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
@@ -49,17 +50,20 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
|
||||
} else {
|
||||
dst.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindfulAndStateless;
|
||||
}
|
||||
|
||||
dst.kernelAttributes.numGrfRequired = execEnv.NumGRFRequired;
|
||||
dst.kernelAttributes.simdSize = execEnv.LargestCompiledSIMDSize;
|
||||
dst.kernelAttributes.barrierCount = execEnv.HasBarriers;
|
||||
|
||||
dst.kernelAttributes.flags.usesDeviceSideEnqueue = (0 != execEnv.HasDeviceEnqueue);
|
||||
dst.kernelAttributes.flags.usesBarriers = (0 != execEnv.HasBarriers);
|
||||
dst.kernelAttributes.hasBarriers = execEnv.HasBarriers;
|
||||
dst.kernelAttributes.flags.requiresDisabledMidThreadPreemption = (0 != execEnv.DisableMidThreadPreemption);
|
||||
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
|
||||
dst.kernelAttributes.flags.usesFencesForReadWriteImages = (0 != execEnv.UsesFencesForReadWriteImages);
|
||||
dst.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress = (0 != execEnv.SubgroupIndependentForwardProgressRequired);
|
||||
dst.kernelAttributes.numGrfRequired = execEnv.NumGRFRequired;
|
||||
dst.kernelAttributes.flags.useGlobalAtomics = execEnv.HasGlobalAtomics;
|
||||
dst.kernelAttributes.flags.usesStatelessWrites = (execEnv.StatelessWritesCount > 0U);
|
||||
dst.kernelAttributes.flags.useGlobalAtomics = (0 != execEnv.HasGlobalAtomics);
|
||||
dst.kernelAttributes.flags.usesStatelessWrites = (0 != execEnv.StatelessWritesCount);
|
||||
|
||||
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
|
||||
readExtendedInfo(dst.extendedInfo, execEnv);
|
||||
}
|
||||
|
||||
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token) {
|
||||
|
||||
12
shared/source/kernel/read_extended_info.cpp
Normal file
12
shared/source/kernel/read_extended_info.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/kernel/read_extended_info.h"
|
||||
|
||||
namespace NEO {
|
||||
void readExtendedInfo(std::unique_ptr<ExtendedInfoBase> &extendedInfo, const iOpenCL::SPatchExecutionEnvironment &execEnv) {}
|
||||
} // namespace NEO
|
||||
15
shared/source/kernel/read_extended_info.h
Normal file
15
shared/source/kernel/read_extended_info.h
Normal file
@@ -0,0 +1,15 @@
|
||||
/*
 * Copyright (C) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

// Guard against repeated inclusion of this header within one translation unit.
#pragma once

#include "kernel_descriptor.h"
#include "patch_shared.h"

#include <memory>

namespace NEO {
// Hook for filling in the extended kernel information attached to a kernel descriptor.
//
// @param extendedInfo owning pointer that an implementation may set or replace.
// @param execEnv      execution-environment patch token the information is read from.
//
// NOTE(review): the implementation file is selected through BRANCH_DIR_SUFFIX in the
// build scripts, so the actual behavior may differ between branches - confirm against
// the read_extended_info.cpp that is built.
void readExtendedInfo(std::unique_ptr<ExtendedInfoBase> &extendedInfo, const iOpenCL::SPatchExecutionEnvironment &execEnv);
} // namespace NEO
|
||||
@@ -2777,8 +2777,8 @@ kernels:
|
||||
ASSERT_EQ(1U, programInfo.kernelInfos.size());
|
||||
|
||||
auto &kernelDescriptor = programInfo.kernelInfos[0]->kernelDescriptor;
|
||||
EXPECT_EQ(7U, kernelDescriptor.kernelAttributes.hasBarriers);
|
||||
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesBarriers);
|
||||
EXPECT_EQ(7U, kernelDescriptor.kernelAttributes.barrierCount);
|
||||
EXPECT_TRUE(kernelDescriptor.kernelAttributes.usesBarriers());
|
||||
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption);
|
||||
EXPECT_EQ(13U, kernelDescriptor.kernelAttributes.numGrfRequired);
|
||||
EXPECT_EQ(KernelDescriptor::Stateless, kernelDescriptor.kernelAttributes.bufferAddressingMode);
|
||||
|
||||
@@ -93,10 +93,10 @@ TEST(KernelDescriptorFromPatchtokens, GivenExecutionEnvironmentThenSetsProperPar
|
||||
NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4);
|
||||
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue);
|
||||
|
||||
EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesBarriers);
|
||||
EXPECT_FALSE(kernelDescriptor.kernelAttributes.usesBarriers());
|
||||
execEnv.HasBarriers = 1U;
|
||||
NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4);
|
||||
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesBarriers);
|
||||
EXPECT_TRUE(kernelDescriptor.kernelAttributes.usesBarriers());
|
||||
|
||||
EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption);
|
||||
execEnv.DisableMidThreadPreemption = 1U;
|
||||
|
||||
Reference in New Issue
Block a user