Remove executionEnvironment from KernelInfo

Related-To: NEO-3739
This commit is contained in:
Krystian Chmielewski
2020-11-19 12:30:44 +01:00
committed by Compute-Runtime-Automation
parent 96bc6b2e01
commit 4948c39d39
70 changed files with 310 additions and 562 deletions

View File

@@ -327,7 +327,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
uint32_t numThreadsPerSubSlice = (uint32_t)deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU;
uint32_t localMemSize = (uint32_t)deviceInfo.localMemSize;
NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.flags.usesBarriers, simd, this->getSlmTotalSize(),
NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(),
coreFamily, numThreadsPerSubSlice, localMemSize,
usesImages, false);
NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim);
@@ -366,7 +366,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
descriptor.kernelAttributes.numGrfRequired,
hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount);
auto usesBarriers = descriptor.kernelAttributes.flags.usesBarriers;
auto barrierCount = descriptor.kernelAttributes.barrierCount;
const uint32_t workDim = 3;
const size_t localWorkSize[] = {groupSize[0], groupSize[1], groupSize[2]};
*totalGroupCount = NEO::KernelHelper::getMaxWorkGroupCount(descriptor.kernelAttributes.simdSize,
@@ -375,7 +375,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
hwHelper.alignSlmSize(slmArgsTotalSize + descriptor.kernelAttributes.slmInlineSize),
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
hwHelper.getBarriersCountFromHasBarriers(usesBarriers),
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
workDim,
localWorkSize);
return ZE_RESULT_SUCCESS;

View File

@@ -686,7 +686,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
kernel->makeResident(getGpgpuCommandStreamReceiver());
requiresCoherency |= kernel->requiresCoherency();
mediaSamplerRequired |= kernel->isVmeKernel();
auto numGrfRequiredByKernel = kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->NumGRFRequired;
auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.numGrfRequired);
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode();
if (kernel->hasUncacheableStatelessArgs()) {

View File

@@ -208,7 +208,7 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
pIDDestination[blockIndex + i].setKernelStartPointer(static_cast<uint32_t>(blockKernelStartPointer));
pIDDestination[blockIndex + i].setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL);
EncodeDispatchKernel<GfxFamily>::programBarrierEnable(pIDDestination[blockIndex + i],
pBlockInfo->patchInfo.executionEnvironment->HasBarriers,
pBlockInfo->kernelDescriptor.kernelAttributes.barrierCount,
device->getHardwareInfo());
// Set offset to sampler states, block's DHSOffset is added by scheduler

View File

@@ -15,12 +15,12 @@ namespace NEO {
template <>
void GpgpuWalkerHelper<BDWFamily>::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) {
if (disablePerfMode) {
if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
// Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
GpgpuWalkerHelper<BDWFamily>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS);
}
} else {
if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
// Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work
typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL;
auto pCmd = reinterpret_cast<PIPE_CONTROL *>(pCommandStream->getSpace(sizeof(PIPE_CONTROL)));
@@ -40,7 +40,7 @@ size_t GpgpuWalkerHelper<BDWFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
typedef typename BDWFamily::MI_MATH MI_MATH;
typedef typename BDWFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
size_t n = 0;
if (pKernel->getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
if (pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
n += sizeof(PIPE_CONTROL) +
(2 * sizeof(MI_LOAD_REGISTER_REG) +
sizeof(MI_LOAD_REGISTER_IMM) +

View File

@@ -15,12 +15,12 @@ namespace NEO {
template <>
void GpgpuWalkerHelper<SKLFamily>::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) {
if (disablePerfMode) {
if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
// Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
GpgpuWalkerHelper<SKLFamily>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS);
}
} else {
if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
if (kernel.getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
// Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work
typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL;
auto pCmd = reinterpret_cast<PIPE_CONTROL *>(pCommandStream->getSpace(sizeof(PIPE_CONTROL)));
@@ -40,7 +40,7 @@ size_t GpgpuWalkerHelper<SKLFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
typedef typename SKLFamily::MI_MATH MI_MATH;
typedef typename SKLFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
size_t n = 0;
if (pKernel->getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
if (pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
n += sizeof(PIPE_CONTROL) +
(2 * sizeof(MI_LOAD_REGISTER_REG) +
sizeof(MI_LOAD_REGISTER_IMM) +

View File

@@ -194,7 +194,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize);
EncodeDispatchKernel<GfxFamily>::programBarrierEnable(interfaceDescriptor,
kernel.getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->HasBarriers,
kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.barrierCount,
hardwareInfo);
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(&interfaceDescriptor, preemptionMode);
@@ -279,7 +279,6 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
rootDeviceIndex);
uint64_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable + interfaceDescriptorIndex * sizeof(INTERFACE_DESCRIPTOR_DATA);
DEBUG_BREAK_IF(patchInfo.executionEnvironment == nullptr);
auto bindingTablePrefetchSize = std::min(31u, static_cast<uint32_t>(kernel.getNumberOfBindingTableStates(rootDeviceIndex)));
if (resetBindingTablePrefetch(kernel)) {

View File

@@ -210,7 +210,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
}
auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex();
const auto &kernelInfo = kernel->getKernelInfo(rootDeviceIndex);
const auto &kernelDescriptor = kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor;
DispatchFlags dispatchFlags(
{}, //csrDependencies
@@ -219,7 +219,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
commandQueue.flushStamp->getStampReference(), //flushStampReference
commandQueue.getThrottle(), //throttle
preemptionMode, //preemptionMode
kernelInfo.patchInfo.executionEnvironment->NumGRFRequired, //numGrfRequired
kernelDescriptor.kernelAttributes.numGrfRequired, //numGrfRequired
L3CachingSettings::l3CacheOn, //l3CacheSettings
kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy
kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo

View File

@@ -66,9 +66,7 @@ uint32_t Kernel::dummyPatchLocation = 0xbaddf00d;
Kernel::Kernel(Program *programArg, const KernelInfoContainer &kernelInfosArg, bool schedulerKernel)
: slmTotalSize(kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->workloadInfo.slmStaticSize),
isParentKernel((kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->patchInfo.executionEnvironment != nullptr)
? (kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->patchInfo.executionEnvironment->HasDeviceEnqueue != 0)
: false),
isParentKernel(kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue),
isSchedulerKernel(schedulerKernel),
executionEnvironment(programArg->getExecutionEnvironment()),
program(programArg),
@@ -354,10 +352,8 @@ cl_int Kernel::initialize() {
}
setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy());
if (kernelInfo.patchInfo.executionEnvironment) {
if (!kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired) {
setThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased);
}
if (false == kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress) {
setThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased);
}
patchBlocksSimdSize(rootDeviceIndex);
@@ -617,6 +613,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
auto rootDeviceIndex = device.getRootDeviceIndex();
auto &kernelInfo = *kernelInfos[rootDeviceIndex];
const auto &patchInfo = kernelInfo.patchInfo;
const auto &kernelDescriptor = kernelInfo.kernelDescriptor;
size_t preferredWorkGroupSizeMultiple = 0;
cl_ulong scratchSize;
cl_ulong privateMemSize;
@@ -629,7 +626,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
case CL_KERNEL_WORK_GROUP_SIZE:
maxWorkgroupSize = kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize;
if (DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get()) {
auto divisionSize = CommonConstants::maximalSimdSize / patchInfo.executionEnvironment->LargestCompiledSIMDSize;
auto divisionSize = CommonConstants::maximalSimdSize / kernelInfo.getMaxSimdSize();
maxWorkgroupSize /= divisionSize;
}
srcSize = sizeof(maxWorkgroupSize);
@@ -637,10 +634,9 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
break;
case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
DEBUG_BREAK_IF(!patchInfo.executionEnvironment);
requiredWorkGroupSize.val[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
requiredWorkGroupSize.val[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
requiredWorkGroupSize.val[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
requiredWorkGroupSize.val[0] = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
requiredWorkGroupSize.val[1] = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
requiredWorkGroupSize.val[2] = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
srcSize = sizeof(requiredWorkGroupSize);
pSrc = &requiredWorkGroupSize;
break;
@@ -654,8 +650,7 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para
break;
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
DEBUG_BREAK_IF(!patchInfo.executionEnvironment);
preferredWorkGroupSizeMultiple = patchInfo.executionEnvironment->LargestCompiledSIMDSize;
preferredWorkGroupSizeMultiple = kernelInfo.getMaxSimdSize();
if (hwHelper.isFusedEuDispatchEnabled(hwInfo)) {
preferredWorkGroupSizeMultiple *= 2;
}
@@ -695,7 +690,7 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para
const auto &kernelInfo = getKernelInfo(rootDeviceIndex);
auto maxSimdSize = static_cast<size_t>(kernelInfo.getMaxSimdSize());
auto maxRequiredWorkGroupSize = static_cast<size_t>(kernelInfo.getMaxRequiredWorkGroupSize(getMaxKernelWorkGroupSize(rootDeviceIndex)));
auto largestCompiledSIMDSize = static_cast<size_t>(kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize);
auto largestCompiledSIMDSize = static_cast<size_t>(kernelInfo.getMaxSimdSize());
GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet);
@@ -781,10 +776,10 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para
return changeGetInfoStatusToCLResultType(info.set<size_t>(Math::divideAndRoundUp(maxRequiredWorkGroupSize, largestCompiledSIMDSize)));
}
case CL_KERNEL_COMPILE_NUM_SUB_GROUPS: {
return changeGetInfoStatusToCLResultType(info.set<size_t>(static_cast<size_t>(kernelInfo.patchInfo.executionEnvironment->CompiledSubGroupsNumber)));
return changeGetInfoStatusToCLResultType(info.set<size_t>(static_cast<size_t>(kernelInfo.kernelDescriptor.kernelMetadata.compiledSubGroupsNumber)));
}
case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: {
return changeGetInfoStatusToCLResultType(info.set<size_t>(kernelInfo.requiredSubGroupSize));
return changeGetInfoStatusToCLResultType(info.set<size_t>(kernelInfo.kernelDescriptor.kernelMetadata.requiredSubGroupSize));
}
default:
return CL_INVALID_VALUE;
@@ -1101,24 +1096,24 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
return 0;
}
auto executionEnvironment = getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment;
const auto &kernelDescriptor = getKernelInfo(rootDeviceIndex).kernelDescriptor;
auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;
if (dssCount == 0) {
dssCount = hardwareInfo.gtSystemInfo.SubSliceCount;
}
auto availableThreadCount = hwHelper.calculateAvailableThreadCount(
hardwareInfo.platform.eProductFamily,
((executionEnvironment != nullptr) ? executionEnvironment->NumGRFRequired : GrfConfig::DefaultGrfNumber),
kernelDescriptor.kernelAttributes.numGrfRequired,
hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount);
auto hasBarriers = ((executionEnvironment != nullptr) ? executionEnvironment->HasBarriers : 0u);
auto barrierCount = kernelDescriptor.kernelAttributes.barrierCount;
return KernelHelper::getMaxWorkGroupCount(kernelInfos[rootDeviceIndex]->getMaxSimdSize(),
availableThreadCount,
dssCount,
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
hwHelper.alignSlmSize(slmTotalSize),
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
hwHelper.getBarriersCountFromHasBarriers(hasBarriers),
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
workDim,
localWorkSize);
}
@@ -2084,8 +2079,8 @@ uint32_t Kernel::ReflectionSurfaceHelper::setKernelData(void *reflectionSurface,
kernelData->m_sizeOfConstantBuffer = kernelInfo.getConstantBufferSize();
kernelData->m_PatchTokensMask = tokenMaskIn;
kernelData->m_ScratchSpacePatchValue = 0;
kernelData->m_SIMDSize = kernelInfo.patchInfo.executionEnvironment ? kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize : 0;
kernelData->m_HasBarriers = kernelInfo.patchInfo.executionEnvironment ? kernelInfo.patchInfo.executionEnvironment->HasBarriers : 0;
kernelData->m_SIMDSize = kernelInfo.getMaxSimdSize();
kernelData->m_HasBarriers = kernelInfo.kernelDescriptor.kernelAttributes.barrierCount;
kernelData->m_RequiredWkgSizes[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
kernelData->m_RequiredWkgSizes[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
kernelData->m_RequiredWkgSizes[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];

View File

@@ -136,8 +136,7 @@ WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) {
auto rootDeviceIndex = device.getRootDeviceIndex();
const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(rootDeviceIndex);
this->maxWorkGroupSize = dispatchInfo.getKernel()->getMaxKernelWorkGroupSize(rootDeviceIndex);
auto pExecutionEnvironment = kernelInfo.patchInfo.executionEnvironment;
this->hasBarriers = (pExecutionEnvironment != nullptr) && (pExecutionEnvironment->HasBarriers);
this->hasBarriers = kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers();
this->simdSize = static_cast<uint32_t>(kernelInfo.getMaxSimdSize());
this->slmTotalSize = static_cast<uint32_t>(dispatchInfo.getKernel()->slmTotalSize);
this->coreFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
@@ -185,7 +184,6 @@ KernelInfo::~KernelInfo() {
}
void KernelInfo::storePatchToken(const SPatchExecutionEnvironment *execEnv) {
this->patchInfo.executionEnvironment = execEnv;
if (execEnv->CompiledForGreaterThan4GBBuffers == false) {
this->requiresSshForBuffers = true;
}

View File

@@ -140,26 +140,13 @@ struct KernelInfo {
size_t getBorderColorStateSize() const;
size_t getBorderColorOffset() const;
unsigned int getMaxSimdSize() const {
const auto executionEnvironment = patchInfo.executionEnvironment;
if (executionEnvironment == nullptr || executionEnvironment->LargestCompiledSIMDSize == 1) {
return 1;
}
if (executionEnvironment->CompiledSIMD32) {
return 32;
}
if (executionEnvironment->CompiledSIMD16) {
return 16;
}
return 8;
return kernelDescriptor.kernelAttributes.simdSize;
}
bool hasDeviceEnqueue() const {
return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->HasDeviceEnqueue : false;
return kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue;
}
bool requiresSubgroupIndependentForwardProgress() const {
return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired : false;
return kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress;
}
size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
auto requiredWorkGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];

View File

@@ -62,7 +62,6 @@ struct PatchInfo {
imageMemObjKernelArgs;
const SPatchDataParameterStream *dataParameterStream = nullptr;
const SPatchThreadPayload *threadPayload = nullptr;
const SPatchExecutionEnvironment *executionEnvironment = nullptr;
const SPatchKernelAttributesInfo *pKernelAttributesInfo = nullptr;
const SPatchAllocateStatelessPrivateSurface *pAllocateStatelessPrivateSurface = nullptr;
const SPatchAllocateSyncBuffer *pAllocateSyncBuffer = nullptr;

View File

@@ -30,6 +30,8 @@ class MediaImageSetArgTest : public ClDeviceFixture,
ClDeviceFixture::SetUp();
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
program = std::make_unique<MockProgram>(toClDeviceVector(*pClDevice));
pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);

View File

@@ -5,6 +5,8 @@
*
*/
#include "shared/source/kernel/grf_config.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
@@ -61,6 +63,8 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting
size_t globalWorkOffset[] = {0, 0, 0};
size_t localWorkSize[] = {8, 8, 8};
size_t maxConcurrentWorkGroupCount = 0;
const_cast<KernelInfo &>(pKernel->getKernelInfo(pDevice->getRootDeviceIndex())).kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, localWorkSize,
&maxConcurrentWorkGroupCount);
EXPECT_EQ(CL_SUCCESS, retVal);

View File

@@ -22,7 +22,7 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture<HelloWorldFixtureFactory> {
ASSERT_EQ(3u, maxWorkDim);
maxWorkGroupSize = static_cast<size_t>(pKernel->kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize);
ASSERT_GE(1024u, maxWorkGroupSize);
largestCompiledSIMDSize = static_cast<size_t>(pKernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->LargestCompiledSIMDSize);
largestCompiledSIMDSize = static_cast<size_t>(pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize());
ASSERT_EQ(32u, largestCompiledSIMDSize);
auto requiredWorkGroupSizeX = static_cast<size_t>(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
@@ -263,7 +263,7 @@ TEST_F(KernelSubGroupInfoReturnCompileNumberTest, GivenKernelWhenGettingCompileN
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
EXPECT_EQ(paramValue[0], static_cast<size_t>(pKernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledSubGroupsNumber));
EXPECT_EQ(paramValue[0], static_cast<size_t>(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.compiledSubGroupsNumber));
}
typedef KernelSubGroupInfoParamFixture<WorkSizeParam> KernelSubGroupInfoReturnCompileSizeTest;

View File

@@ -24,6 +24,7 @@ class KernelArgSvmFixture : public ApiFixture<> {
// define kernel info
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// setup kernel arg offsets
KernelArgPatchInfo kernelArgPatchInfo;

View File

@@ -20,6 +20,7 @@ class KernelExecInfoFixture : public ApiFixture<> {
REQUIRE_SVM_OR_SKIP(defaultHwInfo);
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pMockKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex));
ASSERT_EQ(CL_SUCCESS, pMockKernel->initialize());

View File

@@ -503,7 +503,7 @@ HWTEST_F(AUBSimpleKernelStatelessTest, givenSimpleKernelWhenStatelessPathIsUsedT
ASSERT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(this->kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
EXPECT_TRUE(this->kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
EXPECT_TRUE(this->kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
this->pCmdQ->flush();
expectMemory<FamilyType>(reinterpret_cast<void *>(pBuffer->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()),

View File

@@ -763,7 +763,7 @@ HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStateles
auto kernel = multiDispatchInfo.begin()->getKernel();
ASSERT_NE(nullptr, kernel);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
}
@@ -797,7 +797,7 @@ HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStateles
auto kernel = multiDispatchInfo.begin()->getKernel();
ASSERT_NE(nullptr, kernel);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
}
@@ -1004,8 +1004,7 @@ TEST_F(VmeBuiltInTests, GivenVmeBuilderAndInvalidParamsWhenGettingDispatchInfoTh
TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorrect) {
MockKernelWithInternals mockKernel{*pClDevice};
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD32 = 0;
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD16 = 1;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;
@@ -1054,8 +1053,7 @@ TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorre
TEST_F(VmeBuiltInTests, GivenAdvancedVmeBuilderWhenGettingDispatchInfoThenParamsAreCorrect) {
MockKernelWithInternals mockKernel{*pClDevice};
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD32 = 0;
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD16 = 1;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;

View File

@@ -101,7 +101,8 @@ struct CommandQueueStateless : public CommandQueueHw<FamilyType> {
void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override {
auto kernel = dispatchInfo.begin()->getKernel();
auto rootDeviceIndex = this->device->getRootDeviceIndex();
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
}
};
@@ -115,12 +116,12 @@ struct CommandQueueStateful : public CommandQueueHw<FamilyType> {
auto &device = dispatchInfo.begin()->getClDevice();
auto rootDeviceIndex = device.getRootDeviceIndex();
if (!device.areSharedSystemAllocationsAllowed()) {
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
if (device.getHardwareCapabilities().isStatelesToStatefullWithOffsetSupported) {
EXPECT_TRUE(kernel->allBufferArgsStateful);
}
} else {
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
}
}

View File

@@ -49,11 +49,6 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
memset(&dataParameterStream, 0, sizeof(dataParameterStream));
dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData);
executionEnvironment = {};
memset(&executionEnvironment, 0, sizeof(executionEnvironment));
executionEnvironment.CompiledSIMD32 = 1;
executionEnvironment.LargestCompiledSIMDSize = 32;
memset(&threadPayload, 0, sizeof(threadPayload));
threadPayload.LocalIDXPresent = 1;
threadPayload.LocalIDYPresent = 1;
@@ -68,13 +63,13 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
kernelInfo.patchInfo.threadPayload = &threadPayload;
kernelInfoWithSampler.heapInfo.pKernelHeap = kernelIsa;
kernelInfoWithSampler.heapInfo.KernelHeapSize = sizeof(kernelIsa);
kernelInfoWithSampler.patchInfo.dataParameterStream = &dataParameterStream;
kernelInfoWithSampler.patchInfo.executionEnvironment = &executionEnvironment;
kernelInfoWithSampler.kernelDescriptor.kernelAttributes.simdSize = 32;
kernelInfoWithSampler.patchInfo.threadPayload = &threadPayload;
kernelInfoWithSampler.patchInfo.samplerStateArray = &samplerArray;
kernelInfoWithSampler.heapInfo.pDsh = static_cast<const void *>(dsh);
@@ -100,7 +95,6 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
SKernelBinaryHeaderCommon kernelHeader = {};
SPatchDataParameterStream dataParameterStream = {};
SPatchExecutionEnvironment executionEnvironment = {};
SPatchThreadPayload threadPayload = {};
SPatchSamplerStateArray samplerArray = {};

View File

@@ -211,7 +211,7 @@ HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessK
auto kernel = multiDispatchInfo.begin()->getKernel();
ASSERT_NE(nullptr, kernel);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
}

View File

@@ -230,7 +230,7 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsU
EXPECT_NE(0u, multiDispatchInfo.size());
auto kernel = multiDispatchInfo.begin()->getKernel();
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
}

View File

@@ -363,7 +363,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKerne
auto kernel = multiDispatchInfo.begin()->getKernel();
ASSERT_NE(nullptr, kernel);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess);
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);

View File

@@ -713,12 +713,14 @@ HWTEST_F(EnqueueKernelTests, whenEnqueueingKernelThenCsrCorrectlySetsRequiredThr
UltClDeviceFactory clDeviceFactory{1, 0};
MockContext context{clDeviceFactory.rootDevices[0]};
SPatchExecutionEnvironment sPatchExecutionEnvironment = {};
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = true;
MockKernelWithInternals mockKernelWithInternalsWithIfpRequired{*clDeviceFactory.rootDevices[0], sPatchExecutionEnvironment};
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = false;
MockKernelWithInternals mockKernelWithInternalsWithIfpNotRequired{*clDeviceFactory.rootDevices[0], sPatchExecutionEnvironment};
SPatchExecutionEnvironment sPatchExecEnv = {};
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true;
MockKernelWithInternals mockKernelWithInternalsWithIfpRequired{*clDeviceFactory.rootDevices[0], sPatchExecEnv};
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = false;
MockKernelWithInternals mockKernelWithInternalsWithIfpNotRequired{*clDeviceFactory.rootDevices[0], sPatchExecEnv};
cl_int retVal;
std::unique_ptr<CommandQueue> pCommandQueue{CommandQueue::create(&context, clDeviceFactory.rootDevices[0], nullptr, true, retVal)};

View File

@@ -125,12 +125,10 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB
EXPECT_NE(nullptr, kernel);
// This kernel does not operate on OpenCL 2.0 Read and Write images
EXPECT_EQ(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages, (uint32_t) false);
EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages);
// Simulate that the kernel actually operates on OpenCL 2.0 Read and Write images.
// Such kernel may require special WA DisableLSQCROPERFforOCL during construction of Command Buffer
struct SPatchExecutionEnvironment *pExecEnv = (struct SPatchExecutionEnvironment *)kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment;
pExecEnv->UsesFencesForReadWriteImages = (uint32_t) true;
EXPECT_EQ(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages, (uint32_t) true);
const_cast<KernelDescriptor &>(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor).kernelAttributes.flags.usesFencesForReadWriteImages = true;
// Enqueue kernel that may require special WA DisableLSQCROPERFforOCL
auto retVal = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, kernel.get());
@@ -150,7 +148,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize);
pExecEnv->UsesFencesForReadWriteImages = (uint32_t) false;
const_cast<KernelDescriptor &>(kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor).kernelAttributes.flags.usesFencesForReadWriteImages = false;
EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS);
EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH);

View File

@@ -694,14 +694,10 @@ TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenHasBarrier
dispatchInfo.setClDevice(&device);
dispatchInfo.setKernel(kernel.mockKernel);
kernel.kernelInfo.patchInfo.executionEnvironment = nullptr;
kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 0;
EXPECT_FALSE(WorkSizeInfo{dispatchInfo}.hasBarriers);
kernel.executionEnvironment.HasBarriers = 0;
kernel.kernelInfo.patchInfo.executionEnvironment = &kernel.executionEnvironment;
EXPECT_FALSE(WorkSizeInfo{dispatchInfo}.hasBarriers);
kernel.executionEnvironment.HasBarriers = 1;
kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1;
EXPECT_TRUE(WorkSizeInfo{dispatchInfo}.hasBarriers);
}

View File

@@ -326,8 +326,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbThenCleanupSectionIsC
// 4 pages padding expected after cleanup section
EXPECT_LE(4 * MemoryConstants::pageSize, slbMax - slbUsed);
if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages) {
if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
cleanupSectionOffsetToParse += GpgpuWalkerHelper<FamilyType>::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) / 2;
}
@@ -402,7 +401,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, GivenProfilingWhenBuildingSlbThenEmC
auto pipeControlItor = find<PIPE_CONTROL *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages && GpgpuWalkerHelper<FamilyType>::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) {
if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages && GpgpuWalkerHelper<FamilyType>::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) {
auto loadRegImmItor = find<MI_LOAD_REGISTER_IMM *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
EXPECT_NE(hwParser.cmdList.end(), loadRegImmItor);
@@ -628,7 +627,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCall
auto blockManager = pKernel->getProgram()->getBlockKernelManager();
auto iddCount = blockManager->getCount();
for (uint32_t i = 0; i < iddCount; i++) {
((SPatchExecutionEnvironment *)blockManager->getBlockKernelInfo(i)->patchInfo.executionEnvironment)->HasBarriers = 1u;
const_cast<KernelDescriptor &>(blockManager->getBlockKernelInfo(i)->kernelDescriptor).kernelAttributes.barrierCount = 1U;
}
auto surfaceStateHeapSize =

View File

@@ -95,7 +95,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
ASSERT_NE(nullptr, pBlockInfo);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize;
@@ -103,7 +102,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize;
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload);
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->patchInfo.executionEnvironment->LargestCompiledSIMDSize, numChannels);
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->getMaxSimdSize(), numChannels);
uint32_t numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
@@ -323,7 +322,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
ASSERT_NE(nullptr, pBlockInfo);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
Kernel *blockKernel = Kernel::create(pKernel->getProgram(), MockKernel::toKernelInfoContainer(*pBlockInfo, rootDeviceIndex), nullptr);

View File

@@ -39,9 +39,6 @@ void DevicePreemptionTests::SetUp() {
device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr, rootDeviceIndex));
context.reset(new MockContext(device.get()));
cmdQ.reset(new MockCommandQueue(context.get(), device.get(), properties));
executionEnvironment.reset(new SPatchExecutionEnvironment);
memset(executionEnvironment.get(), 0, sizeof(SPatchExecutionEnvironment));
kernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
program = std::make_unique<MockProgram>(toClDeviceVector(*device));
kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex)));
dispatchInfo.reset(new DispatchInfo(device.get(), kernel.get(), 1, Vec3<size_t>(1, 1, 1), Vec3<size_t>(1, 1, 1), Vec3<size_t>(0, 0, 0)));

View File

@@ -16,10 +16,6 @@
#include <memory>
namespace iOpenCL {
struct SPatchExecutionEnvironment;
}
namespace NEO {
class DispatchInfo;
class MockCommandQueue;
@@ -50,7 +46,6 @@ class DevicePreemptionTests : public ::testing::Test {
std::unique_ptr<NEO::MockClDevice> device;
std::unique_ptr<NEO::MockContext> context;
std::unique_ptr<DebugManagerStateRestore> dbgRestore;
std::unique_ptr<iOpenCL::SPatchExecutionEnvironment> executionEnvironment;
std::unique_ptr<NEO::MockProgram> program;
std::unique_ptr<NEO::KernelInfo> kernelInfo;
const uint32_t rootDeviceIndex = 0u;

View File

@@ -22,6 +22,8 @@ KernelImageArgTest::~KernelImageArgTest() = default;
void KernelImageArgTest::SetUp() {
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
KernelArgPatchInfo kernelArgPatchInfo;
pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);

View File

@@ -21,9 +21,10 @@ using Gen11EnqueueTest = Test<ClDeviceFixture>;
GEN11TEST_F(Gen11EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenDefaultPolicyIsProgrammed) {
MockContext mc;
CommandQueueHw<FamilyType> cmdQ{&mc, pClDevice, 0, false};
SPatchExecutionEnvironment executionEnvironment = {};
executionEnvironment.SubgroupIndependentForwardProgressRequired = true;
MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment);
SPatchExecutionEnvironment sPatchExecEnv = {};
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true;
MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv);
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr);
@@ -39,9 +40,10 @@ GEN11TEST_F(Gen11EnqueueTest, givenKernelRequiringIndependentForwardProgressWhen
GEN11TEST_F(Gen11EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) {
MockContext mc;
CommandQueueHw<FamilyType> cmdQ{&mc, pClDevice, 0, false};
SPatchExecutionEnvironment executionEnvironment = {};
executionEnvironment.SubgroupIndependentForwardProgressRequired = false;
MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment);
SPatchExecutionEnvironment sPatchExecEnv = {};
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = false;
MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv);
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr);

View File

@@ -21,9 +21,10 @@ using Gen9EnqueueTest = Test<ClDeviceFixture>;
GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenRoundRobinPolicyIsProgrammed) {
MockContext mc;
CommandQueueHw<SKLFamily> cmdQ{&mc, pClDevice, 0, false};
SPatchExecutionEnvironment executionEnvironment = {};
executionEnvironment.SubgroupIndependentForwardProgressRequired = true;
MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment);
SPatchExecutionEnvironment sPatchExecEnv = {};
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true;
MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv);
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr);
@@ -39,9 +40,10 @@ GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKe
GEN9TEST_F(Gen9EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) {
MockContext mc;
CommandQueueHw<SKLFamily> cmdQ{&mc, pClDevice, 0, false};
SPatchExecutionEnvironment executionEnvironment = {};
executionEnvironment.SubgroupIndependentForwardProgressRequired = false;
MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment);
SPatchExecutionEnvironment sPatchExecEnv = {};
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = false;
MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv);
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr);

View File

@@ -1134,9 +1134,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GTPinTests, givenInitializedGTPinInterfaceWhenKernel
pPatch1->CompiledSubGroupsNumber = 0;
pPatch1->HasBarriers = 0;
pPatch1->DisableMidThreadPreemption = 0;
pPatch1->CompiledSIMD8 = 0;
pPatch1->CompiledSIMD16 = 0;
pPatch1->CompiledSIMD32 = 1;
pPatch1->HasDeviceEnqueue = 1;
pPatch1->MayAccessUndeclaredResource = 0;
pPatch1->UsesFencesForReadWriteImages = 0;

View File

@@ -31,20 +31,16 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture
cl_device_id device = pClDevice;
ContextFixture::SetUp(1, &device);
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
pMediaVFEstate = new SPatchMediaVFEState();
pMediaVFEstate->PerThreadScratchSpace = 1024;
pMediaVFEstate->ScratchSpaceOffset = 0;
pExecutionEnvironment = new SPatchExecutionEnvironment();
pExecutionEnvironment->CompiledSIMD32 = 1;
pExecutionEnvironment->LargestCompiledSIMDSize = 32;
pExecutionEnvironment->NumGRFRequired = GrfConfig::DefaultGrfNumber;
pPrintfSurface = new SPatchAllocateStatelessPrintfSurface();
pKernelInfo->patchInfo.mediavfestate = pMediaVFEstate;
pKernelInfo->patchInfo.executionEnvironment = pExecutionEnvironment;
pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface = pPrintfSurface;
KernelArgPatchInfo kernelArg1PatchInfo;
@@ -79,7 +75,6 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture
void TearDown() override {
delete pKernel;
delete pPrintfSurface;
delete pExecutionEnvironment;
delete pMediaVFEstate;
delete pProgram;
@@ -89,7 +84,6 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture
std::unique_ptr<KernelInfo> pKernelInfo;
SPatchMediaVFEState *pMediaVFEstate = nullptr;
SPatchExecutionEnvironment *pExecutionEnvironment;
SPatchAllocateStatelessPrintfSurface *pPrintfSurface = nullptr;
MockProgram *pProgram = nullptr;
MockKernel *pKernel = nullptr;

View File

@@ -26,6 +26,7 @@
#include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/mocks/mock_allocation_properties.h"
#include <iostream>
using namespace NEO;
void HardwareCommandsTest::SetUp() {
@@ -560,6 +561,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1;
modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 0;
KernelInfoContainer kernelInfos;
modifiedKernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
kernelInfos.push_back(&modifiedKernelInfo);
MockKernel mockKernel(kernel->getProgram(), kernelInfos, false);
uint32_t interfaceDescriptorIndex = 0;
@@ -583,18 +585,21 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
true,
*pDevice);
size_t numThreads = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
numThreads = Math::divideAndRoundUp(numThreads, modifiedKernelInfo.getMaxSimdSize());
size_t expectedIohSize = ((modifiedKernelInfo.getMaxSimdSize() == 32) ? 32 : 16) * 3 * numThreads * sizeof(uint16_t);
constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF);
size_t localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ;
ASSERT_NE(nullptr, modifiedKernelInfo.patchInfo.threadPayload);
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*modifiedKernelInfo.patchInfo.threadPayload);
size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(modifiedKernelInfo.getMaxSimdSize(), grfSize, numChannels, localWorkSize);
ASSERT_LE(expectedIohSize, ioh.getUsed());
auto expectedLocalIds = alignedMalloc(expectedIohSize, 64);
uint32_t grfSize = sizeof(typename FamilyType::GRF);
generateLocalIDs(expectedLocalIds, modifiedKernelInfo.getMaxSimdSize(),
std::array<uint16_t, 3>{{localWorkSizeX, localWorkSizeY, localWorkSizeZ}},
std::array<uint8_t, 3>{{modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0],
modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1],
modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}},
false, grfSize);
EXPECT_EQ(0, memcmp(expectedLocalIds, ioh.getCpuBase(), expectedIohSize));
alignedFree(expectedLocalIds);
}
@@ -684,12 +689,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// define patch offsets for global, constant, private, event pool and default device queue surfaces
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
@@ -854,6 +854,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTableStatesForKernelThenSshIsNotUsed) {
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// create program with valid context
MockContext context;
@@ -909,6 +910,7 @@ HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTabl
HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStatesThenPointerIsZero) {
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// create program with valid context
MockContext context;

View File

@@ -34,16 +34,14 @@ struct PerThreadDataTests : public ClDeviceFixture,
threadPayload.UnusedPerThreadConstantPresent =
!(localIdX || localIdY || localIdZ || flattenedId);
executionEnvironment = {};
executionEnvironment.CompiledSIMD32 = 1;
executionEnvironment.LargestCompiledSIMDSize = 32;
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
kernelInfo.patchInfo.threadPayload = &threadPayload;
simd = executionEnvironment.LargestCompiledSIMDSize;
simd = kernelInfo.getMaxSimdSize();
numChannels = threadPayload.LocalIDXPresent +
threadPayload.LocalIDYPresent +
threadPayload.LocalIDZPresent;
@@ -69,7 +67,6 @@ struct PerThreadDataTests : public ClDeviceFixture,
SKernelBinaryHeaderCommon kernelHeader;
SPatchThreadPayload threadPayload;
SPatchExecutionEnvironment executionEnvironment;
KernelInfo kernelInfo;
};

View File

@@ -240,7 +240,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD
EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference);
EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle);
EXPECT_EQ(preemptionMode, mockCsr->passedDispatchFlags.preemptionMode);
EXPECT_EQ(kernel.mockKernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->NumGRFRequired, mockCsr->passedDispatchFlags.numGrfRequired);
EXPECT_EQ(kernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.numGrfRequired, mockCsr->passedDispatchFlags.numGrfRequired);
EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings);
EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
EXPECT_EQ(flushDC, mockCsr->passedDispatchFlags.dcFlush);

View File

@@ -46,6 +46,7 @@ class CloneKernelFixture : public ContextFixture, public ClDeviceFixture {
// define kernel info
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// setup kernel arg offsets
KernelArgPatchInfo kernelArgPatchInfo;

View File

@@ -45,6 +45,8 @@ class KernelArgAcceleratorFixture : public ContextFixture, public ClDeviceFixtur
ContextFixture::SetUp(1, &device);
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
KernelArgPatchInfo kernelArgPatchInfo;
pKernelInfo->kernelArgInfo.resize(1);

View File

@@ -33,6 +33,7 @@ void KernelArgBufferFixture::SetUp() {
// define kernel info
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// setup kernel arg offsets
KernelArgPatchInfo kernelArgPatchInfo;

View File

@@ -23,6 +23,8 @@ struct KernelArgDevQueueTest : public DeviceHostQueueFixture<DeviceQueue> {
pDeviceQueue = createQueueObject();
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pKernelInfo->kernelArgInfo.resize(1);
pKernelInfo->kernelArgInfo[0].isDeviceQueue = true;

View File

@@ -39,6 +39,7 @@ class KernelArgPipeFixture : public ContextFixture, public ClDeviceFixture {
// define kernel info
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// setup kernel arg offsets
KernelArgPatchInfo kernelArgPatchInfo;

View File

@@ -38,6 +38,7 @@ class KernelArgSvmFixture_ : public ContextFixture, public ClDeviceFixture {
// define kernel info
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// setup kernel arg offsets
KernelArgPatchInfo kernelArgPatchInfo;

View File

@@ -28,6 +28,7 @@ class KernelArgImmediateTest : public Test<ClDeviceFixture> {
// define kernel info
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// setup kernel arg offsets
KernelArgPatchInfo kernelArgPatchInfo;

View File

@@ -654,9 +654,7 @@ TEST(KernelReflectionSurfaceTestSingle, GivenNoKernelArgsWhenObtainingKernelRefl
cl_queue_properties properties[1] = {0};
DeviceQueue devQueue(&context, device.get(), properties[0]);
SPatchExecutionEnvironment environment = {};
environment.HasDeviceEnqueue = 1;
info.patchInfo.executionEnvironment = &environment;
info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
SPatchDataParameterStream dataParameterStream;
dataParameterStream.Size = 0;
@@ -709,9 +707,7 @@ TEST(KernelReflectionSurfaceTestSingle, GivenDeviceQueueKernelArgWhenObtainingKe
uint32_t devQueueCurbeOffset = 16;
uint32_t devQueueCurbeSize = 4;
SPatchExecutionEnvironment environment = {};
environment.HasDeviceEnqueue = 1;
info.patchInfo.executionEnvironment = &environment;
info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
SPatchDataParameterStream dataParameterStream;
dataParameterStream.Size = 0;
@@ -1245,11 +1241,8 @@ class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam<s
info.patchInfo.dataParameterStream = &dataParameterStream;
executionEnvironment = {};
executionEnvironment.LargestCompiledSIMDSize = 16;
executionEnvironment.HasBarriers = 1;
info.patchInfo.executionEnvironment = &executionEnvironment;
info.kernelDescriptor.kernelAttributes.simdSize = 16;
info.kernelDescriptor.kernelAttributes.barrierCount = 1;
info.patchInfo.threadPayload = &threadPayload;
@@ -1274,7 +1267,6 @@ class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam<s
KernelInfo info;
SPatchSamplerStateArray samplerStateArray;
SPatchDataParameterStream dataParameterStream;
SPatchExecutionEnvironment executionEnvironment;
SPatchThreadPayload threadPayload;
SPatchAllocateStatelessPrivateSurface privateSurface;
@@ -1330,8 +1322,8 @@ TEST_P(ReflectionSurfaceHelperSetKernelDataTest, WhenSettingKernelDataThenDataAn
EXPECT_EQ(dataParameterStream.DataParameterStreamSize, kernelData->m_sizeOfConstantBuffer);
EXPECT_EQ(tokenMask, kernelData->m_PatchTokensMask);
EXPECT_EQ(0u, kernelData->m_ScratchSpacePatchValue);
EXPECT_EQ(executionEnvironment.LargestCompiledSIMDSize, kernelData->m_SIMDSize);
EXPECT_EQ(executionEnvironment.HasBarriers, kernelData->m_HasBarriers);
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.simdSize, kernelData->m_SIMDSize);
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.barrierCount, kernelData->m_HasBarriers);
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], kernelData->m_RequiredWkgSizes[0]);
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1], kernelData->m_RequiredWkgSizes[1]);
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2], kernelData->m_RequiredWkgSizes[2]);
@@ -1356,36 +1348,6 @@ TEST_P(ReflectionSurfaceHelperSetKernelDataTest, WhenSettingKernelDataThenDataAn
EXPECT_EQ(expectedOffset, offset);
}
TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullExecutionEnvironmentWhenSettingKernelDataThenDataAndOffsetsAreCorrect) {
info.patchInfo.executionEnvironment = nullptr;
std::unique_ptr<char> kernelDataMemory(new char[4096]);
std::vector<IGIL_KernelCurbeParams> curbeParams;
uint64_t tokenMask = 1 | 2 | 4;
size_t maxConstantBufferSize = 32;
size_t samplerCount = 1;
size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getClDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize();
uint32_t offsetInKernelDataMemory = 0;
uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory,
curbeParams, tokenMask, maxConstantBufferSize, samplerCount,
info, pPlatform->getClDevice(0)->getHardwareInfo());
IGIL_KernelData *kernelData = reinterpret_cast<IGIL_KernelData *>(kernelDataMemory.get() + offsetInKernelDataMemory);
EXPECT_EQ(0u, kernelData->m_SIMDSize);
EXPECT_EQ(0u, kernelData->m_HasBarriers);
size_t expectedOffset = offsetInKernelDataMemory;
expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *));
expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams);
EXPECT_EQ(expectedOffset, offset);
}
TEST_F(ReflectionSurfaceHelperSetKernelDataTest, GivenNullThreadPayloadWhenSettingKernelDataThenDataAndOffsetsAreCorrect) {
info.patchInfo.threadPayload = nullptr;
@@ -2128,9 +2090,7 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenNoKernelArgsWhenObtainingKernelRefl
cl_queue_properties properties[1] = {0};
DeviceQueue devQueue(context.get(), device1, properties[0]);
SPatchExecutionEnvironment environment = {};
environment.HasDeviceEnqueue = 1;
info.patchInfo.executionEnvironment = &environment;
info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
SPatchDataParameterStream dataParameterStream;
dataParameterStream.Size = 0;
@@ -2185,9 +2145,7 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenDeviceQueueKernelArgWhenObtainingKe
uint32_t devQueueCurbeOffset = 16;
uint32_t devQueueCurbeSize = 4;
SPatchExecutionEnvironment environment = {};
environment.HasDeviceEnqueue = 1;
info.patchInfo.executionEnvironment = &environment;
info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
SPatchDataParameterStream dataParameterStream;
dataParameterStream.Size = 0;

View File

@@ -24,6 +24,8 @@ class KernelSlmArgTest : public Test<ClDeviceFixture> {
void SetUp() override {
ClDeviceFixture::SetUp();
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
KernelArgPatchInfo kernelArgPatchInfo;
pKernelInfo->kernelArgInfo.resize(3);

View File

@@ -25,11 +25,6 @@ struct KernelSLMAndBarrierTest : public ClDeviceFixture,
memset(&dataParameterStream, 0, sizeof(dataParameterStream));
dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData);
executionEnvironment = {};
memset(&executionEnvironment, 0, sizeof(executionEnvironment));
executionEnvironment.CompiledSIMD32 = 1;
executionEnvironment.LargestCompiledSIMDSize = 32;
memset(&threadPayload, 0, sizeof(threadPayload));
threadPayload.LocalIDXPresent = 1;
threadPayload.LocalIDYPresent = 1;
@@ -38,7 +33,9 @@ struct KernelSLMAndBarrierTest : public ClDeviceFixture,
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
kernelInfo.patchInfo.threadPayload = &threadPayload;
}
void TearDown() override {
@@ -52,7 +49,6 @@ struct KernelSLMAndBarrierTest : public ClDeviceFixture,
SKernelBinaryHeaderCommon kernelHeader;
SPatchDataParameterStream dataParameterStream;
SPatchExecutionEnvironment executionEnvironment;
SPatchThreadPayload threadPayload;
KernelInfo kernelInfo;
@@ -69,7 +65,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgr
typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
// define kernel info
executionEnvironment.HasBarriers = 1;
kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1;
kernelInfo.workloadInfo.slmStaticSize = GetParam() * KB;
MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex));
@@ -135,7 +131,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgr
}
ASSERT_GT(ExpectedSLMSize, 0u);
EXPECT_EQ(ExpectedSLMSize, pSrcIDData->getSharedLocalMemorySize());
EXPECT_EQ(!!executionEnvironment.HasBarriers, pSrcIDData->getBarrierEnable());
EXPECT_EQ(kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers(), pSrcIDData->getBarrierEnable());
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, pSrcIDData->getDenormMode());
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {

View File

@@ -430,7 +430,6 @@ TEST(PatchInfo, WhenPatchInfoIsCreatedThenMembersAreNullptr) {
EXPECT_EQ(nullptr, patchInfo.bindingTableState);
EXPECT_EQ(nullptr, patchInfo.dataParameterStream);
EXPECT_EQ(nullptr, patchInfo.threadPayload);
EXPECT_EQ(nullptr, patchInfo.executionEnvironment);
EXPECT_EQ(nullptr, patchInfo.pKernelAttributesInfo);
EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrivateSurface);
EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization);
@@ -549,11 +548,7 @@ TEST_F(KernelPrivateSurfaceTest, WhenChangingResidencyThenCsrResidencySizeIsUpda
tokenDPS.DataParameterStreamSize = 64;
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// create kernel
MockContext context;
@@ -590,9 +585,7 @@ TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWh
tokenDPS.DataParameterStreamSize = 64;
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
MockContext context;
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
@@ -631,11 +624,7 @@ TEST_F(KernelPrivateSurfaceTest, WhenPrivateSurfaceAllocationFailsThenOutOfResou
tokenDPS.DataParameterStreamSize = 64;
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// create kernel
MockContext context;
@@ -676,11 +665,7 @@ TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateS
tokenDPS.DataParameterStreamSize = 64;
pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// create kernel
MockContext context;
@@ -700,11 +685,7 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup constant memory
SPatchAllocateStatelessPrivateSurface AllocateStatelessPrivateMemorySurface;
@@ -752,11 +733,7 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup global memory
char buffer[16];
@@ -801,12 +778,11 @@ TEST_F(KernelPrivateSurfaceTest, givenNonNullDataParameterStreamWhenGettingConst
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4ThenReturnOutOfResources) {
auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
auto executionEnvironment = std::unique_ptr<SPatchExecutionEnvironment>(new SPatchExecutionEnvironment());
*executionEnvironment = {};
executionEnvironment->CompiledSIMD32 = 32;
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
MockContext context;
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)));
@@ -820,12 +796,11 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4And32BitAllocationsThenReturnOutOfResources) {
auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
auto executionEnvironment = std::unique_ptr<SPatchExecutionEnvironment>(new SPatchExecutionEnvironment());
*executionEnvironment = {};
executionEnvironment->CompiledSIMD32 = 32;
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
MockContext context;
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)));
@@ -839,12 +814,11 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize8And32BitAllocationsThenReturnOutOfResources) {
auto pAllocateStatelessPrivateSurface = std::unique_ptr<SPatchAllocateStatelessPrivateSurface>(new SPatchAllocateStatelessPrivateSurface());
pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits<uint32_t>::max();
auto executionEnvironment = std::unique_ptr<SPatchExecutionEnvironment>(new SPatchExecutionEnvironment());
*executionEnvironment = {};
executionEnvironment->CompiledSIMD32 = 32;
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
MockContext context;
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
std::unique_ptr<MockKernel> pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)));
@@ -871,11 +845,7 @@ TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalS
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
char buffer[16];
@@ -914,11 +884,7 @@ TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalS
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
char buffer[16];
@@ -944,11 +910,7 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup global memory
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
@@ -998,12 +960,7 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup global memory
char buffer[16];
@@ -1044,11 +1001,7 @@ TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConst
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
char buffer[16];
@@ -1086,11 +1039,7 @@ TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConst
tempSPatchDataParameterStream.DataParameterStreamSize = 16;
pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
char buffer[16];
@@ -1115,12 +1064,7 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup constant memory
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization;
@@ -1170,12 +1114,7 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup global memory
char buffer[16];
@@ -1204,12 +1143,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenK
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup event pool surface
SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
@@ -1253,12 +1187,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenE
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup event pool surface
SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
@@ -1302,13 +1231,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenKernelWithNullEvent
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = nullptr;
// create kernel
@@ -1334,12 +1257,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup event pool surface
SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
@@ -1370,12 +1288,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup event pool surface
SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
@@ -1408,12 +1321,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup default device queue surface
SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
@@ -1457,12 +1365,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup default device queue surface
SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
@@ -1508,12 +1411,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup default device queue surface
SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
@@ -1542,13 +1440,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenKernelWith
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = nullptr;
// create kernel
@@ -1574,12 +1466,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
SPatchExecutionEnvironment tokenEE = {};
tokenEE.CompiledSIMD8 = false;
tokenEE.CompiledSIMD16 = false;
tokenEE.CompiledSIMD32 = true;
pKernelInfo->patchInfo.executionEnvironment = &tokenEE;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
// setup default device queue surface
SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
@@ -1616,6 +1503,8 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIs
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;
@@ -1651,6 +1540,8 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIs
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFunctionsIsaAllocationIsMadeResident) {
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;
@@ -1689,6 +1580,8 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFun
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBufferIsMadeResident) {
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;
@@ -2030,6 +1923,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemor
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pKernelInfo->kernelArgInfo.resize(3);
pKernelInfo->kernelArgInfo[2].isImage = true;
pKernelInfo->kernelArgInfo[1].isMediaBlockImage = true;
@@ -2047,6 +1941,7 @@ TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasIma
TEST(KernelImageDetectionTests, givenKernelWithImagesAndBuffersWhenItIsAskedIfItHasImagesOnlyThenFalseIsReturned) {
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pKernelInfo->kernelArgInfo.resize(3);
pKernelInfo->kernelArgInfo[2].isImage = true;
pKernelInfo->kernelArgInfo[1].isBuffer = true;
@@ -2064,6 +1959,7 @@ TEST(KernelImageDetectionTests, givenKernelWithImagesAndBuffersWhenItIsAskedIfIt
TEST(KernelImageDetectionTests, givenKernelWithNoImagesWhenItIsAskedIfItHasImagesOnlyThenFalseIsReturned) {
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
pKernelInfo->kernelArgInfo.resize(1);
pKernelInfo->kernelArgInfo[0].isBuffer = true;
@@ -2118,6 +2014,8 @@ HWTEST_F(KernelResidencyTest, WhenMakingArgsResidentThenImageFromImageCheckIsCor
EXPECT_EQ(imageY->getMediaPlaneType(), 0u);
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
KernelArgInfo kernelArgInfo;
kernelArgInfo.isImage = true;
@@ -2144,8 +2042,7 @@ struct KernelExecutionEnvironmentTest : public Test<ClDeviceFixture> {
program = std::make_unique<MockProgram>(toClDeviceVector(*pClDevice));
pKernelInfo = std::make_unique<KernelInfo>();
executionEnvironment.CompiledSIMD32 = 1;
pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
@@ -2163,65 +2060,6 @@ struct KernelExecutionEnvironmentTest : public Test<ClDeviceFixture> {
SPatchExecutionEnvironment executionEnvironment = {};
};
// With SIMD8, SIMD16 and SIMD32 all flagged as compiled in the fixture's
// execution environment, getMaxSimdSize() is expected to report 32.
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledSimd32TrueWhenGettingMaxSimdSizeThen32IsReturned) {
    executionEnvironment.CompiledSIMD8 = true;
    executionEnvironment.CompiledSIMD16 = true;
    executionEnvironment.CompiledSIMD32 = true;

    const auto maxSimdSize = this->pKernelInfo->getMaxSimdSize();
    EXPECT_EQ(32u, maxSimdSize);
}
// With SIMD32 cleared but SIMD16 and SIMD8 flagged as compiled,
// getMaxSimdSize() is expected to report 16.
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledSimd32FalseAndCompiledSimd16TrueWhenGettingMaxSimdSizeThen16IsReturned) {
    executionEnvironment.CompiledSIMD8 = true;
    executionEnvironment.CompiledSIMD16 = true;
    executionEnvironment.CompiledSIMD32 = false;

    const auto maxSimdSize = this->pKernelInfo->getMaxSimdSize();
    EXPECT_EQ(16u, maxSimdSize);
}
// With only SIMD8 flagged as compiled, getMaxSimdSize() is expected to report 8.
TEST_F(KernelExecutionEnvironmentTest, GivenOnlyCompiledSimd8TrueWhenGettingMaxSimdSizeThen8IsReturned) {
    executionEnvironment.CompiledSIMD8 = true;
    executionEnvironment.CompiledSIMD16 = false;
    executionEnvironment.CompiledSIMD32 = false;

    const auto maxSimdSize = this->pKernelInfo->getMaxSimdSize();
    EXPECT_EQ(8u, maxSimdSize);
}
// With none of the CompiledSIMD* flags set (but an execution environment still
// attached to the kernel info), getMaxSimdSize() is expected to report 8.
TEST_F(KernelExecutionEnvironmentTest, GivenAllCompiledSimdFalseWhenGettingMaxSimdSizeThen8IsReturned) {
    executionEnvironment.CompiledSIMD8 = false;
    executionEnvironment.CompiledSIMD16 = false;
    executionEnvironment.CompiledSIMD32 = false;

    const auto maxSimdSize = this->pKernelInfo->getMaxSimdSize();
    EXPECT_EQ(8u, maxSimdSize);
}
// When the kernel info has no execution environment attached at all,
// getMaxSimdSize() is expected to report 1.
TEST_F(KernelExecutionEnvironmentTest, GivenExecutionEnvironmentNotAvailableWhenGettingMaxSimdSizeThen1IsReturned) {
    executionEnvironment.CompiledSIMD8 = false;
    executionEnvironment.CompiledSIMD16 = false;
    executionEnvironment.CompiledSIMD32 = false;

    // Detach the execution environment only for the duration of the check,
    // then put the original pointer back so the fixture state is unchanged.
    auto &execEnvSlot = this->pKernelInfo->patchInfo.executionEnvironment;
    const auto savedExecEnv = execEnvSlot;
    execEnvSlot = nullptr;

    EXPECT_EQ(1U, this->pKernelInfo->getMaxSimdSize());

    execEnvSlot = savedExecEnv;
}
// With LargestCompiledSIMDSize set to 1 in the fixture's execution environment,
// getMaxSimdSize() is expected to report 1.
TEST_F(KernelExecutionEnvironmentTest, GivenLargestCompiledSimdSizeEqualOneWhenGettingMaxSimdSizeThen1IsReturned) {
    executionEnvironment.LargestCompiledSIMDSize = 1;

    // The execution-environment pointer in patchInfo is never modified by this
    // test, so there is nothing to save and restore around the check.
    EXPECT_EQ(1U, this->pKernelInfo->getMaxSimdSize());
}
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeIsZeroWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) {
auto maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
@@ -2285,8 +2123,7 @@ struct KernelCrossThreadTests : Test<ClDeviceFixture> {
pKernelInfo = std::make_unique<KernelInfo>();
ASSERT_NE(nullptr, pKernelInfo);
pKernelInfo->patchInfo.dataParameterStream = &patchDataParameterStream;
executionEnvironment.CompiledSIMD32 = 1;
pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
}
void TearDown() override {
@@ -2394,7 +2231,6 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeI
}
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSizeIsCorrect) {
pKernelInfo->workloadInfo.maxWorkGroupSizeOffset = 12;
MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
@@ -2408,12 +2244,9 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSi
}
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenDataParameterSimdSizeIsCorrect) {
pKernelInfo->workloadInfo.simdSizeOffset = 16;
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 16;
MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
executionEnvironment.CompiledSIMD32 = false;
executionEnvironment.CompiledSIMD16 = true;
executionEnvironment.CompiledSIMD8 = true;
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
EXPECT_NE(nullptr, kernel.kernelDeviceInfos[rootDeviceIndex].dataParameterSimdSize);
@@ -2504,10 +2337,7 @@ TEST_F(KernelCrossThreadTests, WhenPatchingBlocksSimdSizeThenSimdSizeIsPatchedCo
// add a new block kernel to program
auto infoBlock = new KernelInfo();
kernel->executionEnvironmentBlock.CompiledSIMD8 = 0;
kernel->executionEnvironmentBlock.CompiledSIMD16 = 1;
kernel->executionEnvironmentBlock.CompiledSIMD32 = 0;
infoBlock->patchInfo.executionEnvironment = &kernel->executionEnvironmentBlock;
infoBlock->kernelDescriptor.kernelAttributes.simdSize = 16;
kernel->mockProgram->blockKernelManager->addBlockKernelInfo(infoBlock);
// patch block's simd size
@@ -2649,15 +2479,18 @@ TEST(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedThenMa
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
MockKernelWithInternals kernel(*device);
kernel.executionEnvironment.LargestCompiledSIMDSize = CommonConstants::maximalSimdSize;
size_t maxKernelWkgSize;
kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
EXPECT_EQ(1024u, maxKernelWkgSize);
kernel.executionEnvironment.LargestCompiledSIMDSize = 16;
kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
EXPECT_EQ(512u, maxKernelWkgSize);
kernel.executionEnvironment.LargestCompiledSIMDSize = 8;
kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 8;
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
EXPECT_EQ(256u, maxKernelWkgSize);
}
@@ -2823,9 +2656,7 @@ TEST(KernelTest, givenKernelCompiledWithSimdSizeLowerThanExpectedWhenInitializin
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
auto minSimd = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily).getMinimalSIMDSize();
MockKernelWithInternals kernel(*device);
kernel.executionEnvironment.CompiledSIMD32 = 0;
kernel.executionEnvironment.CompiledSIMD16 = 0;
kernel.executionEnvironment.CompiledSIMD8 = 1;
kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 8;
cl_int retVal = kernel.mockKernel->initialize();
@@ -2839,10 +2670,7 @@ TEST(KernelTest, givenKernelCompiledWithSimdSizeLowerThanExpectedWhenInitializin
TEST(KernelTest, givenKernelCompiledWithSimdOneWhenInitializingThenReturnError) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockKernelWithInternals kernel(*device);
kernel.executionEnvironment.CompiledSIMD32 = 0;
kernel.executionEnvironment.CompiledSIMD16 = 0;
kernel.executionEnvironment.CompiledSIMD8 = 0;
kernel.executionEnvironment.LargestCompiledSIMDSize = 1;
kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 1;
cl_int retVal = kernel.mockKernel->initialize();
@@ -3133,10 +2961,11 @@ TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsNotSetThenKe
}
TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaultValue) {
SPatchExecutionEnvironment sPatchExecutionEnvironment = {};
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = true;
UltClDeviceFactory deviceFactory{1, 0};
MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0], sPatchExecutionEnvironment};
SPatchExecutionEnvironment sPatchExecEnv = {};
sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true;
MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0], sPatchExecEnv};
auto &mockKernel = *mockKernelWithInternals.mockKernel;
auto &hwHelper = HwHelper::get(deviceFactory.rootDevices[0]->getHardwareInfo().platform.eRenderCoreFamily);
@@ -3144,10 +2973,9 @@ TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaul
}
TEST(KernelTest, givenKernelWhenSettingAdditinalKernelExecInfoThenCorrectValueIsSet) {
SPatchExecutionEnvironment sPatchExecutionEnvironment = {};
sPatchExecutionEnvironment.SubgroupIndependentForwardProgressRequired = true;
UltClDeviceFactory deviceFactory{1, 0};
MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0], sPatchExecutionEnvironment};
MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0]};
mockKernelWithInternals.kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress = true;
auto &mockKernel = *mockKernelWithInternals.mockKernel;
@@ -3219,6 +3047,7 @@ using KernelMultiRootDeviceTest = MultiRootDeviceFixture;
TEST_F(KernelMultiRootDeviceTest, givenKernelWithPrivateSurfaceWhenInitializeThenPrivateSurfacesHaveCorrectRootDeviceIndex) {
auto pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// setup private memory
SPatchAllocateStatelessPrivateSurface tokenSPS;

View File

@@ -23,6 +23,8 @@ class KernelTransformableTest : public ::testing::Test {
void SetUp() override {
context = std::make_unique<MockContext>(deviceFactory.rootDevices[rootDeviceIndex]);
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
KernelArgPatchInfo kernelArgPatchInfo;
pKernelInfo->heapInfo.pSsh = surfaceStateHeap;

View File

@@ -34,13 +34,10 @@ class MockKernelWithArgumentAccess : public Kernel {
};
TEST(ParentKernelTest, WhenArgsAddedThenObjectCountsAreIncremented) {
KernelInfo info;
MockClDevice *device = new MockClDevice{new MockDevice};
MockProgram program(toClDeviceVector(*device));
SPatchExecutionEnvironment environment = {};
environment.HasDeviceEnqueue = 1;
info.patchInfo.executionEnvironment = &environment;
KernelInfo info;
info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
MockKernelWithArgumentAccess kernel(&program, MockKernel::toKernelInfoContainer(info, device->getRootDeviceIndex()));
@@ -155,10 +152,7 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsA
infoBlock->patchInfo.threadPayload = threadPayloadBlock;
SPatchExecutionEnvironment *executionEnvironmentBlock = new SPatchExecutionEnvironment;
*executionEnvironmentBlock = {};
executionEnvironmentBlock->HasDeviceEnqueue = 1;
infoBlock->patchInfo.executionEnvironment = executionEnvironmentBlock;
infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
SPatchDataParameterStream *streamBlock = new SPatchDataParameterStream;
streamBlock->DataParameterStreamSize = 0;

View File

@@ -32,10 +32,7 @@ class BufferSetArgTest : public ContextFixture,
using ContextFixture::SetUp;
public:
BufferSetArgTest()
{
}
BufferSetArgTest() {}
protected:
void SetUp() override {
@@ -43,7 +40,7 @@ class BufferSetArgTest : public ContextFixture,
cl_device_id device = pClDevice;
ContextFixture::SetUp(1, &device);
pKernelInfo = std::make_unique<KernelInfo>();
ASSERT_NE(nullptr, pKernelInfo);
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// define kernel info
// setup kernel arg offsets

View File

@@ -64,6 +64,7 @@ class ImageSetArgTest : public ClDeviceFixture,
void SetUp() override {
ClDeviceFixture::SetUp();
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// define kernel info
pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
@@ -918,6 +919,7 @@ class ImageMediaBlockSetArgTest : public ImageSetArgTest {
void SetUp() override {
ClDeviceFixture::SetUp();
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// define kernel info
pKernelInfo->heapInfo.pSsh = surfaceStateHeap;

View File

@@ -22,7 +22,7 @@
#include <cassert>
namespace NEO {
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnvironment &execEnv);
////////////////////////////////////////////////////////////////////////////////
// Kernel - Core implementation
////////////////////////////////////////////////////////////////////////////////
@@ -107,7 +107,6 @@ class MockKernel : public Kernel {
}
if (kernelInfoAllocated) {
delete kernelInfoAllocated->patchInfo.executionEnvironment;
delete kernelInfoAllocated->patchInfo.threadPayload;
delete kernelInfoAllocated;
}
@@ -132,12 +131,9 @@ class MockKernel : public Kernel {
info->patchInfo.threadPayload = threadPayload;
SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment;
memset(executionEnvironment, 0, sizeof(SPatchExecutionEnvironment));
executionEnvironment->HasDeviceEnqueue = 0;
executionEnvironment->NumGRFRequired = grfNumber;
executionEnvironment->CompiledSIMD32 = 1;
info->patchInfo.executionEnvironment = executionEnvironment;
info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = false;
info->kernelDescriptor.kernelAttributes.numGrfRequired = grfNumber;
info->kernelDescriptor.kernelAttributes.simdSize = 32;
info->crossThreadData = new char[crossThreadSize];
@@ -258,17 +254,12 @@ class MockKernel : public Kernel {
// The class below has enough internals to service an enqueue operation.
class MockKernelWithInternals {
public:
MockKernelWithInternals(ClDevice &deviceArg, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment newExecutionEnvironment = {}) {
MockKernelWithInternals(ClDevice &deviceArg, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) {
memset(&kernelHeader, 0, sizeof(SKernelBinaryHeaderCommon));
memset(&threadPayload, 0, sizeof(SPatchThreadPayload));
memcpy(&executionEnvironment, &newExecutionEnvironment, sizeof(SPatchExecutionEnvironment));
memset(&executionEnvironmentBlock, 0, sizeof(SPatchExecutionEnvironment));
memset(&dataParameterStream, 0, sizeof(SPatchDataParameterStream));
memset(&mediaVfeState, 0, sizeof(SPatchMediaVFEState));
memset(&mediaVfeStateSlot1, 0, sizeof(SPatchMediaVFEState));
executionEnvironment.NumGRFRequired = GrfConfig::DefaultGrfNumber;
executionEnvironmentBlock.NumGRFRequired = GrfConfig::DefaultGrfNumber;
executionEnvironment.CompiledSIMD32 = 1;
threadPayload.LocalIDXPresent = 1;
threadPayload.LocalIDYPresent = 1;
threadPayload.LocalIDZPresent = 1;
@@ -277,7 +268,11 @@ class MockKernelWithInternals {
kernelInfo.heapInfo.pDsh = dshLocal;
kernelInfo.heapInfo.SurfaceStateHeapSize = sizeof(sshLocal);
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
populateKernelDescriptor(kernelInfo.kernelDescriptor, execEnv);
kernelInfo.kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
kernelInfo.patchInfo.threadPayload = &threadPayload;
kernelInfo.patchInfo.mediavfestate = &mediaVfeState;
kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVfeStateSlot1;
@@ -328,8 +323,7 @@ class MockKernelWithInternals {
kernelInfo.kernelArgInfo[0].offsetHeap = 64;
}
}
MockKernelWithInternals(ClDevice &deviceArg, SPatchExecutionEnvironment newExecutionEnvironment) : MockKernelWithInternals(deviceArg, nullptr, false, newExecutionEnvironment) {
MockKernelWithInternals(ClDevice &deviceArg, SPatchExecutionEnvironment execEnv) : MockKernelWithInternals(deviceArg, nullptr, false, execEnv) {
mockKernel->initialize();
}
@@ -353,8 +347,6 @@ class MockKernelWithInternals {
SPatchMediaVFEState mediaVfeState = {};
SPatchMediaVFEState mediaVfeStateSlot1 = {};
SPatchDataParameterStream dataParameterStream = {};
SPatchExecutionEnvironment executionEnvironment = {};
SPatchExecutionEnvironment executionEnvironmentBlock = {};
uint32_t kernelIsa[32];
char crossThreadData[256];
char sshLocal[128];
@@ -390,12 +382,9 @@ class MockParentKernel : public Kernel {
info->patchInfo.threadPayload = threadPayload;
SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment;
*executionEnvironment = {};
executionEnvironment->HasDeviceEnqueue = 1;
executionEnvironment->NumGRFRequired = GrfConfig::DefaultGrfNumber;
executionEnvironment->CompiledSIMD32 = 1;
info->patchInfo.executionEnvironment = executionEnvironment;
info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
info->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
info->kernelDescriptor.kernelAttributes.simdSize = 32;
SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueue = new SPatchAllocateStatelessDefaultDeviceQueueSurface;
allocateDeviceQueue->DataParamOffset = crossThreadOffset;
@@ -509,11 +498,9 @@ class MockParentKernel : public Kernel {
infoBlock->patchInfo.threadPayload = threadPayloadBlock;
SPatchExecutionEnvironment *executionEnvironmentBlock = new SPatchExecutionEnvironment;
executionEnvironmentBlock->HasDeviceEnqueue = 1;
executionEnvironmentBlock->NumGRFRequired = GrfConfig::DefaultGrfNumber;
executionEnvironmentBlock->CompiledSIMD32 = 1;
infoBlock->patchInfo.executionEnvironment = executionEnvironmentBlock;
infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
infoBlock->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
infoBlock->kernelDescriptor.kernelAttributes.simdSize = 32;
SPatchDataParameterStream *streamBlock = new SPatchDataParameterStream;
streamBlock->DataParameterStreamSize = 0;
@@ -553,7 +540,6 @@ class MockParentKernel : public Kernel {
continue;
}
auto &kernelInfo = *pKernelInfo;
delete kernelInfo.patchInfo.executionEnvironment;
delete kernelInfo.patchInfo.pAllocateStatelessDefaultDeviceQueueSurface;
delete kernelInfo.patchInfo.pAllocateStatelessEventPoolSurface;
delete kernelInfo.patchInfo.pAllocateStatelessPrintfSurface;
@@ -567,7 +553,6 @@ class MockParentKernel : public Kernel {
delete blockInfo->patchInfo.pAllocateStatelessEventPoolSurface;
delete blockInfo->patchInfo.pAllocateStatelessPrintfSurface;
delete blockInfo->patchInfo.threadPayload;
delete blockInfo->patchInfo.executionEnvironment;
delete blockInfo->patchInfo.dataParameterStream;
delete blockInfo->patchInfo.bindingTableState;
delete blockInfo->patchInfo.interfaceDescriptorData;

View File

@@ -6,13 +6,13 @@
*/
#include "shared/source/command_stream/preemption.h"
#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h"
#include "opencl/test/unit_test/fixtures/cl_preemption_fixture.h"
#include "gmock/gmock.h"
using namespace NEO;
class ThreadGroupPreemptionTests : public DevicePreemptionTests {
void SetUp() override {
dbgRestore.reset(new DebugManagerStateRestore());
@@ -50,7 +50,7 @@ TEST_F(ThreadGroupPreemptionTests, disallowByDevice) {
TEST_F(ThreadGroupPreemptionTests, disallowByReadWriteFencesWA) {
PreemptionFlags flags = {};
executionEnvironment->UsesFencesForReadWriteImages = 1u;
kernelInfo->kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages = true;
waTable->waDisableLSQCROPERFforOCL = 1;
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
EXPECT_FALSE(PreemptionHelper::allowThreadGroupPreemption(flags));
@@ -87,18 +87,9 @@ TEST_F(ThreadGroupPreemptionTests, allowDefaultModeForNonKernelRequest) {
EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags));
}
TEST_F(ThreadGroupPreemptionTests, givenKernelWithNoEnvironmentPatchSetWhenLSQCWaIsTurnedOnThenThreadGroupPreemptionIsBeingSelected) {
PreemptionFlags flags = {};
kernelInfo.get()->patchInfo.executionEnvironment = nullptr;
waTable->waDisableLSQCROPERFforOCL = 1;
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags));
EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags));
}
TEST_F(ThreadGroupPreemptionTests, givenKernelWithEnvironmentPatchSetWhenLSQCWaIsTurnedOnThenThreadGroupPreemptionIsBeingSelected) {
PreemptionFlags flags = {};
executionEnvironment.get()->UsesFencesForReadWriteImages = 0;
kernelInfo->kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages = false;
waTable->waDisableLSQCROPERFforOCL = 1;
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags));
@@ -107,7 +98,7 @@ TEST_F(ThreadGroupPreemptionTests, givenKernelWithEnvironmentPatchSetWhenLSQCWaI
TEST_F(ThreadGroupPreemptionTests, givenKernelWithEnvironmentPatchSetWhenLSQCWaIsTurnedOffThenThreadGroupPreemptionIsBeingSelected) {
PreemptionFlags flags = {};
executionEnvironment.get()->UsesFencesForReadWriteImages = 1;
kernelInfo->kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages = true;
waTable->waDisableLSQCROPERFforOCL = 0;
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags));
@@ -167,7 +158,7 @@ TEST_F(ThreadGroupPreemptionTests, disallowDefaultDeviceModeWhenAtLeastOneInvali
TEST_F(MidThreadPreemptionTests, allowMidThreadPreemption) {
PreemptionFlags flags = {};
device->setPreemptionMode(PreemptionMode::MidThread);
executionEnvironment->DisableMidThreadPreemption = 0;
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false;
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
EXPECT_TRUE(PreemptionHelper::allowMidThreadPreemption(flags));
}
@@ -192,7 +183,7 @@ TEST_F(MidThreadPreemptionTests, allowMidThreadPreemptionDeviceSupportPreemption
TEST_F(MidThreadPreemptionTests, disallowMidThreadPreemptionByDevice) {
PreemptionFlags flags = {};
device->setPreemptionMode(PreemptionMode::ThreadGroup);
executionEnvironment->DisableMidThreadPreemption = 0;
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false;
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
EXPECT_TRUE(PreemptionHelper::allowMidThreadPreemption(flags));
EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags));
@@ -201,7 +192,7 @@ TEST_F(MidThreadPreemptionTests, disallowMidThreadPreemptionByDevice) {
TEST_F(MidThreadPreemptionTests, disallowMidThreadPreemptionByKernel) {
PreemptionFlags flags = {};
device->setPreemptionMode(PreemptionMode::MidThread);
executionEnvironment->DisableMidThreadPreemption = 1;
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = true;
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
EXPECT_FALSE(PreemptionHelper::allowMidThreadPreemption(flags));
}
@@ -218,7 +209,7 @@ TEST_F(MidThreadPreemptionTests, disallowMidThreadPreemptionByVmeKernel) {
TEST_F(MidThreadPreemptionTests, taskPreemptionDisallowMidThreadByDevice) {
PreemptionFlags flags = {};
executionEnvironment->DisableMidThreadPreemption = 0;
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false;
device->setPreemptionMode(PreemptionMode::ThreadGroup);
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags);
@@ -227,7 +218,7 @@ TEST_F(MidThreadPreemptionTests, taskPreemptionDisallowMidThreadByDevice) {
TEST_F(MidThreadPreemptionTests, taskPreemptionDisallowMidThreadByKernel) {
PreemptionFlags flags = {};
executionEnvironment->DisableMidThreadPreemption = 1;
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = true;
device->setPreemptionMode(PreemptionMode::MidThread);
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags);
@@ -248,7 +239,7 @@ TEST_F(MidThreadPreemptionTests, taskPreemptionDisallowMidThreadByVmeKernel) {
TEST_F(MidThreadPreemptionTests, taskPreemptionAllow) {
PreemptionFlags flags = {};
executionEnvironment->DisableMidThreadPreemption = 0;
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false;
device->setPreemptionMode(PreemptionMode::MidThread);
PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get());
PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags);
@@ -257,7 +248,7 @@ TEST_F(MidThreadPreemptionTests, taskPreemptionAllow) {
TEST_F(MidThreadPreemptionTests, taskPreemptionAllowDeviceSupportsPreemptionOnVmeKernel) {
PreemptionFlags flags = {};
executionEnvironment->DisableMidThreadPreemption = 0;
kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false;
kernelInfo->isVmeWorkload = true;
kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex)));
device->sharedDeviceInfo.vmeAvcSupportsPreemption = true;

View File

@@ -39,10 +39,7 @@ struct ProfilingTests : public CommandEnqueueFixture,
memset(&dataParameterStream, 0, sizeof(dataParameterStream));
dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData);
executionEnvironment = {};
memset(&executionEnvironment, 0, sizeof(executionEnvironment));
executionEnvironment.CompiledSIMD32 = 1;
executionEnvironment.LargestCompiledSIMDSize = 32;
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
memset(&threadPayload, 0, sizeof(threadPayload));
threadPayload.LocalIDXPresent = 1;
@@ -52,7 +49,6 @@ struct ProfilingTests : public CommandEnqueueFixture,
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
kernelInfo.patchInfo.threadPayload = &threadPayload;
}
@@ -64,7 +60,6 @@ struct ProfilingTests : public CommandEnqueueFixture,
SKernelBinaryHeaderCommon kernelHeader = {};
SPatchDataParameterStream dataParameterStream = {};
SPatchExecutionEnvironment executionEnvironment = {};
SPatchThreadPayload threadPayload = {};
KernelInfo kernelInfo;
MockContext ctx;

View File

@@ -303,9 +303,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingT
executionEnvironment.CompiledSubGroupsNumber = 0xaa;
executionEnvironment.HasBarriers = false;
executionEnvironment.DisableMidThreadPreemption = true;
executionEnvironment.CompiledSIMD16 = false;
executionEnvironment.CompiledSIMD32 = true;
executionEnvironment.CompiledSIMD8 = false;
executionEnvironment.HasDeviceEnqueue = false;
executionEnvironment.MayAccessUndeclaredResource = false;
executionEnvironment.UsesFencesForReadWriteImages = false;
@@ -322,7 +319,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingT
buildAndDecode();
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
@@ -340,9 +336,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect
executionEnvironment.CompiledSubGroupsNumber = 0xaa;
executionEnvironment.HasBarriers = false;
executionEnvironment.DisableMidThreadPreemption = true;
executionEnvironment.CompiledSIMD16 = false;
executionEnvironment.CompiledSIMD32 = true;
executionEnvironment.CompiledSIMD8 = false;
executionEnvironment.HasDeviceEnqueue = false;
executionEnvironment.MayAccessUndeclaredResource = false;
executionEnvironment.UsesFencesForReadWriteImages = false;
@@ -359,7 +352,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect
buildAndDecode();
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
EXPECT_EQ(32u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
EXPECT_EQ(16u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
EXPECT_EQ(8u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
@@ -378,9 +370,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentCompiledForGreaterThan4gbBuffers
executionEnvironment.CompiledSubGroupsNumber = 0xaa;
executionEnvironment.HasBarriers = false;
executionEnvironment.DisableMidThreadPreemption = true;
executionEnvironment.CompiledSIMD16 = false;
executionEnvironment.CompiledSIMD32 = true;
executionEnvironment.CompiledSIMD8 = false;
executionEnvironment.HasDeviceEnqueue = false;
executionEnvironment.MayAccessUndeclaredResource = false;
executionEnvironment.UsesFencesForReadWriteImages = false;
@@ -396,7 +385,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentCompiledForGreaterThan4gbBuffers
buildAndDecode();
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
EXPECT_FALSE(pKernelInfo->requiresSshForBuffers);
}
@@ -411,7 +399,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentDoesntHaveDeviceEnqueueWhenBuild
buildAndDecode();
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
EXPECT_EQ_VAL(0u, program->getParentKernelInfoArray(rootDeviceIndex).size());
}
@@ -426,7 +413,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentHasDeviceEnqueueWhenBuildingThen
buildAndDecode();
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
EXPECT_EQ_VAL(1u, program->getParentKernelInfoArray(rootDeviceIndex).size());
}
@@ -441,7 +427,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentDoesntRequireSubgroupIndependent
buildAndDecode();
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
EXPECT_EQ_VAL(0u, program->getSubgroupKernelInfoArray(rootDeviceIndex).size());
}
@@ -456,7 +441,6 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentRequiresSubgroupIndependentForwa
buildAndDecode();
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
EXPECT_EQ_VAL(1u, program->getSubgroupKernelInfoArray(rootDeviceIndex).size());
}

View File

@@ -36,6 +36,7 @@ class SamplerSetArgFixture : public ClDeviceFixture {
void SetUp() {
ClDeviceFixture::SetUp();
pKernelInfo = std::make_unique<KernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
// define kernel info
pKernelInfo->heapInfo.pDsh = samplerStateHeap;

View File

@@ -35,12 +35,10 @@ class MockSchedulerKernel : public SchedulerKernel {
dataParametrStream.DataParameterStreamSize = 8;
dataParametrStream.Size = 8;
SPatchExecutionEnvironment executionEnvironment = {};
executionEnvironment.CompiledSIMD32 = 1;
executionEnvironment.HasDeviceEnqueue = 0;
info->kernelDescriptor.kernelAttributes.simdSize = 32;
info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = false;
info->patchInfo.dataParameterStream = &dataParametrStream;
info->patchInfo.executionEnvironment = &executionEnvironment;
KernelArgInfo bufferArg;
bufferArg.isBuffer = true;

View File

@@ -346,6 +346,7 @@ TEST(FileLogger, GivenNullMdiWhenDumpingKernelsThenFileIsNotCreated) {
TEST(FileLogger, GivenDebugFunctionalityWhenDebugFlagIsDisabledThenDoNotDumpKernelArgsForMdi) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
MockProgram program(toClDeviceVector(*device));
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
@@ -381,6 +382,7 @@ TEST(FileLogger, GivenDebugFunctionalityWhenDebugFlagIsDisabledThenDoNotDumpKern
TEST(FileLogger, GivenMdiWhenDumpingKernelArgsThenFileIsCreated) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
MockProgram program(toClDeviceVector(*device));
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
@@ -426,6 +428,7 @@ TEST(FileLogger, GivenNullWhenDumpingKernelArgsThenFileIsNotCreated) {
TEST(FileLogger, GivenEmptyKernelWhenDumpingKernelArgsThenFileIsNotCreated) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
MockProgram program(toClDeviceVector(*device));
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
@@ -442,6 +445,8 @@ TEST(FileLogger, GivenEmptyKernelWhenDumpingKernelArgsThenFileIsNotCreated) {
TEST(FileLogger, GivenImmediateWhenDumpingKernelArgsThenFileIsCreated) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
MockProgram program(toClDeviceVector(*device));
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
@@ -474,8 +479,8 @@ TEST(FileLogger, GivenImmediateWhenDumpingKernelArgsThenFileIsCreated) {
}
TEST(FileLogger, GivenImmediateZeroSizeWhenDumpingKernelArgsThenFileIsNotCreated) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
MockProgram program(toClDeviceVector(*device));
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
@@ -504,8 +509,8 @@ TEST(FileLogger, GivenImmediateZeroSizeWhenDumpingKernelArgsThenFileIsNotCreated
}
TEST(FileLogger, GivenLocalBufferWhenDumpingKernelArgsThenFileIsNotCreated) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
MockProgram program(toClDeviceVector(*device));
auto kernel = std::unique_ptr<MockKernel>(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
@@ -528,6 +533,7 @@ TEST(FileLogger, GivenLocalBufferWhenDumpingKernelArgsThenFileIsNotCreated) {
TEST(FileLogger, GivenBufferNotSetWhenDumpingKernelArgsThenFileIsNotCreated) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto context = clUniquePtr(new MockContext(device.get()));
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
@@ -564,6 +570,7 @@ TEST(FileLogger, GivenBufferWhenDumpingKernelArgsThenFileIsCreated) {
cl_mem clObj = buffer;
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
auto kernel = clUniquePtr(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex)));
@@ -601,6 +608,7 @@ TEST(FileLogger, GivenBufferWhenDumpingKernelArgsThenFileIsCreated) {
TEST(FileLogger, GivenSamplerWhenDumpingKernelArgsThenFileIsNotCreated) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto context = clUniquePtr(new MockContext(device.get()));
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
@@ -627,8 +635,8 @@ TEST(FileLogger, GivenSamplerWhenDumpingKernelArgsThenFileIsNotCreated) {
}
TEST(FileLogger, GivenImageNotSetWhenDumpingKernelArgsThenFileIsNotCreated) {
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto context = clUniquePtr(new MockContext(device.get()));
auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));

View File

@@ -69,7 +69,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
kernelDescriptor.kernelAttributes.hasBarriers,
kernelDescriptor.kernelAttributes.barrierCount,
hwInfo);
auto slmSize = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(
HwHelperHw<Family>::get().computeSlmValues(hwInfo, dispatchInterface->getSlmTotalSize()));

View File

@@ -60,14 +60,10 @@ PreemptionMode PreemptionHelper::taskPreemptionMode(PreemptionMode devicePreempt
void PreemptionHelper::setPreemptionLevelFlags(PreemptionFlags &flags, Device &device, Kernel *kernel) {
if (kernel) {
const auto &kernelInfo = kernel->getKernelInfo(device.getRootDeviceIndex());
flags.flags.disabledMidThreadPreemptionKernel =
kernelInfo.patchInfo.executionEnvironment &&
kernelInfo.patchInfo.executionEnvironment->DisableMidThreadPreemption;
const auto &kernelDescriptor = kernel->getKernelInfo(device.getRootDeviceIndex()).kernelDescriptor;
flags.flags.disabledMidThreadPreemptionKernel = kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption;
flags.flags.vmeKernel = kernel->isVmeKernel();
flags.flags.usesFencesForReadWriteImages =
kernelInfo.patchInfo.executionEnvironment &&
kernelInfo.patchInfo.executionEnvironment->UsesFencesForReadWriteImages;
flags.flags.usesFencesForReadWriteImages = kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages;
flags.flags.schedulerKernel = kernel->isSchedulerKernel;
}
flags.flags.deviceSupportsVmePreemption = device.getDeviceInfo().vmeAvcSupportsPreemption;

View File

@@ -798,8 +798,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<N
}
}
kernelDescriptor.kernelAttributes.hasBarriers = execEnv.barrierCount;
kernelDescriptor.kernelAttributes.flags.usesBarriers = (kernelDescriptor.kernelAttributes.hasBarriers > 0U);
kernelDescriptor.kernelAttributes.barrierCount = execEnv.barrierCount;
kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = execEnv.disableMidThreadPreemption;
kernelDescriptor.kernelAttributes.numGrfRequired = execEnv.grfCount;
if (execEnv.has4GBBuffers) {

View File

@@ -17,6 +17,8 @@ set(NEO_CORE_KERNEL
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.h
${CMAKE_CURRENT_SOURCE_DIR}/read_extended_info.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/read_extended_info.cpp
)
set_property(GLOBAL PROPERTY NEO_CORE_KERNEL ${NEO_CORE_KERNEL})

View File

@@ -26,6 +26,10 @@ namespace NEO {
using StringMap = std::unordered_map<uint32_t, std::string>;
using InstructionsSegmentOffset = uint16_t;
// Base type for extended kernel information. KernelDescriptor holds an instance through its
// std::unique_ptr<ExtendedInfoBase> extendedInfo member, which readExtendedInfo() receives by reference.
struct ExtendedInfoBase {
// Virtual so that an object of a derived type is destroyed correctly when deleted through a base pointer.
virtual ~ExtendedInfoBase() = default;
};
struct KernelDescriptor final {
enum AddressingMode : uint8_t {
AddrNone,
@@ -46,12 +50,13 @@ struct KernelDescriptor final {
uint32_t perThreadScratchSize[2] = {0U, 0U};
uint32_t perHwThreadPrivateMemorySize = 0U;
uint32_t perThreadSystemThreadSurfaceSize = 0U;
uint32_t hasBarriers = 0u;
uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U};
uint16_t crossThreadDataSize = 0U;
uint16_t perThreadDataSize = 0U;
uint16_t numArgsToPatch = 0U;
uint16_t numGrfRequired = 0U;
uint8_t barrierCount = 0u;
AddressingMode bufferAddressingMode = BindfulAndStateless;
AddressingMode imageAddressingMode = Bindful;
AddressingMode samplerAddressingMode = Bindful;
@@ -67,10 +72,13 @@ struct KernelDescriptor final {
return Stateless == bufferAddressingMode;
}
// Tells whether any barrier is in use, i.e. whether barrierCount is non-zero.
bool usesBarriers() const {
    const bool anyBarrier = (barrierCount != 0);
    return anyBarrier;
}
union {
struct {
bool usesPrintf : 1;
bool usesBarriers : 1;
bool usesFencesForReadWriteImages : 1;
bool usesFlattenedLocalIds;
bool usesPrivateMemory : 1;
@@ -170,6 +178,7 @@ struct KernelDescriptor final {
} external;
std::vector<uint8_t> generatedHeaps;
std::unique_ptr<ExtendedInfoBase> extendedInfo;
};
} // namespace NEO

View File

@@ -11,6 +11,7 @@
#include "shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h"
#include "shared/source/kernel/kernel_arg_descriptor_extended_vme.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/kernel/read_extended_info.h"
#include <sstream>
#include <string>
@@ -49,17 +50,20 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
} else {
dst.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindfulAndStateless;
}
dst.kernelAttributes.numGrfRequired = execEnv.NumGRFRequired;
dst.kernelAttributes.simdSize = execEnv.LargestCompiledSIMDSize;
dst.kernelAttributes.barrierCount = execEnv.HasBarriers;
dst.kernelAttributes.flags.usesDeviceSideEnqueue = (0 != execEnv.HasDeviceEnqueue);
dst.kernelAttributes.flags.usesBarriers = (0 != execEnv.HasBarriers);
dst.kernelAttributes.hasBarriers = execEnv.HasBarriers;
dst.kernelAttributes.flags.requiresDisabledMidThreadPreemption = (0 != execEnv.DisableMidThreadPreemption);
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
dst.kernelAttributes.flags.usesFencesForReadWriteImages = (0 != execEnv.UsesFencesForReadWriteImages);
dst.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress = (0 != execEnv.SubgroupIndependentForwardProgressRequired);
dst.kernelAttributes.numGrfRequired = execEnv.NumGRFRequired;
dst.kernelAttributes.flags.useGlobalAtomics = execEnv.HasGlobalAtomics;
dst.kernelAttributes.flags.usesStatelessWrites = (execEnv.StatelessWritesCount > 0U);
dst.kernelAttributes.flags.useGlobalAtomics = (0 != execEnv.HasGlobalAtomics);
dst.kernelAttributes.flags.usesStatelessWrites = (0 != execEnv.StatelessWritesCount);
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
readExtendedInfo(dst.extendedInfo, execEnv);
}
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token) {

View File

@@ -0,0 +1,12 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/kernel/read_extended_info.h"
namespace NEO {
// Definition of readExtendedInfo() provided by this file: the body is empty, so `extendedInfo`
// is left untouched and `execEnv` is not read.
// NOTE(review): the build lists this file under ${BRANCH_DIR_SUFFIX}, so other source branches
// presumably supply a different definition - confirm before relying on this being a no-op.
void readExtendedInfo(std::unique_ptr<ExtendedInfoBase> &extendedInfo, const iOpenCL::SPatchExecutionEnvironment &execEnv) {}
} // namespace NEO

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once

#include "kernel_descriptor.h"
#include "patch_shared.h"

#include <memory>

namespace NEO {
// Hook invoked while a KernelDescriptor is populated from an execution-environment patch token.
//
// extendedInfo - the descriptor's extended-info slot; passed by non-const reference so a
//                definition may assign or replace the owned object.
// execEnv      - the execution-environment patch token being decoded.
//
// NOTE(review): the definition is selected at build time (the .cpp is listed under
// ${BRANCH_DIR_SUFFIX}); what, if anything, gets stored in extendedInfo depends on that definition.
void readExtendedInfo(std::unique_ptr<ExtendedInfoBase> &extendedInfo, const iOpenCL::SPatchExecutionEnvironment &execEnv);
} // namespace NEO

View File

@@ -2777,8 +2777,8 @@ kernels:
ASSERT_EQ(1U, programInfo.kernelInfos.size());
auto &kernelDescriptor = programInfo.kernelInfos[0]->kernelDescriptor;
EXPECT_EQ(7U, kernelDescriptor.kernelAttributes.hasBarriers);
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesBarriers);
EXPECT_EQ(7U, kernelDescriptor.kernelAttributes.barrierCount);
EXPECT_TRUE(kernelDescriptor.kernelAttributes.usesBarriers());
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption);
EXPECT_EQ(13U, kernelDescriptor.kernelAttributes.numGrfRequired);
EXPECT_EQ(KernelDescriptor::Stateless, kernelDescriptor.kernelAttributes.bufferAddressingMode);

View File

@@ -93,10 +93,10 @@ TEST(KernelDescriptorFromPatchtokens, GivenExecutionEnvironmentThenSetsProperPar
NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4);
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue);
EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesBarriers);
EXPECT_FALSE(kernelDescriptor.kernelAttributes.usesBarriers());
execEnv.HasBarriers = 1U;
NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4);
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesBarriers);
EXPECT_TRUE(kernelDescriptor.kernelAttributes.usesBarriers());
EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption);
execEnv.DisableMidThreadPreemption = 1U;