KernelInfo refactor

Using kernelDescriptor.kernelAttributes.requiredWorkgroupSize

Change-Id: I2533c6f1d4a9b2cf5c141ba7fdd516d4c9d88502
This commit is contained in:
Krystian Chmielewski 2020-10-27 15:16:42 +01:00 committed by sys_ocldev
parent c678f3d9b0
commit 0d35af9327
11 changed files with 78 additions and 74 deletions

View File

@ -68,7 +68,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
// Update global work size to force macro-block to HW thread execution model
Vec3<size_t> gws = {numThreadsX * simdWidth, 1, 1};
Vec3<size_t> lws = {vmeKernel->getKernelInfo().reqdWorkGroupSize[0], 1, 1};
Vec3<size_t> lws = {vmeKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], 1, 1};
DispatchInfoBuilder<SplitDispatch::Dim::d2D, SplitDispatch::SplitMode::NoSplit> builder;
builder.setDispatchGeometry(gws, lws, inOffset, gws, lws);

View File

@ -57,13 +57,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
bool haveRequiredWorkGroupSize = false;
if (kernelInfo.reqdWorkGroupSize[0] != WorkloadInfo::undefinedOffset) {
if (kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] != 0) {
haveRequiredWorkGroupSize = true;
}
size_t remainder = 0;
size_t totalWorkItems = 1u;
const size_t *localWkgSizeToPass = localWorkSizeIn ? workGroupSize : nullptr;
size_t reqdWorkgroupSize[3] = {};
for (auto i = 0u; i < workDim; i++) {
region[i] = globalWorkSizeIn ? globalWorkSizeIn[i] : 0;
@ -76,7 +77,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
if (localWorkSizeIn) {
if (haveRequiredWorkGroupSize) {
if (kernelInfo.reqdWorkGroupSize[i] != localWorkSizeIn[i]) {
if (kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[i] != localWorkSizeIn[i]) {
return CL_INVALID_WORK_GROUP_SIZE;
}
}
@ -100,7 +101,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
}
if (haveRequiredWorkGroupSize) {
localWkgSizeToPass = kernelInfo.reqdWorkGroupSize;
reqdWorkgroupSize[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
reqdWorkgroupSize[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
reqdWorkgroupSize[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
localWkgSizeToPass = reqdWorkgroupSize;
}
NullSurface s;

View File

@ -603,9 +603,9 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
DEBUG_BREAK_IF(!patchInfo.executionEnvironment);
requiredWorkGroupSize.val[0] = patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
requiredWorkGroupSize.val[1] = patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
requiredWorkGroupSize.val[2] = patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
requiredWorkGroupSize.val[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
requiredWorkGroupSize.val[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
requiredWorkGroupSize.val[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
srcSize = sizeof(requiredWorkGroupSize);
pSrc = &requiredWorkGroupSize;
break;
@ -2025,9 +2025,9 @@ uint32_t Kernel::ReflectionSurfaceHelper::setKernelData(void *reflectionSurface,
kernelData->m_ScratchSpacePatchValue = 0;
kernelData->m_SIMDSize = kernelInfo.patchInfo.executionEnvironment ? kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize : 0;
kernelData->m_HasBarriers = kernelInfo.patchInfo.executionEnvironment ? kernelInfo.patchInfo.executionEnvironment->HasBarriers : 0;
kernelData->m_RequiredWkgSizes[0] = kernelInfo.reqdWorkGroupSize[0] != WorkloadInfo::undefinedOffset ? static_cast<uint32_t>(kernelInfo.reqdWorkGroupSize[0]) : 0;
kernelData->m_RequiredWkgSizes[1] = kernelInfo.reqdWorkGroupSize[1] != WorkloadInfo::undefinedOffset ? static_cast<uint32_t>(kernelInfo.reqdWorkGroupSize[1]) : 0;
kernelData->m_RequiredWkgSizes[2] = kernelInfo.reqdWorkGroupSize[2] != WorkloadInfo::undefinedOffset ? static_cast<uint32_t>(kernelInfo.reqdWorkGroupSize[2]) : 0;
kernelData->m_RequiredWkgSizes[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
kernelData->m_RequiredWkgSizes[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
kernelData->m_RequiredWkgSizes[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
kernelData->m_InilineSLMSize = kernelInfo.workloadInfo.slmStaticSize;
bool localIdRequired = false;

View File

@ -31,7 +31,7 @@
namespace NEO {
bool useKernelDescriptor = false;
bool useKernelDescriptor = true;
struct KernelArgumentType {
const char *argTypeQualifier;
@ -184,13 +184,6 @@ KernelInfo::~KernelInfo() {
void KernelInfo::storePatchToken(const SPatchExecutionEnvironment *execEnv) {
this->patchInfo.executionEnvironment = execEnv;
if (execEnv->RequiredWorkGroupSizeX != 0) {
this->reqdWorkGroupSize[0] = execEnv->RequiredWorkGroupSizeX;
this->reqdWorkGroupSize[1] = execEnv->RequiredWorkGroupSizeY;
this->reqdWorkGroupSize[2] = execEnv->RequiredWorkGroupSizeZ;
DEBUG_BREAK_IF(!(execEnv->RequiredWorkGroupSizeY > 0));
DEBUG_BREAK_IF(!(execEnv->RequiredWorkGroupSizeZ > 0));
}
this->workgroupWalkOrder[0] = 0;
this->workgroupWalkOrder[1] = 1;
this->workgroupWalkOrder[2] = 2;

View File

@ -162,9 +162,9 @@ struct KernelInfo {
return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired : false;
}
size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
auto requiredWorkGroupSizeX = patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
auto requiredWorkGroupSizeY = patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
auto requiredWorkGroupSizeZ = patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
auto requiredWorkGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
auto requiredWorkGroupSizeY = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
auto requiredWorkGroupSizeZ = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
size_t maxRequiredWorkGroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) {
maxRequiredWorkGroupSize = maxWorkGroupSize;
@ -199,7 +199,6 @@ struct KernelInfo {
bool hasStatelessAccessToHostMemory = false;
bool isVmeWorkload = false;
char *crossThreadData = nullptr;
size_t reqdWorkGroupSize[3] = {WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset};
size_t requiredSubGroupSize = 0;
std::array<uint8_t, 3> workgroupWalkOrder = {{0, 1, 2}};
std::array<uint8_t, 3> workgroupDimensionsOrder = {{0, 1, 2}};

View File

@ -25,9 +25,9 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture<HelloWorldFixtureFactory> {
largestCompiledSIMDSize = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize);
ASSERT_EQ(32u, largestCompiledSIMDSize);
auto requiredWorkGroupSizeX = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeX);
auto requiredWorkGroupSizeY = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeY);
auto requiredWorkGroupSizeZ = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeZ);
auto requiredWorkGroupSizeX = static_cast<size_t>(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
auto requiredWorkGroupSizeY = static_cast<size_t>(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
auto requiredWorkGroupSizeZ = static_cast<size_t>(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
calculatedMaxWorkgroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast<size_t>(pKernel->maxKernelWorkGroupSize))) {

View File

@ -1006,9 +1006,9 @@ TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorre
MockKernelWithInternals mockKernel{*pClDevice};
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD32 = 0;
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD16 = 1;
mockKernel.kernelInfo.reqdWorkGroupSize[0] = 16;
mockKernel.kernelInfo.reqdWorkGroupSize[1] = 0;
mockKernel.kernelInfo.reqdWorkGroupSize[2] = 0;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;
overwriteBuiltInBinaryName("media_kernels_backend");
BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, *pClDevice);
@ -1056,9 +1056,9 @@ TEST_F(VmeBuiltInTests, GivenAdvancedVmeBuilderWhenGettingDispatchInfoThenParams
MockKernelWithInternals mockKernel{*pClDevice};
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD32 = 0;
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD16 = 1;
mockKernel.kernelInfo.reqdWorkGroupSize[0] = 16;
mockKernel.kernelInfo.reqdWorkGroupSize[1] = 0;
mockKernel.kernelInfo.reqdWorkGroupSize[2] = 0;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;
Vec3<size_t> gws{352, 288, 0};
Vec3<size_t> elws{0, 0, 0};

View File

@ -1242,9 +1242,9 @@ class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam<s
info.patchInfo.pAllocateStatelessPrivateSurface = &privateSurface;
info.reqdWorkGroupSize[0] = 4;
info.reqdWorkGroupSize[1] = 8;
info.reqdWorkGroupSize[2] = 2;
info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 4;
info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 8;
info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 2;
info.workloadInfo.slmStaticSize = 1652;
@ -1319,9 +1319,9 @@ TEST_P(ReflectionSurfaceHelperSetKernelDataTest, WhenSettingKernelDataThenDataAn
EXPECT_EQ(0u, kernelData->m_ScratchSpacePatchValue);
EXPECT_EQ(executionEnvironment.LargestCompiledSIMDSize, kernelData->m_SIMDSize);
EXPECT_EQ(executionEnvironment.HasBarriers, kernelData->m_HasBarriers);
EXPECT_EQ(info.reqdWorkGroupSize[0], kernelData->m_RequiredWkgSizes[0]);
EXPECT_EQ(info.reqdWorkGroupSize[1], kernelData->m_RequiredWkgSizes[1]);
EXPECT_EQ(info.reqdWorkGroupSize[2], kernelData->m_RequiredWkgSizes[2]);
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], kernelData->m_RequiredWkgSizes[0]);
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1], kernelData->m_RequiredWkgSizes[1]);
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2], kernelData->m_RequiredWkgSizes[2]);
EXPECT_EQ(info.workloadInfo.slmStaticSize, kernelData->m_InilineSLMSize);
if (localIDPresent.flattend || localIDPresent.x || localIDPresent.y || localIDPresent.z)

View File

@ -2236,53 +2236,53 @@ TEST_F(KernelExecutionEnvironmentTest, GivenLargestCompiledSimdSizeEqualOneWhenG
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeIsZeroWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) {
auto maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = 0;
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = 0;
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = 0;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 0;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;
EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize));
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX;
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY;
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = oldRequiredWorkGroupSizeX;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = oldRequiredWorkGroupSizeY;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = oldRequiredWorkGroupSizeZ;
}
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeLowerThanMaxWorkGroupSizeWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) {
auto maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = static_cast<uint32_t>(maxWorkGroupSize / 2);
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = 1;
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = 1;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = static_cast<uint16_t>(maxWorkGroupSize / 2);
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 1;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 1;
EXPECT_EQ(maxWorkGroupSize / 2, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize));
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX;
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY;
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = oldRequiredWorkGroupSizeX;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = oldRequiredWorkGroupSizeY;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = oldRequiredWorkGroupSizeZ;
}
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeIsGreaterThanMaxWorkGroupSizeWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) {
auto maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = static_cast<uint32_t>(maxWorkGroupSize);
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = static_cast<uint32_t>(maxWorkGroupSize);
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = static_cast<uint32_t>(maxWorkGroupSize);
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = static_cast<uint16_t>(maxWorkGroupSize);
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = static_cast<uint16_t>(maxWorkGroupSize);
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = static_cast<uint16_t>(maxWorkGroupSize);
EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize));
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX;
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY;
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = oldRequiredWorkGroupSizeX;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = oldRequiredWorkGroupSizeY;
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = oldRequiredWorkGroupSizeZ;
}
struct KernelCrossThreadTests : Test<ClDeviceFixture> {

View File

@ -322,9 +322,9 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingT
buildAndDecode();
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
EXPECT_EQ_VAL(WorkloadInfo::undefinedOffset, pKernelInfo->reqdWorkGroupSize[0]);
EXPECT_EQ_VAL(WorkloadInfo::undefinedOffset, pKernelInfo->reqdWorkGroupSize[1]);
EXPECT_EQ_VAL(WorkloadInfo::undefinedOffset, pKernelInfo->reqdWorkGroupSize[2]);
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
}
TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect) {
@ -357,9 +357,9 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect
buildAndDecode();
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
EXPECT_EQ(32u, pKernelInfo->reqdWorkGroupSize[0]);
EXPECT_EQ(16u, pKernelInfo->reqdWorkGroupSize[1]);
EXPECT_EQ(8u, pKernelInfo->reqdWorkGroupSize[2]);
EXPECT_EQ(32u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
EXPECT_EQ(16u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
EXPECT_EQ(8u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
EXPECT_TRUE(pKernelInfo->requiresSshForBuffers);
}
@ -459,14 +459,21 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentRequiresSubgroupIndependentForwa
TEST_F(KernelDataTest, GivenKernelAttributesInfoWhenBuildingThenProgramIsCorrect) {
iOpenCL::SPatchKernelAttributesInfo kernelAttributesInfo;
kernelAttributesInfo.Token = PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO;
kernelAttributesInfo.Size = sizeof(SPatchKernelAttributesInfo);
kernelAttributesInfo.AttributesSize = 0x10;
kernelAttributesInfo.Size = sizeof(SPatchKernelAttributesInfo) + kernelAttributesInfo.AttributesSize;
const std::string attributesValue = "dummy_attribute";
pPatchList = &kernelAttributesInfo;
patchListSize = kernelAttributesInfo.Size;
std::vector<char> patchToken(sizeof(iOpenCL::SPatchKernelAttributesInfo) + kernelAttributesInfo.AttributesSize);
memcpy_s(patchToken.data(), patchToken.size(), &kernelAttributesInfo, sizeof(iOpenCL::SPatchKernelAttributesInfo));
memcpy_s(patchToken.data() + sizeof(iOpenCL::SPatchKernelAttributesInfo), kernelAttributesInfo.AttributesSize,
attributesValue.data(), attributesValue.size());
pPatchList = patchToken.data();
patchListSize = static_cast<uint32_t>(patchToken.size());
buildAndDecode();
EXPECT_EQ(attributesValue, pKernelInfo->kernelDescriptor.kernelMetadata.kernelLanguageAttributes);
EXPECT_EQ_CONST(PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO, pKernelInfo->patchInfo.pKernelAttributesInfo->Token);
}

View File

@ -286,9 +286,11 @@ TEST(KernelInfoFromPatchTokens, GivenDefaultModeThenKernelDescriptorIsNotBeingPo
kernelTokens.tokens.kernelArgs.resize(2);
kernelTokens.tokens.kernelArgs[1].objectArg = &globalMemArg;
NEO::useKernelDescriptor = false;
NEO::KernelInfo kernelInfo = {};
NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *));
EXPECT_TRUE(kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.empty());
NEO::useKernelDescriptor = true;
}
TEST(KernelInfoFromPatchTokens, WhenUseKernelDescriptorIsEnabledThenKernelDescriptorIsBeingPopulated) {
@ -306,6 +308,5 @@ TEST(KernelInfoFromPatchTokens, WhenUseKernelDescriptorIsEnabledThenKernelDescri
NEO::KernelInfo kernelInfo = {};
NEO::useKernelDescriptor = true;
NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *));
NEO::useKernelDescriptor = false;
EXPECT_FALSE(kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.empty());
}