KernelInfo refactor
Using kernelDescriptor.kernelAttributes.requiredWorkgroupSize Change-Id: I2533c6f1d4a9b2cf5c141ba7fdd516d4c9d88502
This commit is contained in:
parent
c678f3d9b0
commit
0d35af9327
|
@ -68,7 +68,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
|
|||
|
||||
// Update global work size to force macro-block to HW thread execution model
|
||||
Vec3<size_t> gws = {numThreadsX * simdWidth, 1, 1};
|
||||
Vec3<size_t> lws = {vmeKernel->getKernelInfo().reqdWorkGroupSize[0], 1, 1};
|
||||
Vec3<size_t> lws = {vmeKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], 1, 1};
|
||||
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d2D, SplitDispatch::SplitMode::NoSplit> builder;
|
||||
builder.setDispatchGeometry(gws, lws, inOffset, gws, lws);
|
||||
|
|
|
@ -57,13 +57,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
|||
|
||||
bool haveRequiredWorkGroupSize = false;
|
||||
|
||||
if (kernelInfo.reqdWorkGroupSize[0] != WorkloadInfo::undefinedOffset) {
|
||||
if (kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] != 0) {
|
||||
haveRequiredWorkGroupSize = true;
|
||||
}
|
||||
|
||||
size_t remainder = 0;
|
||||
size_t totalWorkItems = 1u;
|
||||
const size_t *localWkgSizeToPass = localWorkSizeIn ? workGroupSize : nullptr;
|
||||
size_t reqdWorkgroupSize[3] = {};
|
||||
|
||||
for (auto i = 0u; i < workDim; i++) {
|
||||
region[i] = globalWorkSizeIn ? globalWorkSizeIn[i] : 0;
|
||||
|
@ -76,7 +77,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
|||
|
||||
if (localWorkSizeIn) {
|
||||
if (haveRequiredWorkGroupSize) {
|
||||
if (kernelInfo.reqdWorkGroupSize[i] != localWorkSizeIn[i]) {
|
||||
if (kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[i] != localWorkSizeIn[i]) {
|
||||
return CL_INVALID_WORK_GROUP_SIZE;
|
||||
}
|
||||
}
|
||||
|
@ -100,7 +101,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
|
|||
}
|
||||
|
||||
if (haveRequiredWorkGroupSize) {
|
||||
localWkgSizeToPass = kernelInfo.reqdWorkGroupSize;
|
||||
reqdWorkgroupSize[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
reqdWorkgroupSize[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
reqdWorkgroupSize[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
localWkgSizeToPass = reqdWorkgroupSize;
|
||||
}
|
||||
|
||||
NullSurface s;
|
||||
|
|
|
@ -603,9 +603,9 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
|
|||
|
||||
case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
|
||||
DEBUG_BREAK_IF(!patchInfo.executionEnvironment);
|
||||
requiredWorkGroupSize.val[0] = patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
|
||||
requiredWorkGroupSize.val[1] = patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
|
||||
requiredWorkGroupSize.val[2] = patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
|
||||
requiredWorkGroupSize.val[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
requiredWorkGroupSize.val[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
requiredWorkGroupSize.val[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
srcSize = sizeof(requiredWorkGroupSize);
|
||||
pSrc = &requiredWorkGroupSize;
|
||||
break;
|
||||
|
@ -2025,9 +2025,9 @@ uint32_t Kernel::ReflectionSurfaceHelper::setKernelData(void *reflectionSurface,
|
|||
kernelData->m_ScratchSpacePatchValue = 0;
|
||||
kernelData->m_SIMDSize = kernelInfo.patchInfo.executionEnvironment ? kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize : 0;
|
||||
kernelData->m_HasBarriers = kernelInfo.patchInfo.executionEnvironment ? kernelInfo.patchInfo.executionEnvironment->HasBarriers : 0;
|
||||
kernelData->m_RequiredWkgSizes[0] = kernelInfo.reqdWorkGroupSize[0] != WorkloadInfo::undefinedOffset ? static_cast<uint32_t>(kernelInfo.reqdWorkGroupSize[0]) : 0;
|
||||
kernelData->m_RequiredWkgSizes[1] = kernelInfo.reqdWorkGroupSize[1] != WorkloadInfo::undefinedOffset ? static_cast<uint32_t>(kernelInfo.reqdWorkGroupSize[1]) : 0;
|
||||
kernelData->m_RequiredWkgSizes[2] = kernelInfo.reqdWorkGroupSize[2] != WorkloadInfo::undefinedOffset ? static_cast<uint32_t>(kernelInfo.reqdWorkGroupSize[2]) : 0;
|
||||
kernelData->m_RequiredWkgSizes[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
kernelData->m_RequiredWkgSizes[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
kernelData->m_RequiredWkgSizes[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
kernelData->m_InilineSLMSize = kernelInfo.workloadInfo.slmStaticSize;
|
||||
|
||||
bool localIdRequired = false;
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
|
||||
namespace NEO {
|
||||
|
||||
bool useKernelDescriptor = false;
|
||||
bool useKernelDescriptor = true;
|
||||
|
||||
struct KernelArgumentType {
|
||||
const char *argTypeQualifier;
|
||||
|
@ -184,13 +184,6 @@ KernelInfo::~KernelInfo() {
|
|||
|
||||
void KernelInfo::storePatchToken(const SPatchExecutionEnvironment *execEnv) {
|
||||
this->patchInfo.executionEnvironment = execEnv;
|
||||
if (execEnv->RequiredWorkGroupSizeX != 0) {
|
||||
this->reqdWorkGroupSize[0] = execEnv->RequiredWorkGroupSizeX;
|
||||
this->reqdWorkGroupSize[1] = execEnv->RequiredWorkGroupSizeY;
|
||||
this->reqdWorkGroupSize[2] = execEnv->RequiredWorkGroupSizeZ;
|
||||
DEBUG_BREAK_IF(!(execEnv->RequiredWorkGroupSizeY > 0));
|
||||
DEBUG_BREAK_IF(!(execEnv->RequiredWorkGroupSizeZ > 0));
|
||||
}
|
||||
this->workgroupWalkOrder[0] = 0;
|
||||
this->workgroupWalkOrder[1] = 1;
|
||||
this->workgroupWalkOrder[2] = 2;
|
||||
|
|
|
@ -162,9 +162,9 @@ struct KernelInfo {
|
|||
return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired : false;
|
||||
}
|
||||
size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
|
||||
auto requiredWorkGroupSizeX = patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
|
||||
auto requiredWorkGroupSizeY = patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
|
||||
auto requiredWorkGroupSizeZ = patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
|
||||
auto requiredWorkGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
auto requiredWorkGroupSizeY = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
auto requiredWorkGroupSizeZ = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
size_t maxRequiredWorkGroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
|
||||
if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) {
|
||||
maxRequiredWorkGroupSize = maxWorkGroupSize;
|
||||
|
@ -199,7 +199,6 @@ struct KernelInfo {
|
|||
bool hasStatelessAccessToHostMemory = false;
|
||||
bool isVmeWorkload = false;
|
||||
char *crossThreadData = nullptr;
|
||||
size_t reqdWorkGroupSize[3] = {WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset};
|
||||
size_t requiredSubGroupSize = 0;
|
||||
std::array<uint8_t, 3> workgroupWalkOrder = {{0, 1, 2}};
|
||||
std::array<uint8_t, 3> workgroupDimensionsOrder = {{0, 1, 2}};
|
||||
|
|
|
@ -25,9 +25,9 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture<HelloWorldFixtureFactory> {
|
|||
largestCompiledSIMDSize = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize);
|
||||
ASSERT_EQ(32u, largestCompiledSIMDSize);
|
||||
|
||||
auto requiredWorkGroupSizeX = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeX);
|
||||
auto requiredWorkGroupSizeY = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeY);
|
||||
auto requiredWorkGroupSizeZ = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeZ);
|
||||
auto requiredWorkGroupSizeX = static_cast<size_t>(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
|
||||
auto requiredWorkGroupSizeY = static_cast<size_t>(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
|
||||
auto requiredWorkGroupSizeZ = static_cast<size_t>(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
|
||||
|
||||
calculatedMaxWorkgroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
|
||||
if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast<size_t>(pKernel->maxKernelWorkGroupSize))) {
|
||||
|
|
|
@ -1006,9 +1006,9 @@ TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorre
|
|||
MockKernelWithInternals mockKernel{*pClDevice};
|
||||
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD32 = 0;
|
||||
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD16 = 1;
|
||||
mockKernel.kernelInfo.reqdWorkGroupSize[0] = 16;
|
||||
mockKernel.kernelInfo.reqdWorkGroupSize[1] = 0;
|
||||
mockKernel.kernelInfo.reqdWorkGroupSize[2] = 0;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;
|
||||
|
||||
overwriteBuiltInBinaryName("media_kernels_backend");
|
||||
BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, *pClDevice);
|
||||
|
@ -1056,9 +1056,9 @@ TEST_F(VmeBuiltInTests, GivenAdvancedVmeBuilderWhenGettingDispatchInfoThenParams
|
|||
MockKernelWithInternals mockKernel{*pClDevice};
|
||||
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD32 = 0;
|
||||
((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD16 = 1;
|
||||
mockKernel.kernelInfo.reqdWorkGroupSize[0] = 16;
|
||||
mockKernel.kernelInfo.reqdWorkGroupSize[1] = 0;
|
||||
mockKernel.kernelInfo.reqdWorkGroupSize[2] = 0;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;
|
||||
|
||||
Vec3<size_t> gws{352, 288, 0};
|
||||
Vec3<size_t> elws{0, 0, 0};
|
||||
|
|
|
@ -1242,9 +1242,9 @@ class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam<s
|
|||
|
||||
info.patchInfo.pAllocateStatelessPrivateSurface = &privateSurface;
|
||||
|
||||
info.reqdWorkGroupSize[0] = 4;
|
||||
info.reqdWorkGroupSize[1] = 8;
|
||||
info.reqdWorkGroupSize[2] = 2;
|
||||
info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 4;
|
||||
info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 8;
|
||||
info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 2;
|
||||
|
||||
info.workloadInfo.slmStaticSize = 1652;
|
||||
|
||||
|
@ -1319,9 +1319,9 @@ TEST_P(ReflectionSurfaceHelperSetKernelDataTest, WhenSettingKernelDataThenDataAn
|
|||
EXPECT_EQ(0u, kernelData->m_ScratchSpacePatchValue);
|
||||
EXPECT_EQ(executionEnvironment.LargestCompiledSIMDSize, kernelData->m_SIMDSize);
|
||||
EXPECT_EQ(executionEnvironment.HasBarriers, kernelData->m_HasBarriers);
|
||||
EXPECT_EQ(info.reqdWorkGroupSize[0], kernelData->m_RequiredWkgSizes[0]);
|
||||
EXPECT_EQ(info.reqdWorkGroupSize[1], kernelData->m_RequiredWkgSizes[1]);
|
||||
EXPECT_EQ(info.reqdWorkGroupSize[2], kernelData->m_RequiredWkgSizes[2]);
|
||||
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], kernelData->m_RequiredWkgSizes[0]);
|
||||
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1], kernelData->m_RequiredWkgSizes[1]);
|
||||
EXPECT_EQ(info.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2], kernelData->m_RequiredWkgSizes[2]);
|
||||
EXPECT_EQ(info.workloadInfo.slmStaticSize, kernelData->m_InilineSLMSize);
|
||||
|
||||
if (localIDPresent.flattend || localIDPresent.x || localIDPresent.y || localIDPresent.z)
|
||||
|
|
|
@ -2236,53 +2236,53 @@ TEST_F(KernelExecutionEnvironmentTest, GivenLargestCompiledSimdSizeEqualOneWhenG
|
|||
|
||||
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeIsZeroWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) {
|
||||
auto maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
|
||||
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
|
||||
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
|
||||
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
|
||||
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = 0;
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = 0;
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = 0;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 0;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;
|
||||
|
||||
EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize));
|
||||
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX;
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY;
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = oldRequiredWorkGroupSizeX;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = oldRequiredWorkGroupSizeY;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = oldRequiredWorkGroupSizeZ;
|
||||
}
|
||||
|
||||
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeLowerThanMaxWorkGroupSizeWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) {
|
||||
auto maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
|
||||
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
|
||||
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
|
||||
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
|
||||
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = static_cast<uint32_t>(maxWorkGroupSize / 2);
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = 1;
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = 1;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = static_cast<uint16_t>(maxWorkGroupSize / 2);
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 1;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 1;
|
||||
|
||||
EXPECT_EQ(maxWorkGroupSize / 2, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize));
|
||||
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX;
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY;
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = oldRequiredWorkGroupSizeX;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = oldRequiredWorkGroupSizeY;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = oldRequiredWorkGroupSizeZ;
|
||||
}
|
||||
|
||||
TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeIsGreaterThanMaxWorkGroupSizeWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) {
|
||||
auto maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
|
||||
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
|
||||
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
|
||||
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
|
||||
auto oldRequiredWorkGroupSizeX = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
auto oldRequiredWorkGroupSizeY = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = static_cast<uint32_t>(maxWorkGroupSize);
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = static_cast<uint32_t>(maxWorkGroupSize);
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = static_cast<uint32_t>(maxWorkGroupSize);
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = static_cast<uint16_t>(maxWorkGroupSize);
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = static_cast<uint16_t>(maxWorkGroupSize);
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = static_cast<uint16_t>(maxWorkGroupSize);
|
||||
|
||||
EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize));
|
||||
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX;
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY;
|
||||
const_cast<SPatchExecutionEnvironment *>(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = oldRequiredWorkGroupSizeX;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = oldRequiredWorkGroupSizeY;
|
||||
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = oldRequiredWorkGroupSizeZ;
|
||||
}
|
||||
|
||||
struct KernelCrossThreadTests : Test<ClDeviceFixture> {
|
||||
|
|
|
@ -322,9 +322,9 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingT
|
|||
buildAndDecode();
|
||||
|
||||
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
|
||||
EXPECT_EQ_VAL(WorkloadInfo::undefinedOffset, pKernelInfo->reqdWorkGroupSize[0]);
|
||||
EXPECT_EQ_VAL(WorkloadInfo::undefinedOffset, pKernelInfo->reqdWorkGroupSize[1]);
|
||||
EXPECT_EQ_VAL(WorkloadInfo::undefinedOffset, pKernelInfo->reqdWorkGroupSize[2]);
|
||||
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
|
||||
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
|
||||
EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
|
||||
}
|
||||
|
||||
TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect) {
|
||||
|
@ -357,9 +357,9 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect
|
|||
buildAndDecode();
|
||||
|
||||
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token);
|
||||
EXPECT_EQ(32u, pKernelInfo->reqdWorkGroupSize[0]);
|
||||
EXPECT_EQ(16u, pKernelInfo->reqdWorkGroupSize[1]);
|
||||
EXPECT_EQ(8u, pKernelInfo->reqdWorkGroupSize[2]);
|
||||
EXPECT_EQ(32u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
|
||||
EXPECT_EQ(16u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
|
||||
EXPECT_EQ(8u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
|
||||
EXPECT_TRUE(pKernelInfo->requiresSshForBuffers);
|
||||
}
|
||||
|
||||
|
@ -459,14 +459,21 @@ TEST_F(KernelDataTest, GivenExecutionEnvironmentRequiresSubgroupIndependentForwa
|
|||
TEST_F(KernelDataTest, GivenKernelAttributesInfoWhenBuildingThenProgramIsCorrect) {
|
||||
iOpenCL::SPatchKernelAttributesInfo kernelAttributesInfo;
|
||||
kernelAttributesInfo.Token = PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO;
|
||||
kernelAttributesInfo.Size = sizeof(SPatchKernelAttributesInfo);
|
||||
kernelAttributesInfo.AttributesSize = 0x10;
|
||||
kernelAttributesInfo.Size = sizeof(SPatchKernelAttributesInfo) + kernelAttributesInfo.AttributesSize;
|
||||
const std::string attributesValue = "dummy_attribute";
|
||||
|
||||
pPatchList = &kernelAttributesInfo;
|
||||
patchListSize = kernelAttributesInfo.Size;
|
||||
std::vector<char> patchToken(sizeof(iOpenCL::SPatchKernelAttributesInfo) + kernelAttributesInfo.AttributesSize);
|
||||
memcpy_s(patchToken.data(), patchToken.size(), &kernelAttributesInfo, sizeof(iOpenCL::SPatchKernelAttributesInfo));
|
||||
memcpy_s(patchToken.data() + sizeof(iOpenCL::SPatchKernelAttributesInfo), kernelAttributesInfo.AttributesSize,
|
||||
attributesValue.data(), attributesValue.size());
|
||||
|
||||
pPatchList = patchToken.data();
|
||||
patchListSize = static_cast<uint32_t>(patchToken.size());
|
||||
|
||||
buildAndDecode();
|
||||
|
||||
EXPECT_EQ(attributesValue, pKernelInfo->kernelDescriptor.kernelMetadata.kernelLanguageAttributes);
|
||||
EXPECT_EQ_CONST(PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO, pKernelInfo->patchInfo.pKernelAttributesInfo->Token);
|
||||
}
|
||||
|
||||
|
|
|
@ -286,9 +286,11 @@ TEST(KernelInfoFromPatchTokens, GivenDefaultModeThenKernelDescriptorIsNotBeingPo
|
|||
|
||||
kernelTokens.tokens.kernelArgs.resize(2);
|
||||
kernelTokens.tokens.kernelArgs[1].objectArg = &globalMemArg;
|
||||
NEO::useKernelDescriptor = false;
|
||||
NEO::KernelInfo kernelInfo = {};
|
||||
NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *));
|
||||
EXPECT_TRUE(kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.empty());
|
||||
NEO::useKernelDescriptor = true;
|
||||
}
|
||||
|
||||
TEST(KernelInfoFromPatchTokens, WhenUseKernelDescriptorIsEnabledThenKernelDescriptorIsBeingPopulated) {
|
||||
|
@ -306,6 +308,5 @@ TEST(KernelInfoFromPatchTokens, WhenUseKernelDescriptorIsEnabledThenKernelDescri
|
|||
NEO::KernelInfo kernelInfo = {};
|
||||
NEO::useKernelDescriptor = true;
|
||||
NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *));
|
||||
NEO::useKernelDescriptor = false;
|
||||
EXPECT_FALSE(kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.empty());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue