Enable OpenCL 3.0 by default on all devices

Change-Id: Ic5e46177c957896c499b7aa6727af48105b664ac
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2020-10-07 13:37:34 +02:00
committed by sys_ocldev
parent 595f374634
commit 7d0f23bfc9
35 changed files with 1151 additions and 1341 deletions

View File

@ -6,6 +6,7 @@
*/
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
using namespace NEO;
@ -91,37 +92,37 @@ INSTANTIATE_TEST_CASE_P(wgs,
::testing::ValuesIn(WorkDimensions)));
TEST_P(KernelSubGroupInfoReturnSizeTest, GivenWorkGroupSizeWhenGettingMaxSubGroupSizeThenReturnIsCalculatedCorrectly) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
WorkSizeParam workSize;
size_t workDim;
std::tie(workSize, workDim) = GetParam();
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
memset(inputValue, 0, sizeof(inputValue));
inputValue[0] = workSize.x;
if (workDim > 1) {
inputValue[1] = workSize.y;
}
if (workDim > 2) {
inputValue[2] = workSize.z;
}
paramValueSizeRet = 0;
WorkSizeParam workSize;
size_t workDim;
std::tie(workSize, workDim) = GetParam();
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
sizeof(size_t) * workDim,
inputValue,
sizeof(size_t),
paramValue,
&paramValueSizeRet);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
EXPECT_EQ(maxSimdSize, paramValue[0]);
memset(inputValue, 0, sizeof(inputValue));
inputValue[0] = workSize.x;
if (workDim > 1) {
inputValue[1] = workSize.y;
}
if (workDim > 2) {
inputValue[2] = workSize.z;
}
paramValueSizeRet = 0;
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
sizeof(size_t) * workDim,
inputValue,
sizeof(size_t),
paramValue,
&paramValueSizeRet);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
EXPECT_EQ(maxSimdSize, paramValue[0]);
}
typedef KernelSubGroupInfoParamFixture<std::tuple<WorkSizeParam, size_t>> KernelSubGroupInfoReturnCountTest;
@ -133,48 +134,48 @@ INSTANTIATE_TEST_CASE_P(wgs,
::testing::ValuesIn(WorkDimensions)));
TEST_P(KernelSubGroupInfoReturnCountTest, GivenWorkGroupSizeWhenGettingSubGroupCountThenReturnIsCalculatedCorrectly) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
WorkSizeParam workSize;
size_t workDim;
std::tie(workSize, workDim) = GetParam();
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
memset(inputValue, 0, sizeof(inputValue));
inputValue[0] = workSize.x;
if (workDim > 1) {
inputValue[1] = workSize.y;
}
if (workDim > 2) {
inputValue[2] = workSize.z;
}
paramValueSizeRet = 0;
WorkSizeParam workSize;
size_t workDim;
std::tie(workSize, workDim) = GetParam();
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
sizeof(size_t) * workDim,
inputValue,
sizeof(size_t),
paramValue,
&paramValueSizeRet);
memset(inputValue, 0, sizeof(inputValue));
inputValue[0] = workSize.x;
if (workDim > 1) {
inputValue[1] = workSize.y;
}
if (workDim > 2) {
inputValue[2] = workSize.z;
}
paramValueSizeRet = 0;
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
sizeof(size_t) * workDim,
inputValue,
sizeof(size_t),
paramValue,
&paramValueSizeRet);
EXPECT_EQ(sizeof(size_t), paramValueSizeRet);
EXPECT_EQ(CL_SUCCESS, retVal);
auto calculatedWGS = workSize.x;
if (workDim > 1) {
calculatedWGS *= workSize.y;
}
if (workDim > 2) {
calculatedWGS *= workSize.z;
}
EXPECT_EQ(sizeof(size_t), paramValueSizeRet);
if (calculatedWGS % maxSimdSize == 0) {
EXPECT_EQ(calculatedWGS / maxSimdSize, paramValue[0]);
} else {
EXPECT_EQ((calculatedWGS / maxSimdSize) + 1, paramValue[0]);
}
auto calculatedWGS = workSize.x;
if (workDim > 1) {
calculatedWGS *= workSize.y;
}
if (workDim > 2) {
calculatedWGS *= workSize.z;
}
if (calculatedWGS % maxSimdSize == 0) {
EXPECT_EQ(calculatedWGS / maxSimdSize, paramValue[0]);
} else {
EXPECT_EQ((calculatedWGS / maxSimdSize) + 1, paramValue[0]);
}
}
@ -189,158 +190,158 @@ INSTANTIATE_TEST_CASE_P(sgn,
::testing::ValuesIn(WorkDimensions)));
TEST_P(KernelSubGroupInfoReturnLocalSizeTest, GivenWorkGroupSizeWhenGettingLocalSizeThenReturnIsCalculatedCorrectly) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
size_t subGroupsNum;
size_t workDim;
std::tie(subGroupsNum, workDim) = GetParam();
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
inputValue[0] = subGroupsNum;
size_t subGroupsNum;
size_t workDim;
std::tie(subGroupsNum, workDim) = GetParam();
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
sizeof(size_t),
inputValue,
sizeof(size_t) * workDim,
paramValue,
&paramValueSizeRet);
inputValue[0] = subGroupsNum;
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
sizeof(size_t),
inputValue,
sizeof(size_t) * workDim,
paramValue,
&paramValueSizeRet);
EXPECT_EQ(sizeof(size_t) * workDim, paramValueSizeRet);
EXPECT_EQ(CL_SUCCESS, retVal);
size_t workGroupSize = subGroupsNum * largestCompiledSIMDSize;
if (workGroupSize > calculatedMaxWorkgroupSize) {
workGroupSize = 0;
}
EXPECT_EQ(sizeof(size_t) * workDim, paramValueSizeRet);
EXPECT_EQ(workGroupSize, paramValue[0]);
if (workDim > 1) {
EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[1]);
}
if (workDim > 2) {
EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[2]);
}
size_t workGroupSize = subGroupsNum * largestCompiledSIMDSize;
if (workGroupSize > calculatedMaxWorkgroupSize) {
workGroupSize = 0;
}
EXPECT_EQ(workGroupSize, paramValue[0]);
if (workDim > 1) {
EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[1]);
}
if (workDim > 2) {
EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[2]);
}
}
typedef KernelSubGroupInfoParamFixture<WorkSizeParam> KernelSubGroupInfoReturnMaxNumberTest;
TEST_F(KernelSubGroupInfoReturnMaxNumberTest, GivenWorkGroupSizeWhenGettingMaxNumSubGroupsThenReturnIsCalculatedCorrectly) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_MAX_NUM_SUB_GROUPS,
0,
nullptr,
sizeof(size_t),
paramValue,
&paramValueSizeRet);
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
EXPECT_EQ(paramValue[0], Math::divideAndRoundUp(calculatedMaxWorkgroupSize, largestCompiledSIMDSize));
}
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_MAX_NUM_SUB_GROUPS,
0,
nullptr,
sizeof(size_t),
paramValue,
&paramValueSizeRet);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
EXPECT_EQ(paramValue[0], Math::divideAndRoundUp(calculatedMaxWorkgroupSize, largestCompiledSIMDSize));
}
typedef KernelSubGroupInfoParamFixture<WorkSizeParam> KernelSubGroupInfoReturnCompileNumberTest;
TEST_F(KernelSubGroupInfoReturnCompileNumberTest, GivenKernelWhenGettingCompileNumSubGroupThenReturnIsCalculatedCorrectly) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
0,
nullptr,
sizeof(size_t),
paramValue,
&paramValueSizeRet);
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
EXPECT_EQ(paramValue[0], static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->CompiledSubGroupsNumber));
}
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
0,
nullptr,
sizeof(size_t),
paramValue,
&paramValueSizeRet);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
EXPECT_EQ(paramValue[0], static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->CompiledSubGroupsNumber));
}
typedef KernelSubGroupInfoParamFixture<WorkSizeParam> KernelSubGroupInfoReturnCompileSizeTest;
TEST_F(KernelSubGroupInfoReturnCompileSizeTest, GivenKernelWhenGettingCompileSubGroupSizeThenReturnIsCalculatedCorrectly) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL,
0,
nullptr,
sizeof(size_t),
paramValue,
&paramValueSizeRet);
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL,
0,
nullptr,
sizeof(size_t),
paramValue,
&paramValueSizeRet);
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
EXPECT_EQ(CL_SUCCESS, retVal);
size_t requiredSubGroupSize = 0;
auto start = pKernel->getKernelInfo().attributes.find("intel_reqd_sub_group_size(");
if (start != std::string::npos) {
start += strlen("intel_reqd_sub_group_size(");
auto stop = pKernel->getKernelInfo().attributes.find(")", start);
requiredSubGroupSize = stoi(pKernel->getKernelInfo().attributes.substr(start, stop - start));
}
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
EXPECT_EQ(paramValue[0], requiredSubGroupSize);
size_t requiredSubGroupSize = 0;
auto start = pKernel->getKernelInfo().attributes.find("intel_reqd_sub_group_size(");
if (start != std::string::npos) {
start += strlen("intel_reqd_sub_group_size(");
auto stop = pKernel->getKernelInfo().attributes.find(")", start);
requiredSubGroupSize = stoi(pKernel->getKernelInfo().attributes.substr(start, stop - start));
}
EXPECT_EQ(paramValue[0], requiredSubGroupSize);
}
TEST_F(KernelSubGroupInfoTest, GivenNullKernelWhenGettingSubGroupInfoThenInvalidKernelErrorIsReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
retVal = clGetKernelSubGroupInfo(
nullptr,
pClDevice,
0,
0,
nullptr,
0,
nullptr,
nullptr);
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
EXPECT_EQ(CL_INVALID_KERNEL, retVal);
}
retVal = clGetKernelSubGroupInfo(
nullptr,
pClDevice,
0,
0,
nullptr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_INVALID_KERNEL, retVal);
}
TEST_F(KernelSubGroupInfoTest, GivenNullDeviceWhenGettingSubGroupInfoThenInvalidDeviceErrorIsReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
retVal = clGetKernelSubGroupInfo(
pKernel,
nullptr,
0,
0,
nullptr,
0,
nullptr,
nullptr);
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
EXPECT_EQ(CL_INVALID_DEVICE, retVal);
}
retVal = clGetKernelSubGroupInfo(
pKernel,
nullptr,
0,
0,
nullptr,
0,
nullptr,
nullptr);
EXPECT_EQ(CL_INVALID_DEVICE, retVal);
}
TEST_F(KernelSubGroupInfoTest, GivenInvalidParamNameWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
0,
sizeof(size_t),
inputValue,
sizeof(size_t),
paramValue,
nullptr);
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
0,
sizeof(size_t),
inputValue,
sizeof(size_t),
paramValue,
nullptr);
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
uint32_t /*cl_kernel_sub_group_info*/ KernelSubGroupInfoInputParams[] = {
@ -385,167 +386,166 @@ TEST_P(KernelSubGroupInfoInputParamsTest, GivenOpenClVersionLowerThan21WhenGetti
}
TEST_P(KernelSubGroupInfoInputParamsTest, GivenWorkDimZeroWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
0,
inputValue,
0,
nullptr,
nullptr);
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
0,
inputValue,
0,
nullptr,
nullptr);
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
TEST_P(KernelSubGroupInfoInputParamsTest, GivenIndivisibleWorkDimWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
? maxWorkDim
: 1;
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
(sizeof(size_t) * workDim) - 1,
inputValue,
0,
nullptr,
nullptr);
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
? maxWorkDim
: 1;
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
(sizeof(size_t) * workDim) - 1,
inputValue,
0,
nullptr,
nullptr);
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
TEST_P(KernelSubGroupInfoInputParamsTest, GivenWorkDimGreaterThanMaxWorkDimWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
? maxWorkDim
: 1;
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t) * (workDim + 1),
inputValue,
0,
nullptr,
nullptr);
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
? maxWorkDim
: 1;
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t) * (workDim + 1),
inputValue,
0,
nullptr,
nullptr);
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
TEST_P(KernelSubGroupInfoInputParamsTest, GivenInputValueIsNullWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
? maxWorkDim
: 1;
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t) * (workDim),
nullptr,
0,
nullptr,
nullptr);
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
? maxWorkDim
: 1;
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t) * (workDim),
nullptr,
0,
nullptr,
nullptr);
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
TEST_P(KernelSubGroupInfoInputParamsTest, GivenParamValueSizeZeroWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t),
inputValue,
0,
paramValue,
nullptr);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t),
inputValue,
0,
paramValue,
nullptr);
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
TEST_P(KernelSubGroupInfoInputParamsTest, GivenUnalignedParamValueSizeWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1;
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t),
inputValue,
(sizeof(size_t) * workDim) - 1,
paramValue,
nullptr);
size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1;
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t),
inputValue,
(sizeof(size_t) * workDim) - 1,
paramValue,
nullptr);
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
TEST_P(KernelSubGroupInfoInputParamsTest, GivenTooLargeParamValueSizeWhenGettingSubGroupInfoThenCorrectRetValIsReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1;
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
// paramValue size / sizeof(size_t) > MaxWorkDim
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t),
inputValue,
sizeof(size_t) * (workDim + 1),
paramValue,
nullptr);
bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1;
EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
// paramValue size / sizeof(size_t) > MaxWorkDim
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t),
inputValue,
sizeof(size_t) * (workDim + 1),
paramValue,
nullptr);
EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
TEST_P(KernelSubGroupInfoInputParamsTest, GivenNullPtrForReturnWhenGettingKernelSubGroupInfoThenSuccessIsReturned) {
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t),
inputValue,
0,
nullptr,
nullptr);
bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
retVal = clGetKernelSubGroupInfo(
pKernel,
pClDevice,
GetParam(),
sizeof(size_t),
inputValue,
0,
nullptr,
nullptr);
EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
}
} // namespace ULT

View File

@ -127,8 +127,9 @@ TEST_F(DeviceGetCapsTest, WhenCreatingDeviceThenCapsArePopulatedCorrectly) {
EXPECT_NE(nullptr, caps.vendor);
EXPECT_NE(nullptr, caps.driverVersion);
EXPECT_NE(nullptr, caps.profile);
EXPECT_NE(nullptr, caps.clVersion);
EXPECT_NE(nullptr, caps.clCVersion);
EXPECT_STREQ("OpenCL 3.0 NEO ", caps.clVersion);
auto expectedClCVersion = (device->isOcl21Conformant() ? "OpenCL C 3.0 " : "OpenCL C 1.2 ");
EXPECT_STREQ(expectedClCVersion, caps.clCVersion);
EXPECT_NE(0u, caps.numericClVersion);
EXPECT_GT(caps.openclCAllVersions.size(), 0u);
EXPECT_GT(caps.openclCFeatures.size(), 0u);
@ -1196,9 +1197,7 @@ TEST(DeviceGetCaps, givenDebugFlagToUseCertainWorkgroupSizeWhenDeviceIsCreatedIt
}
TEST(DeviceGetCaps, givenDebugFlagToDisableDeviceEnqueuesWhenCreatingDeviceThenDeviceQueueCapsAreSetCorrectly) {
if (defaultHwInfo->capabilityTable.clVersionSupport == 21) {
GTEST_SKIP();
}
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.ForceDeviceEnqueueSupport.set(0);

View File

@ -483,8 +483,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshOff
class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture {
public:
void SetUp() override {
ExecutionModelKernelFixture::SetUp();
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
ExecutionModelKernelFixture::SetUp();
cl_queue_properties properties[5] = {
CL_QUEUE_PROPERTIES,
CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
@ -503,6 +504,10 @@ class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture {
ASSERT_NE(nullptr, devQueue);
}
void TearDown() override {
if (IsSkipped()) {
return;
}
if (devQueue) {
delete devQueue;
}
@ -522,130 +527,122 @@ class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture {
};
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSetiingIUpIndirectStateThenDshIsNotUsed) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
EXPECT_TRUE(pKernel->isParentKernel);
EXPECT_TRUE(pKernel->isParentKernel);
pKernel->createReflectionSurface();
pKernel->createReflectionSurface();
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(devQueue);
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(devQueue);
ASSERT_NE(nullptr, devQueueHw);
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
ASSERT_NE(nullptr, devQueueHw);
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
auto usedBeforeSSH = ssh->getUsed();
auto usedBeforeDSH = dsh->getUsed();
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
auto usedBeforeSSH = ssh->getUsed();
auto usedBeforeDSH = dsh->getUsed();
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1, false);
auto usedAfterSSH = ssh->getUsed();
auto usedAfterDSH = dsh->getUsed();
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1, false);
auto usedAfterSSH = ssh->getUsed();
auto usedAfterDSH = dsh->getUsed();
EXPECT_GE(surfaceStateHeapSize, usedAfterSSH - usedBeforeSSH);
EXPECT_GE(surfaceStateHeapSize, usedAfterSSH - usedBeforeSSH);
EXPECT_EQ(0u, usedAfterDSH - usedBeforeDSH);
EXPECT_EQ(0u, usedAfterDSH - usedBeforeDSH);
alignedFree(ssh->getCpuBase());
delete ssh;
}
alignedFree(ssh->getCpuBase());
delete ssh;
}
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenCorrectStartBlockIdIsSet) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
EXPECT_TRUE(pKernel->isParentKernel);
EXPECT_TRUE(pKernel->isParentKernel);
pKernel->createReflectionSurface();
pKernel->createReflectionSurface();
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(devQueue);
ASSERT_NE(nullptr, devQueueHw);
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(devQueue);
ASSERT_NE(nullptr, devQueueHw);
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
uint32_t parentCount = 4;
uint32_t parentCount = 4;
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
alignedFree(ssh->getCpuBase());
delete ssh;
}
alignedFree(ssh->getCpuBase());
delete ssh;
}
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenDshValuesAreSetCorrectly) {
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
EXPECT_TRUE(pKernel->isParentKernel);
EXPECT_TRUE(pKernel->isParentKernel);
pKernel->createReflectionSurface();
pKernel->createReflectionSurface();
MockContext mockContext;
MockDeviceQueueHw<FamilyType> *devQueueHw = new MockDeviceQueueHw<FamilyType>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]);
ASSERT_NE(nullptr, devQueueHw);
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
MockContext mockContext;
MockDeviceQueueHw<FamilyType> *devQueueHw = new MockDeviceQueueHw<FamilyType>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]);
ASSERT_NE(nullptr, devQueueHw);
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
uint32_t parentCount = 1;
uint32_t parentCount = 1;
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize());
EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh);
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize());
EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh);
alignedFree(ssh->getCpuBase());
delete ssh;
delete devQueueHw;
}
alignedFree(ssh->getCpuBase());
delete ssh;
delete devQueueHw;
}
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCallingSetupIndirectStateThenAllIddHaveBarriersEnabled) {
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
pKernel->createReflectionSurface();
pKernel->createReflectionSurface();
MockContext mockContext;
auto devQueueHw = std::make_unique<MockDeviceQueueHw<FamilyType>>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]);
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
MockContext mockContext;
auto devQueueHw = std::make_unique<MockDeviceQueueHw<FamilyType>>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]);
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
uint32_t parentCount = 1;
uint32_t parentCount = 1;
auto blockManager = pKernel->getProgram()->getBlockKernelManager();
auto iddCount = blockManager->getCount();
for (uint32_t i = 0; i < iddCount; i++) {
((SPatchExecutionEnvironment *)blockManager->getBlockKernelInfo(i)->patchInfo.executionEnvironment)->HasBarriers = 1u;
}
auto surfaceStateHeapSize =
HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
auto ssh = std::make_unique<IndirectHeap>(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
auto iddStartPtr = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize));
auto iddStartIndex = parentCount;
for (uint32_t i = 0; i < iddCount; i++) {
EXPECT_TRUE(iddStartPtr[iddStartIndex + i].getBarrierEnable());
}
alignedFree(ssh->getCpuBase());
auto blockManager = pKernel->getProgram()->getBlockKernelManager();
auto iddCount = blockManager->getCount();
for (uint32_t i = 0; i < iddCount; i++) {
((SPatchExecutionEnvironment *)blockManager->getBlockKernelInfo(i)->patchInfo.executionEnvironment)->HasBarriers = 1u;
}
auto surfaceStateHeapSize =
HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
auto ssh = std::make_unique<IndirectHeap>(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
auto iddStartPtr = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize));
auto iddStartIndex = parentCount;
for (uint32_t i = 0; i < iddCount; i++) {
EXPECT_TRUE(iddStartPtr[iddStartIndex + i].getBarrierEnable());
}
alignedFree(ssh->getCpuBase());
}
static const char *binaryFile = "simple_block_kernel";

View File

@ -38,267 +38,250 @@ typedef ExecutionModelKernelTest ParentKernelEnqueueTest;
// Enqueues the parent kernel and inspects the INTERFACE_DESCRIPTOR_DATA entries read
// from the device queue's DSH buffer at offset DeviceQueue::colorCalcStateSize:
//  - entry 0 is compared against the parent kernel's ISA GPU address (increased by
//    OffsetToSkipSetFFIDGP when the queue's engine is CCS and the HW helper reports
//    that the workaround offset is required);
//  - entries starting at blockFirstIndex (1) are compared against each block kernel's
//    expected read lengths and ISA GPU address.
// NOTE(review): most statements below appear twice. This text looks like a diff
// rendering that interleaves the body guarded by the clVersion `if` with the
// unguarded body — confirm which copy is current before relying on it.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenDeviceQueueDSHHasCorrectlyFilledInterfaceDesriptorTables) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
pKernel->createReflectionSurface();
pKernel->createReflectionSurface();
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer();
auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer();
EXPECT_NE(nullptr, executionModelDsh);
EXPECT_NE(nullptr, executionModelDsh);
// idData is obtained by applying ptrOffset with DeviceQueue::colorCalcStateSize to the DSH buffer.
INTERFACE_DESCRIPTOR_DATA *idData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(executionModelDsh, DeviceQueue::colorCalcStateSize));
INTERFACE_DESCRIPTOR_DATA *idData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(executionModelDsh, DeviceQueue::colorCalcStateSize));
// Before the enqueue, the DSH "used" size is expected to equal colorCalcStateSize.
size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize;
EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore);
size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize;
EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation();
auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch();
auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation();
auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch();
auto &hardwareInfo = pKernel->getDevice().getHardwareInfo();
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
kernelIsaAddress += pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
}
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
if (pKernel->getKernelInfo().name == "kernel_reflection") {
EXPECT_NE(0u, idData[0].getSamplerCount());
EXPECT_NE(0u, idData[0].getSamplerStatePointer());
}
EXPECT_NE(0u, idData[0].getConstantIndirectUrbEntryReadLength());
EXPECT_NE(0u, idData[0].getCrossThreadConstantDataReadLength());
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, idData[0].getDenormMode());
EXPECT_EQ(static_cast<uint32_t>(kernelIsaAddress), idData[0].getKernelStartPointer());
EXPECT_EQ(static_cast<uint32_t>(kernelIsaAddress >> 32), idData[0].getKernelStartPointerHigh());
// Block kernel descriptors are indexed from 1; index 0 is checked above for the parent kernel.
const uint32_t blockFirstIndex = 1;
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
ASSERT_NE(nullptr, pBlockInfo);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize;
const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize;
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload);
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->patchInfo.executionEnvironment->LargestCompiledSIMDSize, numChannels);
uint32_t numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
EXPECT_EQ(numGrfPerThreadData, idData[blockFirstIndex + i].getConstantIndirectUrbEntryReadLength());
EXPECT_EQ(sizeCrossThreadData, idData[blockFirstIndex + i].getCrossThreadConstantDataReadLength());
// The 64-bit start address is reassembled from the high and low 32-bit descriptor fields.
EXPECT_NE((uint64_t)0u, ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer());
uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer();
uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
auto &hardwareInfo = pKernel->getDevice().getHardwareInfo();
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
kernelIsaAddress += pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
}
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
if (pKernel->getKernelInfo().name == "kernel_reflection") {
EXPECT_NE(0u, idData[0].getSamplerCount());
EXPECT_NE(0u, idData[0].getSamplerStatePointer());
}
EXPECT_NE(0u, idData[0].getConstantIndirectUrbEntryReadLength());
EXPECT_NE(0u, idData[0].getCrossThreadConstantDataReadLength());
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, idData[0].getDenormMode());
EXPECT_EQ(static_cast<uint32_t>(kernelIsaAddress), idData[0].getKernelStartPointer());
EXPECT_EQ(static_cast<uint32_t>(kernelIsaAddress >> 32), idData[0].getKernelStartPointerHigh());
const uint32_t blockFirstIndex = 1;
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
ASSERT_NE(nullptr, pBlockInfo);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize;
const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize;
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload);
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->patchInfo.executionEnvironment->LargestCompiledSIMDSize, numChannels);
uint32_t numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
EXPECT_EQ(numGrfPerThreadData, idData[blockFirstIndex + i].getConstantIndirectUrbEntryReadLength());
EXPECT_EQ(sizeCrossThreadData, idData[blockFirstIndex + i].getCrossThreadConstantDataReadLength());
EXPECT_NE((uint64_t)0u, ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer());
uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer();
uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
auto &hardwareInfo = pKernel->getDevice().getHardwareInfo();
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
}
EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress);
}
EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress);
}
}
// Replaces the device's command stream receiver with a MockCsr, finds the first block
// kernel whose patchInfo.pAllocateStatelessPrivateSurface is non-null, makes sure a
// private surface is registered for it (allocating and pushing one when
// getPrivateSurface returns nullptr), enqueues the parent kernel and expects that
// surface to be resident for the mock CSR's OS context id.
// NOTE(review): most statements below appear twice. This text looks like a diff
// rendering that interleaves the body guarded by areOcl21FeaturesSupported() with the
// unguarded body — confirm which copy is current before relying on it.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelWithPrivateSurfaceWhenParentKernelIsEnqueuedThenPrivateSurfaceIsMadeResident) {
if (pClDevice->areOcl21FeaturesSupported()) {
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
int32_t executionStamp = 0;
auto mockCSR = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCSR);
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
int32_t executionStamp = 0;
auto mockCSR = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCSR);
// Start at getCount() so the ASSERT_NE further down can detect that no matching block was found.
size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) {
kernelRequiringPrivateSurface = i;
break;
}
size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) {
kernelRequiringPrivateSurface = i;
break;
}
ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount());
GraphicsAllocation *privateSurface = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface);
if (privateSurface == nullptr) {
privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
pKernel->getProgram()->getBlockKernelManager()->pushPrivateSurface(privateSurface, kernelRequiringPrivateSurface);
}
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
EXPECT_TRUE(privateSurface->isResident(mockCSR->getOsContext().getContextId()));
}
ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount());
GraphicsAllocation *privateSurface = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface);
if (privateSurface == nullptr) {
privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
pKernel->getProgram()->getBlockKernelManager()->pushPrivateSurface(privateSurface, kernelRequiringPrivateSurface);
}
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
EXPECT_TRUE(privateSurface->isResident(mockCSR->getOsContext().getContextId()));
}
// Finds the first block kernel whose patchInfo.pAllocateStatelessPrivateSurface is
// non-null, makes sure a private allocation is registered for it, then enqueues the
// parent kernel with a UserEvent in its wait list. The private allocation is expected
// NOT to be recorded as made resident until the event status is set to CL_COMPLETE,
// and to be recorded afterwards.
// NOTE(review): most statements below appear twice. This text looks like a diff
// rendering that interleaves the body guarded by areOcl21FeaturesSupported() with the
// unguarded body — confirm which copy is current before relying on it.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlocksWithPrivateMemoryWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenPrivateAllocationIsMadeResidentWhenEventUnblocks) {
if (pClDevice->areOcl21FeaturesSupported()) {
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Enabled so that csr.isMadeResident() can be queried by the expectations below.
csr.storeMakeResidentAllocations = true;
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true;
// Start at getCount() so the ASSERT_NE further down can detect that no matching block was found.
size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) {
kernelRequiringPrivateSurface = i;
break;
}
size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) {
kernelRequiringPrivateSurface = i;
break;
}
ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount());
auto privateAllocation = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface);
if (privateAllocation == nullptr) {
privateAllocation = csr.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize});
blockKernelManager->pushPrivateSurface(privateAllocation, kernelRequiringPrivateSurface);
}
auto uEvent = make_releaseable<UserEvent>(pContext);
auto clEvent = static_cast<cl_event>(uEvent.get());
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr);
EXPECT_FALSE(csr.isMadeResident(privateAllocation));
uEvent->setStatus(CL_COMPLETE);
EXPECT_TRUE(csr.isMadeResident(privateAllocation));
}
ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount());
auto privateAllocation = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface);
if (privateAllocation == nullptr) {
privateAllocation = csr.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize});
blockKernelManager->pushPrivateSurface(privateAllocation, kernelRequiringPrivateSurface);
}
auto uEvent = make_releaseable<UserEvent>(pContext);
auto clEvent = static_cast<cl_event>(uEvent.get());
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr);
EXPECT_FALSE(csr.isMadeResident(privateAllocation));
uEvent->setStatus(CL_COMPLETE);
EXPECT_TRUE(csr.isMadeResident(privateAllocation));
}
// Enqueues the parent kernel with no wait list and expects the graphics allocation of
// every block kernel reported by the BlockKernelManager to be recorded as made
// resident by the ULT command stream receiver.
// NOTE(review): most statements below appear twice. This text looks like a diff
// rendering that interleaves the body guarded by areOcl21FeaturesSupported() with the
// unguarded body — confirm which copy is current before relying on it.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelIsCalledThenBlockKernelIsaAllocationIsMadeResident) {
if (pClDevice->areOcl21FeaturesSupported()) {
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Enabled so that csr.isMadeResident() can be queried by the expectations below.
csr.storeMakeResidentAllocations = true;
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true;
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
auto blockCount = blockKernelManager->getCount();
for (auto blockId = 0u; blockId < blockCount; blockId++) {
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
}
auto blockCount = blockKernelManager->getCount();
for (auto blockId = 0u; blockId < blockCount; blockId++) {
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
}
}
// Calls BlockKernelManager::makeInternalAllocationsResident(csr) directly (no enqueue)
// and expects the graphics allocation of every block kernel to be recorded as made
// resident by the ULT command stream receiver.
// NOTE(review): most statements below appear twice. This text looks like a diff
// rendering that interleaves the body guarded by areOcl21FeaturesSupported() with the
// unguarded body — confirm which copy is current before relying on it.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelManagerFilledWithBlocksWhenMakeInternalAllocationsResidentIsCalledThenAllSurfacesAreMadeResident) {
if (pClDevice->areOcl21FeaturesSupported()) {
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Enabled so that csr.isMadeResident() can be queried by the expectations below.
csr.storeMakeResidentAllocations = true;
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true;
blockKernelManager->makeInternalAllocationsResident(csr);
blockKernelManager->makeInternalAllocationsResident(csr);
auto blockCount = blockKernelManager->getCount();
for (auto blockId = 0u; blockId < blockCount; blockId++) {
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
}
auto blockCount = blockKernelManager->getCount();
for (auto blockId = 0u; blockId < blockCount; blockId++) {
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
}
}
// Enqueues the parent kernel with a UserEvent in its wait list. While the event is
// pending, no block kernel graphics allocation is expected to be recorded as made
// resident; after the event status is set to CL_COMPLETE, all of them are.
// NOTE(review): most statements below appear twice. This text looks like a diff
// rendering that interleaves the body guarded by areOcl21FeaturesSupported() with the
// unguarded body — confirm which copy is current before relying on it.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenBlockKernelIsaAllocationIsMadeResidentWhenEventUnblocks) {
if (pClDevice->areOcl21FeaturesSupported()) {
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
size_t offset[3] = {0, 0, 0};
size_t gws[3] = {1, 1, 1};
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Enabled so that csr.isMadeResident() can be queried by the expectations below.
csr.storeMakeResidentAllocations = true;
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true;
auto uEvent = make_releaseable<UserEvent>(pContext);
auto clEvent = static_cast<cl_event>(uEvent.get());
auto uEvent = make_releaseable<UserEvent>(pContext);
auto clEvent = static_cast<cl_event>(uEvent.get());
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr);
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr);
auto blockCount = blockKernelManager->getCount();
for (auto blockId = 0u; blockId < blockCount; blockId++) {
EXPECT_FALSE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
}
auto blockCount = blockKernelManager->getCount();
for (auto blockId = 0u; blockId < blockCount; blockId++) {
EXPECT_FALSE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
}
uEvent->setStatus(CL_COMPLETE);
uEvent->setStatus(CL_COMPLETE);
for (auto blockId = 0u; blockId < blockCount; blockId++) {
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
}
for (auto blockId = 0u; blockId < blockCount; blockId++) {
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
}
}
// Tracks the device queue DSH "used" size across two enqueues of the parent kernel:
//  1. before any enqueue it is expected to equal DeviceQueue::colorCalcStateSize;
//  2. after the first enqueue it is expected to be larger;
//  3. after resetDeviceQueue() and a second enqueue it is expected to equal the value
//     seen after the first enqueue.
// NOTE(review): most statements below appear twice. This text looks like a diff
// rendering that interleaves the body guarded by the clVersion `if` with the
// unguarded body — confirm which copy is current before relying on it.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedSecondTimeThenDeviceQueueDSHIsResetToInitialOffset) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
auto dsh = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
size_t executionModelDSHUsedBefore = dsh->getUsed();
auto dsh = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
size_t executionModelDSHUsedBefore = dsh->getUsed();
uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize;
EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore);
uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize;
EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
size_t executionModelDSHUsedAfterFirst = dsh->getUsed();
EXPECT_LT(executionModelDSHUsedBefore, executionModelDSHUsedAfterFirst);
size_t executionModelDSHUsedAfterFirst = dsh->getUsed();
EXPECT_LT(executionModelDSHUsedBefore, executionModelDSHUsedAfterFirst);
// Reset explicitly between the two enqueues; the final EXPECT_EQ compares usage after each enqueue.
pDevQueueHw->resetDeviceQueue();
pDevQueueHw->resetDeviceQueue();
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
size_t executionModelDSHUsedAfterSecond = dsh->getUsed();
EXPECT_EQ(executionModelDSHUsedAfterFirst, executionModelDSHUsedAfterSecond);
}
size_t executionModelDSHUsedAfterSecond = dsh->getUsed();
EXPECT_EQ(executionModelDSHUsedAfterFirst, executionModelDSHUsedAfterSecond);
}
// Obtains the command queue's SURFACE_STATE indirect heap, calls replaceBuffer() on it
// with its own CPU base and getMaxAvailableSpace(), enqueues the parent kernel, then
// fetches the heap again and expects the same heap object and the same graphics
// allocation as before the enqueue.
// NOTE(review): most statements below appear twice. This text looks like a diff
// rendering that interleaves the body guarded by the clVersion `if` with the
// unguarded body — confirm which copy is current before relying on it.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelAndNotUsedSSHWhenEnqueuedThenSSHIsNotReallocated) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
pKernel->createReflectionSurface();
MockMultiDispatchInfo multiDispatchInfo(pKernel);
pKernel->createReflectionSurface();
MockMultiDispatchInfo multiDispatchInfo(pKernel);
auto ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
// NOTE(review): presumably this marks the heap as unused while keeping the same backing memory — confirm replaceBuffer() semantics.
ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace());
auto ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace());
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
auto ssh2 = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
EXPECT_EQ(ssh, ssh2);
EXPECT_EQ(ssh->getGraphicsAllocation(), ssh2->getGraphicsAllocation());
}
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
auto ssh2 = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
EXPECT_EQ(ssh, ssh2);
EXPECT_EQ(ssh->getGraphicsAllocation(), ssh2->getGraphicsAllocation());
}
// Enqueues the parent kernel and, for every block kernel that has binding table
// states, walks the binding table found in the command queue's SSH (starting after
// the parent kernel's surface state heap, aligned to
// BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE) and memcmp's each referenced
// RENDER_SURFACE_STATE against the one in the block's own pSsh.
// NOTE(review): a diff hunk header sits directly below the test header and most
// statements appear twice. This text looks like a diff rendering that interleaves
// the body guarded by the clVersion `if` with the unguarded body, and the alias for
// BINDING_TABLE_STATE is not visible here — confirm against the real source file.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurfaceStatesAreCopied) {
@ -306,64 +289,61 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
pKernel->createReflectionSurface();
pKernel->createReflectionSurface();
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize();
size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize();
MockMultiDispatchInfo multiDispatchInfo(pKernel);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
auto ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
// Pre-align the SSH so that it won't need to be realigned in enqueueKernel;
// this way, we can assume the location in memory into which the surface states
// will be copied.
ssh->align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
auto ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
// Pre-align the SSH so that it won't need to be realigned in enqueueKernel;
// this way, we can assume the location in memory into which the surface states
// will be copied.
ssh->align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
// mark the assumed place for surface states
size_t parentSshOffset = 0;
ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
// mark the assumed place for surface states
size_t parentSshOffset = 0;
ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
void *blockSSH = ptrOffset(ssh->getCpuBase(), parentSshOffset + parentKernelSSHSize); // note : unaligned at this point
void *blockSSH = ptrOffset(ssh->getCpuBase(), parentSshOffset + parentKernelSSHSize); // note : unaligned at this point
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
ASSERT_NE(nullptr, pBlockInfo);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
ASSERT_NE(nullptr, pBlockInfo);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, nullptr);
blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
if (blockKernel->getNumberOfBindingTableStates() > 0) {
ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState);
auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset);
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE);
auto dstBindingTable = reinterpret_cast<BINDING_TABLE_STATE *>(dstBlockBti);
Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, nullptr);
blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
if (blockKernel->getNumberOfBindingTableStates() > 0) {
ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState);
auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset);
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE);
auto dstBindingTable = reinterpret_cast<BINDING_TABLE_STATE *>(dstBlockBti);
auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset);
auto srcBindingTable = reinterpret_cast<const BINDING_TABLE_STATE *>(srcBlockBti);
// Note: this inner index `i` shadows the outer block index `i`.
for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) {
uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer();
uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer();
auto *dstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer));
auto *srcSurfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer));
EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE)));
}
blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize());
auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset);
auto srcBindingTable = reinterpret_cast<const BINDING_TABLE_STATE *>(srcBlockBti);
for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) {
uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer();
uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer();
auto *dstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer));
auto *srcSurfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer));
EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE)));
}
delete blockKernel;
blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize());
}
delete blockKernel;
}
}
@ -372,86 +352,78 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
MockMultiDispatchInfo multiDispatchInfo(pKernel);
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
EXPECT_NE(nullptr, pKernel->getKernelReflectionSurface());
}
EXPECT_NE(nullptr, pKernel->getKernelReflectionSurface());
}
// Installs a MockDeviceQueueHw as the context's default device queue, acquires its
// execution-model critical section, then enqueues the parent kernel blocked on a
// UserEvent. The critical section is expected to still be held afterwards, which the
// test uses as evidence that the device queue was not reset by the blocked enqueue.
// NOTE(review): most statements below appear twice. This text looks like a diff
// rendering that interleaves the body guarded by the clVersion `if` with the
// unguarded body — confirm which copy is current before relying on it.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueIsNotReset) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
cl_queue_properties properties[3] = {0};
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
cl_queue_properties properties[3] = {0};
MockMultiDispatchInfo multiDispatchInfo(pKernel);
MockDeviceQueueHw<FamilyType> mockDevQueue(context, pClDevice, properties[0]);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
MockDeviceQueueHw<FamilyType> mockDevQueue(context, pClDevice, properties[0]);
context->setDefaultDeviceQueue(&mockDevQueue);
// Acquire CS to check if reset queue was called
mockDevQueue.acquireEMCriticalSection();
context->setDefaultDeviceQueue(&mockDevQueue);
// Acquire CS to check if reset queue was called
mockDevQueue.acquireEMCriticalSection();
auto mockEvent = make_releaseable<UserEvent>(context);
auto mockEvent = make_releaseable<UserEvent>(context);
cl_event eventBlocking = mockEvent.get();
cl_event eventBlocking = mockEvent.get();
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 1, &eventBlocking, nullptr);
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 1, &eventBlocking, nullptr);
EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree());
}
EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree());
}
// Replaces the device's command stream receiver with a MockCsrBase, enqueues the
// parent kernel with no wait list, parses the CSR command stream and expects:
//  - a STATE_BASE_ADDRESS command to be present whose dynamic state base address
//    equals the GPU address of the device queue's DSH buffer;
//  - the DSH buffer to be among mockCSR->madeResidentGfxAllocations.
// NOTE(review): most statements below appear twice. This text looks like a diff
// rendering that interleaves the body guarded by the clVersion `if` with the
// unguarded body — confirm which copy is current before relying on it.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenNonBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueDSHAddressIsProgrammedInStateBaseAddressAndDSHIsMadeResident) {
typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
ASSERT_NE(nullptr, pDevQueueHw);
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
ASSERT_NE(nullptr, pDevQueueHw);
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
MockMultiDispatchInfo multiDispatchInfo(pKernel);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
int32_t executionStamp = 0;
auto mockCSR = new MockCsrBase<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCSR);
int32_t executionStamp = 0;
auto mockCSR = new MockCsrBase<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
pDevice->resetCommandStreamReceiver(mockCSR);
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
auto &cmdStream = mockCSR->getCS(0);
auto &cmdStream = mockCSR->getCS(0);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
hwParser.findHardwareCommands<FamilyType>();
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
hwParser.findHardwareCommands<FamilyType>();
auto stateBaseAddressItor = hwParser.itorStateBaseAddress;
auto stateBaseAddressItor = hwParser.itorStateBaseAddress;
// ASSERT (not EXPECT): the iterator is dereferenced right after, so it must not be end().
ASSERT_NE(hwParser.cmdList.end(), stateBaseAddressItor);
ASSERT_NE(hwParser.cmdList.end(), stateBaseAddressItor);
auto *stateBaseAddress = (STATE_BASE_ADDRESS *)*stateBaseAddressItor;
auto *stateBaseAddress = (STATE_BASE_ADDRESS *)*stateBaseAddressItor;
uint64_t addressProgrammed = stateBaseAddress->getDynamicStateBaseAddress();
uint64_t addressProgrammed = stateBaseAddress->getDynamicStateBaseAddress();
EXPECT_EQ(addressProgrammed, pDevQueue->getDshBuffer()->getGpuAddress());
EXPECT_EQ(addressProgrammed, pDevQueue->getDshBuffer()->getGpuAddress());
bool dshAllocationResident = false;
bool dshAllocationResident = false;
// Linear search of the allocations the mock CSR recorded as made resident.
for (auto allocation : mockCSR->madeResidentGfxAllocations) {
if (allocation == pDevQueue->getDshBuffer()) {
dshAllocationResident = true;
break;
}
for (auto allocation : mockCSR->madeResidentGfxAllocations) {
if (allocation == pDevQueue->getDshBuffer()) {
dshAllocationResident = true;
break;
}
EXPECT_TRUE(dshAllocationResident);
}
EXPECT_TRUE(dshAllocationResident);
}
INSTANTIATE_TEST_CASE_P(ParentKernelEnqueueTest,

View File

@ -28,167 +28,155 @@ static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"};
typedef ExecutionModelKernelTest ParentKernelDispatchTest;
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDeviceQueueDSHIsUsed) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
KernelOperation *blockedCommandsData = nullptr;
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
KernelOperation *blockedCommandsData = nullptr;
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
pKernel->createReflectionSurface();
pKernel->createReflectionSurface();
size_t dshUsedBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
EXPECT_EQ(0u, dshUsedBefore);
size_t dshUsedBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
EXPECT_EQ(0u, dshUsedBefore);
size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
MultiDispatchInfo multiDispatchInfo(pKernel);
multiDispatchInfo.push(dispatchInfo);
HardwareInterface<FamilyType>::dispatchWalker(
*pCmdQ,
multiDispatchInfo,
CsrDependencies(),
blockedCommandsData,
nullptr,
nullptr,
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
MultiDispatchInfo multiDispatchInfo(pKernel);
multiDispatchInfo.push(dispatchInfo);
HardwareInterface<FamilyType>::dispatchWalker(
*pCmdQ,
multiDispatchInfo,
CsrDependencies(),
blockedCommandsData,
nullptr,
nullptr,
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
EXPECT_EQ(0u, dshUsedAfter);
size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
EXPECT_EQ(0u, dshUsedAfter);
size_t executionModelDSHUsedAfter = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
EXPECT_NE(executionModelDSHUsedBefore, executionModelDSHUsedAfter);
}
size_t executionModelDSHUsedAfter = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
EXPECT_NE(executionModelDSHUsedBefore, executionModelDSHUsedAfter);
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenDynamicStateHeapIsRequestedThenDeviceQueueHeapIsReturned) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
auto ish = &getIndirectHeap<FamilyType, IndirectHeap::DYNAMIC_STATE>(*pCmdQ, multiDispatchInfo);
auto ishOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
auto ish = &getIndirectHeap<FamilyType, IndirectHeap::DYNAMIC_STATE>(*pCmdQ, multiDispatchInfo);
auto ishOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
EXPECT_EQ(ishOfDevQueue, ish);
}
EXPECT_EQ(ishOfDevQueue, ish);
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenIndirectObjectHeapIsRequestedThenDeviceQueueDSHIsReturned) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
auto ioh = &getIndirectHeap<FamilyType, IndirectHeap::INDIRECT_OBJECT>(*pCmdQ, multiDispatchInfo);
auto dshOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
auto ioh = &getIndirectHeap<FamilyType, IndirectHeap::INDIRECT_OBJECT>(*pCmdQ, multiDispatchInfo);
auto dshOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
EXPECT_EQ(dshOfDevQueue, ioh);
}
EXPECT_EQ(dshOfDevQueue, ioh);
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDefaultCmdQIOHIsNotUsed) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
KernelOperation *blockedCommandsData = nullptr;
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
KernelOperation *blockedCommandsData = nullptr;
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
MockMultiDispatchInfo multiDispatchInfo(pKernel);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
multiDispatchInfo.push(dispatchInfo);
HardwareInterface<FamilyType>::dispatchWalker(
*pCmdQ,
multiDispatchInfo,
CsrDependencies(),
blockedCommandsData,
nullptr,
nullptr,
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
multiDispatchInfo.push(dispatchInfo);
HardwareInterface<FamilyType>::dispatchWalker(
*pCmdQ,
multiDispatchInfo,
CsrDependencies(),
blockedCommandsData,
nullptr,
nullptr,
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
auto iohUsed = ioh.getUsed();
EXPECT_EQ(0u, iohUsed);
}
auto iohUsed = ioh.getUsed();
EXPECT_EQ(0u, iohUsed);
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSHSizeAccountForsBlocksSurfaceStates) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
KernelOperation *blockedCommandsData = nullptr;
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
KernelOperation *blockedCommandsData = nullptr;
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
MockMultiDispatchInfo multiDispatchInfo(pKernel);
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
multiDispatchInfo.push(dispatchInfo);
HardwareInterface<FamilyType>::dispatchWalker(
*pCmdQ,
multiDispatchInfo,
CsrDependencies(),
blockedCommandsData,
nullptr,
nullptr,
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
MockMultiDispatchInfo multiDispatchInfo(pKernel);
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
multiDispatchInfo.push(dispatchInfo);
HardwareInterface<FamilyType>::dispatchWalker(
*pCmdQ,
multiDispatchInfo,
CsrDependencies(),
blockedCommandsData,
nullptr,
nullptr,
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);
EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace());
EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace());
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace());
}
EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace());
}
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSizeForParentIsAllocated) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
const size_t globalOffsets[3] = {0, 0, 0};
const size_t workItems[3] = {1, 1, 1};
MultiDispatchInfo multiDispatchInfo(pKernel);
MultiDispatchInfo multiDispatchInfo(pKernel);
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
multiDispatchInfo.push(dispatchInfo);
HardwareInterface<FamilyType>::dispatchWalker(
*pCmdQ,
multiDispatchInfo,
CsrDependencies(),
blockedCommandsData.get(),
nullptr,
nullptr,
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
ASSERT_NE(nullptr, blockedCommandsData);
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
multiDispatchInfo.push(dispatchInfo);
HardwareInterface<FamilyType>::dispatchWalker(
*pCmdQ,
multiDispatchInfo,
CsrDependencies(),
blockedCommandsData.get(),
nullptr,
nullptr,
nullptr,
nullptr,
CL_COMMAND_NDRANGE_KERNEL);
ASSERT_NE(nullptr, blockedCommandsData);
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper<FamilyType>::getDefaultSshUsage();
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper<FamilyType>::getDefaultSshUsage();
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
size_t sshUsed = blockedCommandsData->ssh->getUsed();
size_t sshUsed = blockedCommandsData->ssh->getUsed();
size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) +
pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) +
UnitTestHelper<FamilyType>::getDefaultSshUsage();
size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) +
pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) +
UnitTestHelper<FamilyType>::getDefaultSshUsage();
if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) {
EXPECT_EQ(expectedSizeSSH, sshUsed);
}
EXPECT_GE(minRequiredSize, sshUsed);
// Total SSH size including EM must be greater then ssh allocated
EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed);
if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) {
EXPECT_EQ(expectedSizeSSH, sshUsed);
}
EXPECT_GE(minRequiredSize, sshUsed);
// Total SSH size including EM must be greater then ssh allocated
EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed);
}
INSTANTIATE_TEST_CASE_P(ParentKernelDispatchTest,

View File

@ -53,6 +53,8 @@ class ExecutionModelKernelTest : public ExecutionModelKernelFixture,
public DeviceQueueFixture {
public:
void SetUp() override {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
DebugManager.flags.EnableTimestampPacket.set(0);
ExecutionModelKernelFixture::SetUp();
CommandQueueHwFixture::SetUp(pClDevice, 0);
@ -60,10 +62,11 @@ class ExecutionModelKernelTest : public ExecutionModelKernelFixture,
}
void TearDown() override {
DeviceQueueFixture::TearDown();
CommandQueueHwFixture::TearDown();
ExecutionModelKernelFixture::TearDown();
if (!IsSkipped()) {
DeviceQueueFixture::TearDown();
CommandQueueHwFixture::TearDown();
ExecutionModelKernelFixture::TearDown();
}
}
std::unique_ptr<KernelOperation> createBlockedCommandsData(CommandQueue &commandQueue) {

View File

@ -11,6 +11,7 @@
#include "opencl/test/unit_test/fixtures/platform_fixture.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/program/program_from_binary.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include "test.h"
using namespace NEO;
@ -19,17 +20,10 @@ class ExecutionModelKernelFixture : public ProgramFromBinaryTest,
public PlatformFixture {
protected:
void SetUp() override {
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
PlatformFixture::SetUp();
std::string temp;
temp.assign(pPlatform->getClDevice(0)->getDeviceInfo().clVersion);
if (temp.find("OpenCL 1.2") != std::string::npos) {
pDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr);
pClDevice = new MockClDevice{pDevice};
return;
}
std::string options("-cl-std=CL2.0");
this->setOptions(options);
ProgramFromBinaryTest::SetUp();
@ -58,25 +52,23 @@ class ExecutionModelKernelFixture : public ProgramFromBinaryTest,
}
void TearDown() override {
if (IsSkipped()) {
return;
}
if (pKernel != nullptr) {
pKernel->release();
}
std::string temp;
temp.assign(pPlatform->getClDevice(0)->getDeviceInfo().clVersion);
ProgramFromBinaryTest::TearDown();
PlatformFixture::TearDown();
if (temp.find("OpenCL 1.2") != std::string::npos) {
if (pDevice != nullptr) {
delete pDevice;
pDevice = nullptr;
}
if (pClDevice != nullptr) {
delete pClDevice;
pClDevice = nullptr;
}
if (pDevice != nullptr) {
delete pDevice;
pDevice = nullptr;
}
if (pClDevice != nullptr) {
delete pClDevice;
pClDevice = nullptr;
}
}

View File

@ -16,12 +16,6 @@ LKFTEST_F(LkfTest, givenLkfWhenSlmSizeIsRequiredThenReturnCorrectValue) {
EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize);
}
LKFTEST_F(LkfTest, givenLKFWhenCheckedOCLVersionThen21IsReported) {
const auto &caps = pClDevice->getDeviceInfo();
EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion);
EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
}
LKFTEST_F(LkfTest, givenLKFWhenCheckedSvmSupportThenNoSvmIsReported) {
const auto &caps = pClDevice->getDeviceInfo();
EXPECT_EQ(caps.svmCapabilities, 0u);

View File

@ -14,13 +14,6 @@ using namespace NEO;
typedef Test<ClDeviceFixture> Gen12LpDeviceCaps;
GEN12LPTEST_F(Gen12LpDeviceCaps, givenGen12LpDeviceWhenQueryingDeviceInfoThenOcl30IsReported) {
const auto &caps = pClDevice->getDeviceInfo();
auto expectedClCVersion = (pClDevice->isOcl21Conformant() ? "OpenCL C 3.0 " : "OpenCL C 1.2 ");
EXPECT_STREQ("OpenCL 3.0 NEO ", caps.clVersion);
EXPECT_STREQ(expectedClCVersion, caps.clCVersion);
}
HWTEST2_F(Gen12LpDeviceCaps, lpSkusDontSupportFP64, IsTGLLP) {
const auto &caps = pClDevice->getDeviceInfo();
std::string extensionString = caps.deviceExtensions;

View File

@ -7,7 +7,6 @@
if(TESTS_BDW)
set(IGDRCL_SRCS_tests_gen8_bdw
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/device_tests_bdw.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_bdw.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_bdw.cpp
)

View File

@ -1,29 +0,0 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/unit_test/mocks/mock_device.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_source_level_debugger.h"
#include "test.h"
using namespace NEO;
struct BdwDeviceTest : public ClDeviceFixture,
public ::testing::Test {
void SetUp() override {
ClDeviceFixture::SetUp();
}
void TearDown() override {
ClDeviceFixture::TearDown();
}
};
BDWTEST_F(BdwDeviceTest, givenBdwDeviceWhenAskedForClVersionThenReport21) {
auto version = pClDevice->getEnabledClVersion();
EXPECT_EQ(21u, version);
}

View File

@ -14,12 +14,6 @@ using namespace NEO;
typedef Test<ClDeviceFixture> BdwDeviceCaps;
BDWTEST_F(BdwDeviceCaps, givenBdwDeviceWhenAskedForClVersionThenReport21) {
const auto &caps = pClDevice->getDeviceInfo();
EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
}
BDWTEST_F(BdwDeviceCaps, skuSpecificCaps) {
const auto &caps = pClDevice->getDeviceInfo();
std::string extensionString = caps.deviceExtensions;

View File

@ -12,11 +12,6 @@ using namespace NEO;
typedef Test<ClDeviceFixture> DeviceTest;
BXTTEST_F(DeviceTest, getEnabledClVersion12Device) {
auto version = pClDevice->getEnabledClVersion();
EXPECT_EQ(12u, version);
}
BXTTEST_F(DeviceTest, givenBxtDeviceWhenAskedForProflingTimerResolutionThen52IsReturned) {
auto resolution = pDevice->getProfilingTimerResolution();
EXPECT_DOUBLE_EQ(52.083, resolution);

View File

@ -12,23 +12,13 @@ using namespace NEO;
typedef Test<ClDeviceFixture> BxtDeviceCaps;
BXTTEST_F(BxtDeviceCaps, reportsOcl12) {
const auto &caps = pClDevice->getDeviceInfo();
EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion);
EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
}
BXTTEST_F(BxtDeviceCaps, BxtProfilingTimerResolution) {
const auto &caps = pDevice->getDeviceInfo();
EXPECT_EQ(52u, caps.outProfilingTimerResolution);
}
BXTTEST_F(BxtDeviceCaps, BxtClVersionSupport) {
const auto &caps = pClDevice->getDeviceInfo();
BXTTEST_F(BxtDeviceCaps, givenBxtDeviceWhenAskedFor32BitSupportThenCorrectValuesAreReturned) {
const auto &sharedCaps = pDevice->getDeviceInfo();
EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion);
EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
auto memoryManager = pDevice->getMemoryManager();
if (is64bit) {
EXPECT_TRUE(memoryManager->peekForce32BitAllocations());

View File

@ -12,12 +12,6 @@ using namespace NEO;
typedef Test<ClDeviceFixture> CflDeviceCaps;
CFLTEST_F(CflDeviceCaps, reportsOcl21) {
const auto &caps = pClDevice->getDeviceInfo();
EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
}
CFLTEST_F(CflDeviceCaps, GivenCFLWhenCheckftr64KBpagesThenTrue) {
EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages);
}

View File

@ -21,12 +21,6 @@ GLKTEST_F(Gen9DeviceCaps, givenGlkDeviceWhenAskedForDoubleSupportThenTrueIsRetur
EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64);
}
GLKTEST_F(Gen9DeviceCaps, GlkClVersionSupport) {
const auto &caps = pClDevice->getDeviceInfo();
EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion);
EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
}
GLKTEST_F(Gen9DeviceCaps, GlkIs32BitOsAllocatorAvailable) {
const auto &caps = pDevice->getDeviceInfo();
auto memoryManager = pDevice->getMemoryManager();

View File

@ -12,12 +12,6 @@ using namespace NEO;
typedef Test<ClDeviceFixture> KblDeviceCaps;
KBLTEST_F(KblDeviceCaps, reportsOcl21) {
const auto &caps = pClDevice->getDeviceInfo();
EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
}
KBLTEST_F(KblDeviceCaps, GivenKBLWhenCheckftr64KBpagesThenTrue) {
EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages);
}

View File

@ -12,11 +12,6 @@ using namespace NEO;
typedef Test<ClDeviceFixture> DeviceTest;
SKLTEST_F(DeviceTest, getEnabledClVersion21Device) {
auto version = pClDevice->getEnabledClVersion();
EXPECT_EQ(21u, version);
}
SKLTEST_F(DeviceTest, givenSklDeviceWhenAskedForProflingTimerResolutionThen83IsReturned) {
auto resolution = pDevice->getProfilingTimerResolution();
EXPECT_DOUBLE_EQ(83.333, resolution);

View File

@ -12,23 +12,13 @@ using namespace NEO;
typedef Test<ClDeviceFixture> SklDeviceCaps;
SKLTEST_F(SklDeviceCaps, reportsOcl21) {
const auto &caps = pClDevice->getDeviceInfo();
EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
}
SKLTEST_F(SklDeviceCaps, SklProfilingTimerResolution) {
const auto &caps = pDevice->getDeviceInfo();
EXPECT_EQ(83u, caps.outProfilingTimerResolution);
}
SKLTEST_F(SklDeviceCaps, givenSklDeviceWhenAskedFor32BitSupportThenFalseIsReturned) {
const auto &caps = pClDevice->getDeviceInfo();
const auto &sharedCaps = pDevice->getDeviceInfo();
EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
auto memoryManager = pDevice->getMemoryManager();
EXPECT_FALSE(memoryManager->peekForce32BitAllocations());
EXPECT_FALSE(sharedCaps.force32BitAddressess);

View File

@ -29,23 +29,3 @@ GEN9TEST_F(Gen9PlatformCaps, allSkusSupportFP64) {
EXPECT_EQ(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64")));
}
}
GEN9TEST_F(Gen9PlatformCaps, SKLVersion) {
char *paramValue = new char[12];
cl_int retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_VERSION, 12, paramValue, nullptr);
if (pPlatform->getClDevice(0)->getHardwareInfo().platform.eProductFamily == IGFX_SKYLAKE) {
EXPECT_STREQ(paramValue, "OpenCL 2.1 ");
}
EXPECT_EQ(retVal, CL_SUCCESS);
delete[] paramValue;
}
GEN9TEST_F(Gen9PlatformCaps, BXTVersion) {
char *paramValue = new char[12];
cl_int retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_VERSION, 12, paramValue, nullptr);
if (pPlatform->getClDevice(0)->getHardwareInfo().platform.eProductFamily == IGFX_BROXTON) {
EXPECT_STREQ(paramValue, "OpenCL 1.2 ");
}
EXPECT_EQ(retVal, CL_SUCCESS);
delete[] paramValue;
}

View File

@ -1057,37 +1057,37 @@ typedef ExecutionModelKernelFixture ParentKernelCommandsFromBinaryTest;
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, WhenGettingSizeRequiredForExecutionModelForSurfaceStatesThenReturnSizeOfBlocksPlusMaxBindingTableSizeForAllIdtEntriesAndSchedulerSshSize) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
EXPECT_TRUE(pKernel->isParentKernel);
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
size_t totalSize = 0;
EXPECT_TRUE(pKernel->isParentKernel);
BlockKernelManager *blockManager = pKernel->getProgram()->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
size_t totalSize = 0;
totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1; // for initial alignment
BlockKernelManager *blockManager = pKernel->getProgram()->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
uint32_t maxBindingTableCount = 0;
totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1; // for initial alignment
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
uint32_t maxBindingTableCount = 0;
totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState ? pBlockInfo->patchInfo.bindingTableState->Count : 0);
}
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
auto &scheduler = pContext->getSchedulerKernel();
auto schedulerSshSize = scheduler.getSurfaceStateHeapSize();
totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0);
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
EXPECT_EQ(totalSize, HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel));
maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState ? pBlockInfo->patchInfo.bindingTableState->Count : 0);
}
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
auto &scheduler = pContext->getSchedulerKernel();
auto schedulerSshSize = scheduler.getSurfaceStateHeapSize();
totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0);
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
EXPECT_EQ(totalSize, HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel));
}
static const char *binaryFile = "simple_block_kernel";

View File

@ -37,9 +37,7 @@ typedef ExecutionModelKernelFixture KernelReflectionSurfaceTest;
typedef ExecutionModelKernelTest KernelReflectionSurfaceWithQueueTest;
TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelThenKernelReflectionSurfaceIsNull) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
EXPECT_EQ(nullptr, pKernel->getKernelReflectionSurface());
}
EXPECT_EQ(nullptr, pKernel->getKernelReflectionSurface());
}
TEST_P(KernelReflectionSurfaceTest, GivenEmptyKernelInfoWhenPassedToGetCurbeParamsThenEmptyVectorIsReturned) {
@ -486,127 +484,121 @@ TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithoutLocalMemoryParameterWh
}
TEST_P(KernelReflectionSurfaceTest, WhenGettingCurbeParamsThenReturnedVectorIsSortedIncreasing) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
EXPECT_TRUE(pKernel->isParentKernel);
EXPECT_TRUE(pKernel->isParentKernel);
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
size_t blockCount = blockManager->getCount();
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
size_t blockCount = blockManager->getCount();
EXPECT_NE(0u, blockCount);
EXPECT_NE(0u, blockCount);
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
for (size_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
uint64_t tokenMask = 0;
uint32_t firstSSHTokenIndex = 0;
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
for (size_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
uint64_t tokenMask = 0;
uint32_t firstSSHTokenIndex = 0;
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
if (pBlockInfo->name.find("simple_block_kernel") == std::string::npos) {
EXPECT_LT(1u, curbeParamsForBlock.size());
}
if (pBlockInfo->name.find("simple_block_kernel") == std::string::npos) {
EXPECT_LT(1u, curbeParamsForBlock.size());
}
for (size_t i = 1; i < curbeParamsForBlock.size(); i++) {
EXPECT_LE(curbeParamsForBlock[i - 1].m_parameterType, curbeParamsForBlock[i].m_parameterType);
if (curbeParamsForBlock[i - 1].m_parameterType == curbeParamsForBlock[i].m_parameterType) {
for (size_t i = 1; i < curbeParamsForBlock.size(); i++) {
EXPECT_LE(curbeParamsForBlock[i - 1].m_parameterType, curbeParamsForBlock[i].m_parameterType);
if (curbeParamsForBlock[i - 1].m_parameterType == curbeParamsForBlock[i].m_parameterType) {
if (curbeParamsForBlock[i - 1].m_parameterType == iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_LOCAL_WORK_SIZE) {
EXPECT_LE(curbeParamsForBlock[i - 1].m_patchOffset, curbeParamsForBlock[i].m_patchOffset);
} else {
EXPECT_LE(curbeParamsForBlock[i - 1].m_sourceOffset, curbeParamsForBlock[i].m_sourceOffset);
}
if (curbeParamsForBlock[i - 1].m_parameterType == iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_LOCAL_WORK_SIZE) {
EXPECT_LE(curbeParamsForBlock[i - 1].m_patchOffset, curbeParamsForBlock[i].m_patchOffset);
} else {
EXPECT_LE(curbeParamsForBlock[i - 1].m_sourceOffset, curbeParamsForBlock[i].m_sourceOffset);
}
}
EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex);
curbeParamsForBlock.resize(0);
}
EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex);
curbeParamsForBlock.resize(0);
}
}
TEST_P(KernelReflectionSurfaceTest, WhenGettingCurbeParamsThenReturnedVectorHasExpectedParamTypes) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
EXPECT_TRUE(pKernel->isParentKernel);
EXPECT_TRUE(pKernel->isParentKernel);
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
size_t blockCount = blockManager->getCount();
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
size_t blockCount = blockManager->getCount();
EXPECT_NE(0u, blockCount);
EXPECT_NE(0u, blockCount);
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
for (size_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
uint64_t tokenMask = 0;
uint32_t firstSSHTokenIndex = 0;
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
for (size_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
uint64_t tokenMask = 0;
uint32_t firstSSHTokenIndex = 0;
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
const uint32_t bufferType = 49;
const uint32_t imageType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 50;
const uint32_t samplerType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 100;
const uint32_t bufferType = 49;
const uint32_t imageType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 50;
const uint32_t samplerType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 100;
bool bufferFound = false;
bool imageFound = false;
bool samplerFound = false;
bool bufferFound = false;
bool imageFound = false;
bool samplerFound = false;
if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) {
EXPECT_LT(1u, curbeParamsForBlock.size());
if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) {
EXPECT_LT(1u, curbeParamsForBlock.size());
for (const auto &curbeParams : curbeParamsForBlock) {
for (const auto &curbeParams : curbeParamsForBlock) {
switch (curbeParams.m_parameterType) {
case bufferType:
bufferFound = true;
break;
case imageType:
imageFound = true;
break;
case samplerType:
samplerFound = true;
break;
}
switch (curbeParams.m_parameterType) {
case bufferType:
bufferFound = true;
break;
case imageType:
imageFound = true;
break;
case samplerType:
samplerFound = true;
break;
}
EXPECT_TRUE(bufferFound);
EXPECT_TRUE(imageFound);
EXPECT_TRUE(samplerFound);
}
EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex);
curbeParamsForBlock.resize(0);
EXPECT_TRUE(bufferFound);
EXPECT_TRUE(imageFound);
EXPECT_TRUE(samplerFound);
}
EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex);
curbeParamsForBlock.resize(0);
}
}
// Verifies that getCurbeParams() reports a token mask for every block kernel and that, for the
// "kernel_reflection_dispatch_0" block, the mask contains at least one of the expected
// buffer/image/sampler token bits.
// NOTE(review): the former clVersion string guard is gone; presumably the fixture skips devices
// that cannot run parent kernels - confirm against the fixture's SetUp.
TEST_P(KernelReflectionSurfaceTest, WhenGettingCurbeParamsThenTokenMaskIsCorrect) {
    EXPECT_TRUE(pKernel->isParentKernel);

    BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
    size_t blockCount = blockManager->getCount();

    EXPECT_NE(0u, blockCount);

    // Reused across iterations; emptied at the end of each pass.
    std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;

    for (size_t i = 0; i < blockCount; i++) {
        const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);

        uint64_t tokenMask = 0;
        uint32_t firstSSHTokenIndex = 0;
        MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());

        if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) {
            EXPECT_LT(1u, curbeParamsForBlock.size());

            const uint64_t bufferToken = static_cast<uint64_t>(1) << 63;
            const uint64_t imageToken = static_cast<uint64_t>(1) << 50;
            const uint64_t samplerToken = static_cast<uint64_t>(1) << 51;

            // Only requires that at least one of the three bits is set, not all of them.
            const uint64_t expectedTokens = bufferToken | imageToken | samplerToken;
            EXPECT_NE(0u, tokenMask & expectedTokens);
        }

        curbeParamsForBlock.resize(0);
    }
}
@ -755,105 +747,103 @@ TEST(KernelReflectionSurfaceTestSingle, GivenDeviceQueueKernelArgWhenObtainingKe
}
// Builds the expected reflection-surface size from the per-block curbe parameters, constant
// buffer sizes and sampler/border-color state sizes, then creates the reflection surface and
// checks its size, its header fields and the per-kernel data it contains.
// NOTE(review): the former clVersion string guard is gone; presumably the fixture skips devices
// that cannot run parent kernels - confirm against the fixture's SetUp.
TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelReflectionSurfaceThenKernelReflectionSurfaceIsCorrect) {
    EXPECT_TRUE(pKernel->isParentKernel);

    BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
    size_t blockCount = blockManager->getCount();

    EXPECT_EQ(3u, blockCount);

    size_t maxConstantBufferSize = 0;
    size_t parentImageCount = 0;
    size_t parentSamplerCount = 0;

    // Only the "kernel_reflection" parent is expected to carry one image and one sampler.
    if (pKernel->getKernelInfo().name == "kernel_reflection") {
        parentImageCount = 1;
        parentSamplerCount = 1;
    }

    size_t samplerStateArrayAndBorderColorTotalSize = 0;
    size_t totalCurbeParamsSize = 0;

    std::vector<size_t> blockCurbeParamCounts(blockCount);
    std::vector<size_t> samplerStateAndBorderColorSizes(blockCount);
    std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;

    for (size_t i = 0; i < blockCount; i++) {
        const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
        uint64_t tokenMask = 0;
        uint32_t firstSSHTokenIndex = 0;
        MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());

        blockCurbeParamCounts[i] = curbeParamsForBlock.size();

        maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast<size_t>(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize));
        totalCurbeParamsSize += blockCurbeParamCounts[i];

        size_t samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(pDevice->getHardwareInfo());
        samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment);
        samplerStateAndBorderColorSize += pBlockInfo->getBorderColorStateSize();
        samplerStateAndBorderColorSizes[i] = samplerStateAndBorderColorSize;

        samplerStateArrayAndBorderColorTotalSize += alignUp(samplerStateAndBorderColorSizes[i], sizeof(void *));
        curbeParamsForBlock.clear();
    }

    // Up to here this held a parameter count; convert it to bytes.
    totalCurbeParamsSize *= sizeof(IGIL_KernelCurbeParams);

    size_t expectedReflectionSurfaceSize = alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) * blockCount, sizeof(void *));
    expectedReflectionSurfaceSize += alignUp(sizeof(IGIL_KernelData), sizeof(void *)) * blockCount;
    expectedReflectionSurfaceSize += (parentSamplerCount * sizeof(IGIL_SamplerParams) + maxConstantBufferSize) * blockCount +
                                     totalCurbeParamsSize +
                                     parentImageCount * sizeof(IGIL_ImageParamters) +
                                     parentSamplerCount * sizeof(IGIL_ParentSamplerParams) +
                                     samplerStateArrayAndBorderColorTotalSize;

    pKernel->createReflectionSurface();
    auto reflectionSurface = pKernel->getKernelReflectionSurface();

    ASSERT_NE(nullptr, reflectionSurface);
    EXPECT_EQ(expectedReflectionSurfaceSize, reflectionSurface->getUnderlyingBufferSize());

    IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());

    uint32_t parentImages = 0;
    uint32_t parentSamplers = 0;

    if (pKernel->getKernelInfo().name == "kernel_reflection") {
        parentImages = 1;
        parentSamplers = 1;
        EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentSamplerParamsOffset);
    }

    EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
    EXPECT_EQ(parentImages, pKernelHeader->m_ParentKernelImageCount);
    EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentImageDataOffset);
    EXPECT_EQ(parentSamplers, pKernelHeader->m_ParentSamplerCount);
    EXPECT_NE(pKernelHeader->m_ParentImageDataOffset, pKernelHeader->m_ParentSamplerParamsOffset);

    // Curbe tokens
    EXPECT_NE(0u, totalCurbeParamsSize);

    for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
        // NOTE(review): m_data is not indexed by i here, so every iteration appears to inspect
        // the first address entry only - confirm whether m_data[i] was intended.
        IGIL_KernelAddressData *addressData = pKernelHeader->m_data;
        EXPECT_NE(0u, addressData->m_KernelDataOffset);
        EXPECT_NE(0u, addressData->m_BTSize);
        EXPECT_NE(0u, addressData->m_SSHTokensOffset);
        EXPECT_NE(0u, addressData->m_ConstantBufferOffset);
        EXPECT_NE(0u, addressData->m_BTSoffset);

        IGIL_KernelData *kernelData = reinterpret_cast<IGIL_KernelData *>(ptrOffset(pKernelHeader, static_cast<size_t>(addressData->m_KernelDataOffset)));

        EXPECT_NE_VAL(0u, kernelData->m_SIMDSize);
        EXPECT_NE_VAL(0u, kernelData->m_PatchTokensMask);
        EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeParams);
        EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeTokens);
        EXPECT_NE_VAL(0u, kernelData->m_sizeOfConstantBuffer);

        for (uint32_t j = 0; j < kernelData->m_numberOfCurbeParams; j++) {
            EXPECT_NE_VAL(0u, kernelData->m_data[j].m_parameterType);
        }
    }
}
@ -1026,138 +1016,130 @@ INSTANTIATE_TEST_CASE_P(KernelReflectionSurfaceTest,
::testing::ValuesIn(KernelNames)));
// Creates and patches the kernel reflection surface, then checks for every block kernel that the
// addresses written into its constant buffer (curbe) match the device queue's event pool buffer
// and queue buffer GPU addresses.
HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingKernelReflectionSurfacePatchesThenCurbeIsBlocked) {
    REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pPlatform->getClDevice(0));

    BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
    size_t blockCount = blockManager->getCount();

    EXPECT_NE(0u, blockCount);

    pKernel->createReflectionSurface();
    pKernel->patchReflectionSurface(pDevQueue, nullptr);

    auto *reflectionSurface = pKernel->getKernelReflectionSurface();
    ASSERT_NE(nullptr, reflectionSurface);
    void *reflectionSurfaceMemory = reflectionSurface->getUnderlyingBuffer();

    IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());

    EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);

    // Compares the 32- or 64-bit value stored at patchedPointer with the expected GPU address.
    // Any other patch size is left unchecked.
    auto expectPatchedAddress = [](uint64_t expectedGpuAddress, const void *patchedPointer, size_t patchSize) {
        if (patchSize == sizeof(uint32_t)) {
            const uint64_t patchedValue64 = *static_cast<const uint32_t *>(patchedPointer);
            EXPECT_EQ(expectedGpuAddress, patchedValue64);
        } else if (patchSize == sizeof(uint64_t)) {
            EXPECT_EQ(expectedGpuAddress, *static_cast<const uint64_t *>(patchedPointer));
        }
    };

    for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
        const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);

        IGIL_KernelAddressData *addressData = pKernelHeader->m_data;

        EXPECT_NE(0u, addressData[i].m_ConstantBufferOffset);

        void *pCurbe = ptrOffset(reflectionSurfaceMemory, static_cast<size_t>(addressData[i].m_ConstantBufferOffset));

        if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface) {
            const auto *eventPoolPatch = pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface;
            auto *patchedPointer = ptrOffset(pCurbe, eventPoolPatch->DataParamOffset);
            expectPatchedAddress(pDevQueue->getEventPoolBuffer()->getGpuAddress(), patchedPointer, eventPoolPatch->DataParamSize);
        }

        if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
            const auto *defaultQueuePatch = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface;
            auto *patchedPointer = ptrOffset(pCurbe, defaultQueuePatch->DataParamOffset);
            // The width comes from this surface's own DataParamSize. The earlier code read the
            // event pool surface's size here, which is the wrong token and a null dereference
            // when a block has no event pool surface.
            expectPatchedAddress(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedPointer, defaultQueuePatch->DataParamSize);
        }

        for (const auto &arg : pBlockInfo->kernelArgInfo) {
            if (arg.isDeviceQueue) {
                auto *patchedPointer = ptrOffset(pCurbe, arg.kernelArgPatchInfoVector[0].crossthreadOffset);
                expectPatchedAddress(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedPointer, arg.kernelArgPatchInfoVector[0].size);
            }
        }
    }
}
HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingKernelReflectionSurfaceThenParentImageAndSamplersParamsAreSet) {
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
size_t blockCount = blockManager->getCount();
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
size_t blockCount = blockManager->getCount();
EXPECT_NE(0u, blockCount);
EXPECT_NE(0u, blockCount);
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
std::unique_ptr<Image> image3d(ImageHelper<Image3dDefaults>::create(context));
std::unique_ptr<Sampler> sampler(new MockSampler(context,
true,
(cl_addressing_mode)CL_ADDRESS_CLAMP_TO_EDGE,
(cl_filter_mode)CL_FILTER_LINEAR));
std::unique_ptr<Image> image3d(ImageHelper<Image3dDefaults>::create(context));
std::unique_ptr<Sampler> sampler(new MockSampler(context,
true,
(cl_addressing_mode)CL_ADDRESS_CLAMP_TO_EDGE,
(cl_filter_mode)CL_FILTER_LINEAR));
cl_sampler samplerCl = sampler.get();
cl_mem imageCl = image3d.get();
cl_sampler samplerCl = sampler.get();
cl_mem imageCl = image3d.get();
if (pKernel->getKernelInfo().name == "kernel_reflection") {
pKernel->setArgSampler(0, sizeof(cl_sampler), &samplerCl);
pKernel->setArgImage(1, sizeof(cl_mem), &imageCl);
}
if (pKernel->getKernelInfo().name == "kernel_reflection") {
pKernel->setArgSampler(0, sizeof(cl_sampler), &samplerCl);
pKernel->setArgImage(1, sizeof(cl_mem), &imageCl);
}
pKernel->createReflectionSurface();
pKernel->createReflectionSurface();
auto *reflectionSurface = pKernel->getKernelReflectionSurface();
ASSERT_NE(nullptr, reflectionSurface);
auto *reflectionSurface = pKernel->getKernelReflectionSurface();
ASSERT_NE(nullptr, reflectionSurface);
IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
IGIL_ImageParamters *pParentImageParams = reinterpret_cast<IGIL_ImageParamters *>(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentImageDataOffset));
IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast<IGIL_ParentSamplerParams *>(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentSamplerParamsOffset));
IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
IGIL_ImageParamters *pParentImageParams = reinterpret_cast<IGIL_ImageParamters *>(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentImageDataOffset));
IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast<IGIL_ParentSamplerParams *>(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentSamplerParamsOffset));
memset(pParentImageParams, 0xff, sizeof(IGIL_ImageParamters) * pKernelHeader->m_ParentKernelImageCount);
memset(pParentSamplerParams, 0xff, sizeof(IGIL_ParentSamplerParams) * pKernelHeader->m_ParentSamplerCount);
memset(pParentImageParams, 0xff, sizeof(IGIL_ImageParamters) * pKernelHeader->m_ParentKernelImageCount);
memset(pParentSamplerParams, 0xff, sizeof(IGIL_ParentSamplerParams) * pKernelHeader->m_ParentSamplerCount);
pKernel->patchReflectionSurface(pDevQueue, nullptr);
pKernel->patchReflectionSurface(pDevQueue, nullptr);
EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
if (pKernelHeader->m_ParentKernelImageCount > 0) {
uint32_t imageIndex = 0;
for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) {
if (arg.isImage) {
EXPECT_EQ(arg.offsetHeap, pParentImageParams[imageIndex].m_ObjectID);
imageIndex++;
}
if (pKernelHeader->m_ParentKernelImageCount > 0) {
uint32_t imageIndex = 0;
for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) {
if (arg.isImage) {
EXPECT_EQ(arg.offsetHeap, pParentImageParams[imageIndex].m_ObjectID);
imageIndex++;
}
}
}
if (pKernelHeader->m_ParentSamplerCount > 0) {
uint32_t samplerIndex = 0;
for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) {
if (arg.isSampler) {
EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(arg.offsetHeap), pParentSamplerParams[samplerIndex].m_ObjectID);
samplerIndex++;
}
if (pKernelHeader->m_ParentSamplerCount > 0) {
uint32_t samplerIndex = 0;
for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) {
if (arg.isSampler) {
EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(arg.offsetHeap), pParentSamplerParams[samplerIndex].m_ObjectID);
samplerIndex++;
}
}
}

View File

@ -193,11 +193,8 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsA
}
// Checks that the kernel under test is a parent kernel and that it reports a non-zero
// instruction heap size for the execution model.
// NOTE(review): the former clVersion string guard is gone; presumably the fixture skips
// unsupported devices - confirm against the fixture's SetUp.
TEST_P(ParentKernelFromBinaryTest, GivenParentKernelWhenGettingInstructionHeapSizeForExecutionModelThenSizeIsGreaterThanZero) {
    EXPECT_TRUE(pKernel->isParentKernel);
    EXPECT_LT(0u, pKernel->getInstructionHeapSizeForExecutionModel());
}
static const char *binaryFile = "simple_block_kernel";

View File

@ -26,6 +26,7 @@
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "opencl/test/unit_test/mocks/mock_source_level_debugger.h"
#include "opencl/test/unit_test/mocks/ult_cl_device_factory.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@ -96,14 +97,15 @@ TEST_F(PlatformTest, WhenGetClDevicesIsCalledThenExpectedValuesAreReturned) {
}
// After re-initializing the platform, checks that the compiler extension string begins the
// extension list with " -cl-ext=-all,+cl" and, when the device reports independent forward
// progress, that it also lists cl_khr_subgroups.
TEST_F(PlatformTest, givenSupportingCl21WhenGettingExtensionsStringThenSubgroupsIsEnabled) {
    // The skip macro replaces the earlier "OpenCL 2.1" version-string comparison.
    REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);

    pPlatform->initializeWithNewDevices();
    const auto &compilerExtensions = pPlatform->getClDevice(0)->peekCompilerExtensions();
    const auto isIndependentForwardProgressSupported = pPlatform->getClDevice(0)->getDeviceInfo().independentForwardProgress;

    EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string(" -cl-ext=-all,+cl")));
    if (isIndependentForwardProgressSupported) {
        EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_khr_subgroups")));
    }
}

View File

@ -20,6 +20,7 @@
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/program/program_from_binary.h"
#include "opencl/test/unit_test/program/program_with_source.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include "test.h"
#include "gmock/gmock.h"
@ -180,87 +181,87 @@ class ProgramNonUniformTest : public ContextFixture,
};
// Builds "kernel_data_param" with -cl-std=CL2.1 and checks that enqueueing the
// "test_get_local_size" kernel succeeds when the local work size (11) does not evenly divide
// the global work size (12) in the first dimension.
TEST_F(ProgramNonUniformTest, GivenCl21WhenExecutingKernelWithNonUniformThenEnqueueSucceeds) {
    REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);

    CreateProgramFromBinary(pContext, &device, "kernel_data_param");
    auto mockProgram = static_cast<MockProgram *>(pProgram);
    ASSERT_NE(nullptr, mockProgram);

    mockProgram->setBuildOptions("-cl-std=CL2.1");
    retVal = mockProgram->build(
        1,
        &device,
        nullptr,
        nullptr,
        nullptr,
        false);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Fatal check: the pointer is dereferenced right below when the kernel is created.
    auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size");
    ASSERT_NE(nullptr, pKernelInfo);

    // create a kernel
    auto pKernel = Kernel::create<MockKernel>(mockProgram, *pKernelInfo, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, pKernel);

    size_t globalWorkSize[3] = {12, 12, 12};
    size_t localWorkSize[3] = {11, 12, 1};

    retVal = pCmdQ->enqueueKernel(
        pKernel,
        3,
        nullptr,
        globalWorkSize,
        localWorkSize,
        0,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    delete pKernel;
}
// Builds "kernel_data_param" with -cl-std=CL2.0 and checks that enqueueing the
// "test_get_local_size" kernel succeeds when the local work size (11) does not evenly divide
// the global work size (12) in the first dimension.
TEST_F(ProgramNonUniformTest, GivenCl20WhenExecutingKernelWithNonUniformThenEnqueueSucceeds) {
    // NOTE(review): this CL2.0 test is gated by the OCL 2.1 skip macro - confirm that is intended.
    REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);

    CreateProgramFromBinary(pContext, &device, "kernel_data_param");
    auto mockProgram = pProgram;
    ASSERT_NE(nullptr, mockProgram);

    mockProgram->setBuildOptions("-cl-std=CL2.0");
    retVal = mockProgram->build(
        1,
        &device,
        nullptr,
        nullptr,
        nullptr,
        false);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Fatal check: the pointer is dereferenced right below when the kernel is created.
    auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size");
    ASSERT_NE(nullptr, pKernelInfo);

    // create a kernel
    auto pKernel = Kernel::create<MockKernel>(mockProgram, *pKernelInfo, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, pKernel);

    size_t globalWorkSize[3] = {12, 12, 12};
    size_t localWorkSize[3] = {11, 12, 1};

    retVal = pCmdQ->enqueueKernel(
        pKernel,
        3,
        nullptr,
        globalWorkSize,
        localWorkSize,
        0,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    delete pKernel;
}
TEST_F(ProgramNonUniformTest, GivenCl12WhenExecutingKernelWithNonUniformThenInvalidWorkGroupSizeIsReturned) {

View File

@ -16,6 +16,7 @@
#include "opencl/test/unit_test/fixtures/run_kernel_fixture.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include <vector>
@ -38,6 +39,7 @@ class ProgramWithBlockKernelsTest : public ContextFixture,
device = pPlatform->getClDevice(0);
ContextFixture::SetUp(1, &device);
ProgramFixture::SetUp();
REQUIRE_OCL_21_OR_SKIP(pContext);
}
void TearDown() override {
@ -50,85 +52,77 @@ class ProgramWithBlockKernelsTest : public ContextFixture,
};
// Builds "simple_block_kernel" and checks that the parent kernel is found by name, that the
// block kernel is NOT reachable through the regular kernel-info lookup, and that the block
// kernel manager holds an entry whose name contains "simple_block_kernel_dispatch".
TEST_F(ProgramWithBlockKernelsTest, GivenKernelWithBlockKernelsWhenProgramIsBuildingThenKernelInfosHaveCorrectNames) {
    CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0");
    auto mockProgram = static_cast<MockProgram *>(pProgram);
    ASSERT_NE(nullptr, mockProgram);

    retVal = mockProgram->build(
        1,
        &device,
        nullptr,
        nullptr,
        nullptr,
        false);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto kernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel");
    EXPECT_NE(nullptr, kernelInfo);

    auto blockKernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel_dispatch_0");
    EXPECT_EQ(nullptr, blockKernelInfo);

    // Every entry must be non-null (fatal, because its name is read next); at least one entry
    // must carry the dispatch name.
    bool blockKernelFound = false;
    for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) {
        const KernelInfo *managedBlockInfo = mockProgram->blockKernelManager->getBlockKernelInfo(i);
        ASSERT_NE(nullptr, managedBlockInfo);
        if (managedBlockInfo->name.find("simple_block_kernel_dispatch") != std::string::npos) {
            blockKernelFound = true;
        }
    }

    EXPECT_TRUE(blockKernelFound);
}
// Compiles "simple_block_kernel", links it into a second program and checks that the linked
// program's block kernel manager is not empty.
TEST_F(ProgramWithBlockKernelsTest, GivenKernelWithBlockKernelsWhenProgramIsLinkedThenBlockKernelsAreSeparated) {
    CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0");
    const char *buildOptions = "-cl-std=CL2.0";

    // Fatal check placed before the built-in name override so that an early exit cannot leave
    // the override in place without the matching restore at the end of the test.
    ASSERT_NE(nullptr, pProgram);

    overwriteBuiltInBinaryName(
        &pPlatform->getClDevice(0)->getDevice(),
        "simple_block_kernel", true);

    EXPECT_EQ(CL_SUCCESS, retVal);
    Program *programLinked = new Program(*pPlatform->peekExecutionEnvironment(), pContext, false, nullptr);
    cl_program program = pProgram;

    retVal = pProgram->compile(1, &device, buildOptions, 0, nullptr, nullptr, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = programLinked->link(1, &device, buildOptions, 1, &program, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    BlockKernelManager *blockManager = programLinked->getBlockKernelManager();

    EXPECT_NE(0u, blockManager->getCount());

    // NOTE(review): this loop stops at the first matching block kernel but asserts nothing
    // about it - confirm whether an expectation on the match was intended.
    for (uint32_t i = 0; i < blockManager->getCount(); i++) {
        const KernelInfo *info = blockManager->getBlockKernelInfo(i);
        if (info->name.find("simple_block_kernel_dispatch") != std::string::npos) {
            break;
        }
    }

    restoreBuiltInBinaryName(nullptr);
    delete programLinked;
}
} // namespace NEO