mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Enable OpenCL 3.0 by default on all devices
Change-Id: Ic5e46177c957896c499b7aa6727af48105b664ac Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
595f374634
commit
7d0f23bfc9
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
@ -91,37 +92,37 @@ INSTANTIATE_TEST_CASE_P(wgs,
|
||||
::testing::ValuesIn(WorkDimensions)));
|
||||
|
||||
TEST_P(KernelSubGroupInfoReturnSizeTest, GivenWorkGroupSizeWhenGettingMaxSubGroupSizeThenReturnIsCalculatedCorrectly) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
WorkSizeParam workSize;
|
||||
size_t workDim;
|
||||
std::tie(workSize, workDim) = GetParam();
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
memset(inputValue, 0, sizeof(inputValue));
|
||||
inputValue[0] = workSize.x;
|
||||
if (workDim > 1) {
|
||||
inputValue[1] = workSize.y;
|
||||
}
|
||||
if (workDim > 2) {
|
||||
inputValue[2] = workSize.z;
|
||||
}
|
||||
paramValueSizeRet = 0;
|
||||
WorkSizeParam workSize;
|
||||
size_t workDim;
|
||||
std::tie(workSize, workDim) = GetParam();
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
|
||||
sizeof(size_t) * workDim,
|
||||
inputValue,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
|
||||
EXPECT_EQ(retVal, CL_SUCCESS);
|
||||
|
||||
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
|
||||
|
||||
EXPECT_EQ(maxSimdSize, paramValue[0]);
|
||||
memset(inputValue, 0, sizeof(inputValue));
|
||||
inputValue[0] = workSize.x;
|
||||
if (workDim > 1) {
|
||||
inputValue[1] = workSize.y;
|
||||
}
|
||||
if (workDim > 2) {
|
||||
inputValue[2] = workSize.z;
|
||||
}
|
||||
paramValueSizeRet = 0;
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
|
||||
sizeof(size_t) * workDim,
|
||||
inputValue,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
|
||||
EXPECT_EQ(retVal, CL_SUCCESS);
|
||||
|
||||
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
|
||||
|
||||
EXPECT_EQ(maxSimdSize, paramValue[0]);
|
||||
}
|
||||
|
||||
typedef KernelSubGroupInfoParamFixture<std::tuple<WorkSizeParam, size_t>> KernelSubGroupInfoReturnCountTest;
|
||||
@ -133,48 +134,48 @@ INSTANTIATE_TEST_CASE_P(wgs,
|
||||
::testing::ValuesIn(WorkDimensions)));
|
||||
|
||||
TEST_P(KernelSubGroupInfoReturnCountTest, GivenWorkGroupSizeWhenGettingSubGroupCountThenReturnIsCalculatedCorrectly) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
WorkSizeParam workSize;
|
||||
size_t workDim;
|
||||
std::tie(workSize, workDim) = GetParam();
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
memset(inputValue, 0, sizeof(inputValue));
|
||||
inputValue[0] = workSize.x;
|
||||
if (workDim > 1) {
|
||||
inputValue[1] = workSize.y;
|
||||
}
|
||||
if (workDim > 2) {
|
||||
inputValue[2] = workSize.z;
|
||||
}
|
||||
paramValueSizeRet = 0;
|
||||
WorkSizeParam workSize;
|
||||
size_t workDim;
|
||||
std::tie(workSize, workDim) = GetParam();
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
|
||||
sizeof(size_t) * workDim,
|
||||
inputValue,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
memset(inputValue, 0, sizeof(inputValue));
|
||||
inputValue[0] = workSize.x;
|
||||
if (workDim > 1) {
|
||||
inputValue[1] = workSize.y;
|
||||
}
|
||||
if (workDim > 2) {
|
||||
inputValue[2] = workSize.z;
|
||||
}
|
||||
paramValueSizeRet = 0;
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
|
||||
sizeof(size_t) * workDim,
|
||||
inputValue,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
|
||||
EXPECT_EQ(sizeof(size_t), paramValueSizeRet);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto calculatedWGS = workSize.x;
|
||||
if (workDim > 1) {
|
||||
calculatedWGS *= workSize.y;
|
||||
}
|
||||
if (workDim > 2) {
|
||||
calculatedWGS *= workSize.z;
|
||||
}
|
||||
EXPECT_EQ(sizeof(size_t), paramValueSizeRet);
|
||||
|
||||
if (calculatedWGS % maxSimdSize == 0) {
|
||||
EXPECT_EQ(calculatedWGS / maxSimdSize, paramValue[0]);
|
||||
} else {
|
||||
EXPECT_EQ((calculatedWGS / maxSimdSize) + 1, paramValue[0]);
|
||||
}
|
||||
auto calculatedWGS = workSize.x;
|
||||
if (workDim > 1) {
|
||||
calculatedWGS *= workSize.y;
|
||||
}
|
||||
if (workDim > 2) {
|
||||
calculatedWGS *= workSize.z;
|
||||
}
|
||||
|
||||
if (calculatedWGS % maxSimdSize == 0) {
|
||||
EXPECT_EQ(calculatedWGS / maxSimdSize, paramValue[0]);
|
||||
} else {
|
||||
EXPECT_EQ((calculatedWGS / maxSimdSize) + 1, paramValue[0]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -189,158 +190,158 @@ INSTANTIATE_TEST_CASE_P(sgn,
|
||||
::testing::ValuesIn(WorkDimensions)));
|
||||
|
||||
TEST_P(KernelSubGroupInfoReturnLocalSizeTest, GivenWorkGroupSizeWhenGettingLocalSizeThenReturnIsCalculatedCorrectly) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
size_t subGroupsNum;
|
||||
size_t workDim;
|
||||
std::tie(subGroupsNum, workDim) = GetParam();
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
inputValue[0] = subGroupsNum;
|
||||
size_t subGroupsNum;
|
||||
size_t workDim;
|
||||
std::tie(subGroupsNum, workDim) = GetParam();
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
sizeof(size_t) * workDim,
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
inputValue[0] = subGroupsNum;
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
sizeof(size_t) * workDim,
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
|
||||
EXPECT_EQ(sizeof(size_t) * workDim, paramValueSizeRet);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
size_t workGroupSize = subGroupsNum * largestCompiledSIMDSize;
|
||||
if (workGroupSize > calculatedMaxWorkgroupSize) {
|
||||
workGroupSize = 0;
|
||||
}
|
||||
EXPECT_EQ(sizeof(size_t) * workDim, paramValueSizeRet);
|
||||
|
||||
EXPECT_EQ(workGroupSize, paramValue[0]);
|
||||
if (workDim > 1) {
|
||||
EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[1]);
|
||||
}
|
||||
if (workDim > 2) {
|
||||
EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[2]);
|
||||
}
|
||||
size_t workGroupSize = subGroupsNum * largestCompiledSIMDSize;
|
||||
if (workGroupSize > calculatedMaxWorkgroupSize) {
|
||||
workGroupSize = 0;
|
||||
}
|
||||
|
||||
EXPECT_EQ(workGroupSize, paramValue[0]);
|
||||
if (workDim > 1) {
|
||||
EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[1]);
|
||||
}
|
||||
if (workDim > 2) {
|
||||
EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[2]);
|
||||
}
|
||||
}
|
||||
|
||||
typedef KernelSubGroupInfoParamFixture<WorkSizeParam> KernelSubGroupInfoReturnMaxNumberTest;
|
||||
|
||||
TEST_F(KernelSubGroupInfoReturnMaxNumberTest, GivenWorkGroupSizeWhenGettingMaxNumSubGroupsThenReturnIsCalculatedCorrectly) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_MAX_NUM_SUB_GROUPS,
|
||||
0,
|
||||
nullptr,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
|
||||
EXPECT_EQ(paramValue[0], Math::divideAndRoundUp(calculatedMaxWorkgroupSize, largestCompiledSIMDSize));
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_MAX_NUM_SUB_GROUPS,
|
||||
0,
|
||||
nullptr,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
|
||||
EXPECT_EQ(paramValue[0], Math::divideAndRoundUp(calculatedMaxWorkgroupSize, largestCompiledSIMDSize));
|
||||
}
|
||||
|
||||
typedef KernelSubGroupInfoParamFixture<WorkSizeParam> KernelSubGroupInfoReturnCompileNumberTest;
|
||||
|
||||
TEST_F(KernelSubGroupInfoReturnCompileNumberTest, GivenKernelWhenGettingCompileNumSubGroupThenReturnIsCalculatedCorrectly) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
|
||||
0,
|
||||
nullptr,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
|
||||
EXPECT_EQ(paramValue[0], static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->CompiledSubGroupsNumber));
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
|
||||
0,
|
||||
nullptr,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
|
||||
EXPECT_EQ(paramValue[0], static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->CompiledSubGroupsNumber));
|
||||
}
|
||||
|
||||
typedef KernelSubGroupInfoParamFixture<WorkSizeParam> KernelSubGroupInfoReturnCompileSizeTest;
|
||||
|
||||
TEST_F(KernelSubGroupInfoReturnCompileSizeTest, GivenKernelWhenGettingCompileSubGroupSizeThenReturnIsCalculatedCorrectly) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL,
|
||||
0,
|
||||
nullptr,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL,
|
||||
0,
|
||||
nullptr,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
&paramValueSizeRet);
|
||||
|
||||
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
size_t requiredSubGroupSize = 0;
|
||||
auto start = pKernel->getKernelInfo().attributes.find("intel_reqd_sub_group_size(");
|
||||
if (start != std::string::npos) {
|
||||
start += strlen("intel_reqd_sub_group_size(");
|
||||
auto stop = pKernel->getKernelInfo().attributes.find(")", start);
|
||||
requiredSubGroupSize = stoi(pKernel->getKernelInfo().attributes.substr(start, stop - start));
|
||||
}
|
||||
EXPECT_EQ(paramValueSizeRet, sizeof(size_t));
|
||||
|
||||
EXPECT_EQ(paramValue[0], requiredSubGroupSize);
|
||||
size_t requiredSubGroupSize = 0;
|
||||
auto start = pKernel->getKernelInfo().attributes.find("intel_reqd_sub_group_size(");
|
||||
if (start != std::string::npos) {
|
||||
start += strlen("intel_reqd_sub_group_size(");
|
||||
auto stop = pKernel->getKernelInfo().attributes.find(")", start);
|
||||
requiredSubGroupSize = stoi(pKernel->getKernelInfo().attributes.substr(start, stop - start));
|
||||
}
|
||||
|
||||
EXPECT_EQ(paramValue[0], requiredSubGroupSize);
|
||||
}
|
||||
|
||||
TEST_F(KernelSubGroupInfoTest, GivenNullKernelWhenGettingSubGroupInfoThenInvalidKernelErrorIsReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
nullptr,
|
||||
pClDevice,
|
||||
0,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_KERNEL, retVal);
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
nullptr,
|
||||
pClDevice,
|
||||
0,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_KERNEL, retVal);
|
||||
}
|
||||
|
||||
TEST_F(KernelSubGroupInfoTest, GivenNullDeviceWhenGettingSubGroupInfoThenInvalidDeviceErrorIsReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
nullptr,
|
||||
0,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_DEVICE, retVal);
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
nullptr,
|
||||
0,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_DEVICE, retVal);
|
||||
}
|
||||
|
||||
TEST_F(KernelSubGroupInfoTest, GivenInvalidParamNameWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
0,
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
nullptr);
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
0,
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
sizeof(size_t),
|
||||
paramValue,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
}
|
||||
|
||||
uint32_t /*cl_kernel_sub_group_info*/ KernelSubGroupInfoInputParams[] = {
|
||||
@ -385,167 +386,166 @@ TEST_P(KernelSubGroupInfoInputParamsTest, GivenOpenClVersionLowerThan21WhenGetti
|
||||
}
|
||||
|
||||
TEST_P(KernelSubGroupInfoInputParamsTest, GivenWorkDimZeroWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
0,
|
||||
inputValue,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
|
||||
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
0,
|
||||
inputValue,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
TEST_P(KernelSubGroupInfoInputParamsTest, GivenIndivisibleWorkDimWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
|
||||
? maxWorkDim
|
||||
: 1;
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
(sizeof(size_t) * workDim) - 1,
|
||||
inputValue,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
|
||||
? maxWorkDim
|
||||
: 1;
|
||||
|
||||
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
(sizeof(size_t) * workDim) - 1,
|
||||
inputValue,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
TEST_P(KernelSubGroupInfoInputParamsTest, GivenWorkDimGreaterThanMaxWorkDimWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
|
||||
? maxWorkDim
|
||||
: 1;
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t) * (workDim + 1),
|
||||
inputValue,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
|
||||
? maxWorkDim
|
||||
: 1;
|
||||
|
||||
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t) * (workDim + 1),
|
||||
inputValue,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
TEST_P(KernelSubGroupInfoInputParamsTest, GivenInputValueIsNullWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
|
||||
? maxWorkDim
|
||||
: 1;
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t) * (workDim),
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
|
||||
(GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE))
|
||||
? maxWorkDim
|
||||
: 1;
|
||||
|
||||
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t) * (workDim),
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
TEST_P(KernelSubGroupInfoInputParamsTest, GivenParamValueSizeZeroWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
0,
|
||||
paramValue,
|
||||
nullptr);
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
0,
|
||||
paramValue,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
}
|
||||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
}
|
||||
|
||||
TEST_P(KernelSubGroupInfoInputParamsTest, GivenUnalignedParamValueSizeWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1;
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
(sizeof(size_t) * workDim) - 1,
|
||||
paramValue,
|
||||
nullptr);
|
||||
size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1;
|
||||
|
||||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
(sizeof(size_t) * workDim) - 1,
|
||||
paramValue,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
}
|
||||
|
||||
TEST_P(KernelSubGroupInfoInputParamsTest, GivenTooLargeParamValueSizeWhenGettingSubGroupInfoThenCorrectRetValIsReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1;
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
// paramValue size / sizeof(size_t) > MaxWorkDim
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
sizeof(size_t) * (workDim + 1),
|
||||
paramValue,
|
||||
nullptr);
|
||||
bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1;
|
||||
|
||||
EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
// paramValue size / sizeof(size_t) > MaxWorkDim
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
sizeof(size_t) * (workDim + 1),
|
||||
paramValue,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
TEST_P(KernelSubGroupInfoInputParamsTest, GivenNullPtrForReturnWhenGettingKernelSubGroupInfoThenSuccessIsReturned) {
|
||||
if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT);
|
||||
|
||||
EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
retVal = clGetKernelSubGroupInfo(
|
||||
pKernel,
|
||||
pClDevice,
|
||||
GetParam(),
|
||||
sizeof(size_t),
|
||||
inputValue,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal);
|
||||
}
|
||||
} // namespace ULT
|
||||
|
@ -127,8 +127,9 @@ TEST_F(DeviceGetCapsTest, WhenCreatingDeviceThenCapsArePopulatedCorrectly) {
|
||||
EXPECT_NE(nullptr, caps.vendor);
|
||||
EXPECT_NE(nullptr, caps.driverVersion);
|
||||
EXPECT_NE(nullptr, caps.profile);
|
||||
EXPECT_NE(nullptr, caps.clVersion);
|
||||
EXPECT_NE(nullptr, caps.clCVersion);
|
||||
EXPECT_STREQ("OpenCL 3.0 NEO ", caps.clVersion);
|
||||
auto expectedClCVersion = (device->isOcl21Conformant() ? "OpenCL C 3.0 " : "OpenCL C 1.2 ");
|
||||
EXPECT_STREQ(expectedClCVersion, caps.clCVersion);
|
||||
EXPECT_NE(0u, caps.numericClVersion);
|
||||
EXPECT_GT(caps.openclCAllVersions.size(), 0u);
|
||||
EXPECT_GT(caps.openclCFeatures.size(), 0u);
|
||||
@ -1196,9 +1197,7 @@ TEST(DeviceGetCaps, givenDebugFlagToUseCertainWorkgroupSizeWhenDeviceIsCreatedIt
|
||||
}
|
||||
|
||||
TEST(DeviceGetCaps, givenDebugFlagToDisableDeviceEnqueuesWhenCreatingDeviceThenDeviceQueueCapsAreSetCorrectly) {
|
||||
if (defaultHwInfo->capabilityTable.clVersionSupport == 21) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
DebugManager.flags.ForceDeviceEnqueueSupport.set(0);
|
||||
|
@ -483,8 +483,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshOff
|
||||
class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture {
|
||||
public:
|
||||
void SetUp() override {
|
||||
ExecutionModelKernelFixture::SetUp();
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
|
||||
|
||||
ExecutionModelKernelFixture::SetUp();
|
||||
cl_queue_properties properties[5] = {
|
||||
CL_QUEUE_PROPERTIES,
|
||||
CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
|
||||
@ -503,6 +504,10 @@ class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture {
|
||||
ASSERT_NE(nullptr, devQueue);
|
||||
}
|
||||
void TearDown() override {
|
||||
if (IsSkipped()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (devQueue) {
|
||||
delete devQueue;
|
||||
}
|
||||
@ -522,130 +527,122 @@ class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture {
|
||||
};
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSetiingIUpIndirectStateThenDshIsNotUsed) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(devQueue);
|
||||
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(devQueue);
|
||||
|
||||
ASSERT_NE(nullptr, devQueueHw);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ASSERT_NE(nullptr, dsh);
|
||||
ASSERT_NE(nullptr, devQueueHw);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ASSERT_NE(nullptr, dsh);
|
||||
|
||||
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
|
||||
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
auto usedBeforeSSH = ssh->getUsed();
|
||||
auto usedBeforeDSH = dsh->getUsed();
|
||||
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
auto usedBeforeSSH = ssh->getUsed();
|
||||
auto usedBeforeDSH = dsh->getUsed();
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1, false);
|
||||
auto usedAfterSSH = ssh->getUsed();
|
||||
auto usedAfterDSH = dsh->getUsed();
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1, false);
|
||||
auto usedAfterSSH = ssh->getUsed();
|
||||
auto usedAfterDSH = dsh->getUsed();
|
||||
|
||||
EXPECT_GE(surfaceStateHeapSize, usedAfterSSH - usedBeforeSSH);
|
||||
EXPECT_GE(surfaceStateHeapSize, usedAfterSSH - usedBeforeSSH);
|
||||
|
||||
EXPECT_EQ(0u, usedAfterDSH - usedBeforeDSH);
|
||||
EXPECT_EQ(0u, usedAfterDSH - usedBeforeDSH);
|
||||
|
||||
alignedFree(ssh->getCpuBase());
|
||||
delete ssh;
|
||||
}
|
||||
alignedFree(ssh->getCpuBase());
|
||||
delete ssh;
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenCorrectStartBlockIdIsSet) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(devQueue);
|
||||
ASSERT_NE(nullptr, devQueueHw);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ASSERT_NE(nullptr, dsh);
|
||||
auto *devQueueHw = castToObject<DeviceQueueHw<FamilyType>>(devQueue);
|
||||
ASSERT_NE(nullptr, devQueueHw);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ASSERT_NE(nullptr, dsh);
|
||||
|
||||
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
|
||||
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
|
||||
uint32_t parentCount = 4;
|
||||
uint32_t parentCount = 4;
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
|
||||
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID);
|
||||
|
||||
alignedFree(ssh->getCpuBase());
|
||||
delete ssh;
|
||||
}
|
||||
alignedFree(ssh->getCpuBase());
|
||||
delete ssh;
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenDshValuesAreSetCorrectly) {
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
MockContext mockContext;
|
||||
MockDeviceQueueHw<FamilyType> *devQueueHw = new MockDeviceQueueHw<FamilyType>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]);
|
||||
ASSERT_NE(nullptr, devQueueHw);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ASSERT_NE(nullptr, dsh);
|
||||
MockContext mockContext;
|
||||
MockDeviceQueueHw<FamilyType> *devQueueHw = new MockDeviceQueueHw<FamilyType>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]);
|
||||
ASSERT_NE(nullptr, devQueueHw);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
ASSERT_NE(nullptr, dsh);
|
||||
|
||||
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
|
||||
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
|
||||
uint32_t parentCount = 1;
|
||||
uint32_t parentCount = 1;
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
auto *igilQueue = reinterpret_cast<IGIL_CommandQueue *>(devQueueHw->getQueueBuffer()->getUnderlyingBuffer());
|
||||
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize());
|
||||
EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh);
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize());
|
||||
EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE));
|
||||
EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh);
|
||||
|
||||
alignedFree(ssh->getCpuBase());
|
||||
delete ssh;
|
||||
delete devQueueHw;
|
||||
}
|
||||
alignedFree(ssh->getCpuBase());
|
||||
delete ssh;
|
||||
delete devQueueHw;
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCallingSetupIndirectStateThenAllIddHaveBarriersEnabled) {
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
pKernel->createReflectionSurface();
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
MockContext mockContext;
|
||||
auto devQueueHw = std::make_unique<MockDeviceQueueHw<FamilyType>>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
MockContext mockContext;
|
||||
auto devQueueHw = std::make_unique<MockDeviceQueueHw<FamilyType>>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]);
|
||||
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
|
||||
uint32_t parentCount = 1;
|
||||
uint32_t parentCount = 1;
|
||||
|
||||
auto blockManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto iddCount = blockManager->getCount();
|
||||
for (uint32_t i = 0; i < iddCount; i++) {
|
||||
((SPatchExecutionEnvironment *)blockManager->getBlockKernelInfo(i)->patchInfo.executionEnvironment)->HasBarriers = 1u;
|
||||
}
|
||||
|
||||
auto surfaceStateHeapSize =
|
||||
HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
auto ssh = std::make_unique<IndirectHeap>(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
|
||||
auto iddStartPtr = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize));
|
||||
auto iddStartIndex = parentCount;
|
||||
for (uint32_t i = 0; i < iddCount; i++) {
|
||||
EXPECT_TRUE(iddStartPtr[iddStartIndex + i].getBarrierEnable());
|
||||
}
|
||||
|
||||
alignedFree(ssh->getCpuBase());
|
||||
auto blockManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto iddCount = blockManager->getCount();
|
||||
for (uint32_t i = 0; i < iddCount; i++) {
|
||||
((SPatchExecutionEnvironment *)blockManager->getBlockKernelInfo(i)->patchInfo.executionEnvironment)->HasBarriers = 1u;
|
||||
}
|
||||
|
||||
auto surfaceStateHeapSize =
|
||||
HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
|
||||
auto ssh = std::make_unique<IndirectHeap>(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
|
||||
|
||||
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);
|
||||
|
||||
auto iddStartPtr = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize));
|
||||
auto iddStartIndex = parentCount;
|
||||
for (uint32_t i = 0; i < iddCount; i++) {
|
||||
EXPECT_TRUE(iddStartPtr[iddStartIndex + i].getBarrierEnable());
|
||||
}
|
||||
|
||||
alignedFree(ssh->getCpuBase());
|
||||
}
|
||||
|
||||
static const char *binaryFile = "simple_block_kernel";
|
||||
|
@ -38,267 +38,250 @@ typedef ExecutionModelKernelTest ParentKernelEnqueueTest;
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenDeviceQueueDSHHasCorrectlyFilledInterfaceDesriptorTables) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
|
||||
|
||||
auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
|
||||
void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer();
|
||||
auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
|
||||
void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer();
|
||||
|
||||
EXPECT_NE(nullptr, executionModelDsh);
|
||||
EXPECT_NE(nullptr, executionModelDsh);
|
||||
|
||||
INTERFACE_DESCRIPTOR_DATA *idData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(executionModelDsh, DeviceQueue::colorCalcStateSize));
|
||||
INTERFACE_DESCRIPTOR_DATA *idData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(executionModelDsh, DeviceQueue::colorCalcStateSize));
|
||||
|
||||
size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
|
||||
uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize;
|
||||
EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore);
|
||||
size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
|
||||
uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize;
|
||||
EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
|
||||
auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation();
|
||||
auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch();
|
||||
auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation();
|
||||
auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch();
|
||||
|
||||
auto &hardwareInfo = pKernel->getDevice().getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
|
||||
kernelIsaAddress += pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
if (pKernel->getKernelInfo().name == "kernel_reflection") {
|
||||
EXPECT_NE(0u, idData[0].getSamplerCount());
|
||||
EXPECT_NE(0u, idData[0].getSamplerStatePointer());
|
||||
}
|
||||
|
||||
EXPECT_NE(0u, idData[0].getConstantIndirectUrbEntryReadLength());
|
||||
EXPECT_NE(0u, idData[0].getCrossThreadConstantDataReadLength());
|
||||
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, idData[0].getDenormMode());
|
||||
EXPECT_EQ(static_cast<uint32_t>(kernelIsaAddress), idData[0].getKernelStartPointer());
|
||||
EXPECT_EQ(static_cast<uint32_t>(kernelIsaAddress >> 32), idData[0].getKernelStartPointerHigh());
|
||||
|
||||
const uint32_t blockFirstIndex = 1;
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
|
||||
ASSERT_NE(nullptr, pBlockInfo);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
|
||||
|
||||
auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize;
|
||||
|
||||
const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize;
|
||||
|
||||
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload);
|
||||
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->patchInfo.executionEnvironment->LargestCompiledSIMDSize, numChannels);
|
||||
uint32_t numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
|
||||
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
|
||||
|
||||
EXPECT_EQ(numGrfPerThreadData, idData[blockFirstIndex + i].getConstantIndirectUrbEntryReadLength());
|
||||
EXPECT_EQ(sizeCrossThreadData, idData[blockFirstIndex + i].getCrossThreadConstantDataReadLength());
|
||||
EXPECT_NE((uint64_t)0u, ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer());
|
||||
|
||||
uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer();
|
||||
uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
|
||||
auto &hardwareInfo = pKernel->getDevice().getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
|
||||
kernelIsaAddress += pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
if (pKernel->getKernelInfo().name == "kernel_reflection") {
|
||||
EXPECT_NE(0u, idData[0].getSamplerCount());
|
||||
EXPECT_NE(0u, idData[0].getSamplerStatePointer());
|
||||
}
|
||||
|
||||
EXPECT_NE(0u, idData[0].getConstantIndirectUrbEntryReadLength());
|
||||
EXPECT_NE(0u, idData[0].getCrossThreadConstantDataReadLength());
|
||||
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, idData[0].getDenormMode());
|
||||
EXPECT_EQ(static_cast<uint32_t>(kernelIsaAddress), idData[0].getKernelStartPointer());
|
||||
EXPECT_EQ(static_cast<uint32_t>(kernelIsaAddress >> 32), idData[0].getKernelStartPointerHigh());
|
||||
|
||||
const uint32_t blockFirstIndex = 1;
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
|
||||
ASSERT_NE(nullptr, pBlockInfo);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
|
||||
|
||||
auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize;
|
||||
|
||||
const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize;
|
||||
|
||||
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload);
|
||||
auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->patchInfo.executionEnvironment->LargestCompiledSIMDSize, numChannels);
|
||||
uint32_t numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
|
||||
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
|
||||
|
||||
EXPECT_EQ(numGrfPerThreadData, idData[blockFirstIndex + i].getConstantIndirectUrbEntryReadLength());
|
||||
EXPECT_EQ(sizeCrossThreadData, idData[blockFirstIndex + i].getCrossThreadConstantDataReadLength());
|
||||
EXPECT_NE((uint64_t)0u, ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer());
|
||||
|
||||
uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer();
|
||||
uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch();
|
||||
|
||||
auto &hardwareInfo = pKernel->getDevice().getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) {
|
||||
expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP;
|
||||
}
|
||||
|
||||
EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress);
|
||||
}
|
||||
EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress);
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelWithPrivateSurfaceWhenParentKernelIsEnqueuedThenPrivateSurfaceIsMadeResident) {
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
int32_t executionStamp = 0;
|
||||
auto mockCSR = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
|
||||
pDevice->resetCommandStreamReceiver(mockCSR);
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
int32_t executionStamp = 0;
|
||||
auto mockCSR = new MockCsr<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
|
||||
pDevice->resetCommandStreamReceiver(mockCSR);
|
||||
|
||||
size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
|
||||
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
|
||||
if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) {
|
||||
kernelRequiringPrivateSurface = i;
|
||||
break;
|
||||
}
|
||||
size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
|
||||
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
|
||||
if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) {
|
||||
kernelRequiringPrivateSurface = i;
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount());
|
||||
|
||||
GraphicsAllocation *privateSurface = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface);
|
||||
|
||||
if (privateSurface == nullptr) {
|
||||
privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
|
||||
pKernel->getProgram()->getBlockKernelManager()->pushPrivateSurface(privateSurface, kernelRequiringPrivateSurface);
|
||||
}
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_TRUE(privateSurface->isResident(mockCSR->getOsContext().getContextId()));
|
||||
}
|
||||
|
||||
ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount());
|
||||
|
||||
GraphicsAllocation *privateSurface = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface);
|
||||
|
||||
if (privateSurface == nullptr) {
|
||||
privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
|
||||
pKernel->getProgram()->getBlockKernelManager()->pushPrivateSurface(privateSurface, kernelRequiringPrivateSurface);
|
||||
}
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_TRUE(privateSurface->isResident(mockCSR->getOsContext().getContextId()));
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlocksWithPrivateMemoryWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenPrivateAllocationIsMadeResidentWhenEventUnblocks) {
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
|
||||
size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
|
||||
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
|
||||
if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) {
|
||||
kernelRequiringPrivateSurface = i;
|
||||
break;
|
||||
}
|
||||
size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount();
|
||||
for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) {
|
||||
if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) {
|
||||
kernelRequiringPrivateSurface = i;
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount());
|
||||
|
||||
auto privateAllocation = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface);
|
||||
|
||||
if (privateAllocation == nullptr) {
|
||||
privateAllocation = csr.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize});
|
||||
blockKernelManager->pushPrivateSurface(privateAllocation, kernelRequiringPrivateSurface);
|
||||
}
|
||||
|
||||
auto uEvent = make_releaseable<UserEvent>(pContext);
|
||||
auto clEvent = static_cast<cl_event>(uEvent.get());
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr);
|
||||
|
||||
EXPECT_FALSE(csr.isMadeResident(privateAllocation));
|
||||
uEvent->setStatus(CL_COMPLETE);
|
||||
EXPECT_TRUE(csr.isMadeResident(privateAllocation));
|
||||
}
|
||||
|
||||
ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount());
|
||||
|
||||
auto privateAllocation = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface);
|
||||
|
||||
if (privateAllocation == nullptr) {
|
||||
privateAllocation = csr.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize});
|
||||
blockKernelManager->pushPrivateSurface(privateAllocation, kernelRequiringPrivateSurface);
|
||||
}
|
||||
|
||||
auto uEvent = make_releaseable<UserEvent>(pContext);
|
||||
auto clEvent = static_cast<cl_event>(uEvent.get());
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr);
|
||||
|
||||
EXPECT_FALSE(csr.isMadeResident(privateAllocation));
|
||||
uEvent->setStatus(CL_COMPLETE);
|
||||
EXPECT_TRUE(csr.isMadeResident(privateAllocation));
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelIsCalledThenBlockKernelIsaAllocationIsMadeResident) {
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr);
|
||||
|
||||
auto blockCount = blockKernelManager->getCount();
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
auto blockCount = blockKernelManager->getCount();
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelManagerFilledWithBlocksWhenMakeInternalAllocationsResidentIsCalledThenAllSurfacesAreMadeResident) {
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
|
||||
blockKernelManager->makeInternalAllocationsResident(csr);
|
||||
blockKernelManager->makeInternalAllocationsResident(csr);
|
||||
|
||||
auto blockCount = blockKernelManager->getCount();
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
auto blockCount = blockKernelManager->getCount();
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenBlockKernelIsaAllocationIsMadeResidentWhenEventUnblocks) {
|
||||
if (pClDevice->areOcl21FeaturesSupported()) {
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
size_t offset[3] = {0, 0, 0};
|
||||
size_t gws[3] = {1, 1, 1};
|
||||
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.storeMakeResidentAllocations = true;
|
||||
|
||||
auto uEvent = make_releaseable<UserEvent>(pContext);
|
||||
auto clEvent = static_cast<cl_event>(uEvent.get());
|
||||
auto uEvent = make_releaseable<UserEvent>(pContext);
|
||||
auto clEvent = static_cast<cl_event>(uEvent.get());
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr);
|
||||
pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr);
|
||||
|
||||
auto blockCount = blockKernelManager->getCount();
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_FALSE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
auto blockCount = blockKernelManager->getCount();
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_FALSE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
|
||||
uEvent->setStatus(CL_COMPLETE);
|
||||
uEvent->setStatus(CL_COMPLETE);
|
||||
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
for (auto blockId = 0u; blockId < blockCount; blockId++) {
|
||||
EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation()));
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedSecondTimeThenDeviceQueueDSHIsResetToInitialOffset) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
auto dsh = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
size_t executionModelDSHUsedBefore = dsh->getUsed();
|
||||
auto dsh = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
size_t executionModelDSHUsedBefore = dsh->getUsed();
|
||||
|
||||
uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize;
|
||||
EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore);
|
||||
uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize;
|
||||
EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
size_t executionModelDSHUsedAfterFirst = dsh->getUsed();
|
||||
EXPECT_LT(executionModelDSHUsedBefore, executionModelDSHUsedAfterFirst);
|
||||
size_t executionModelDSHUsedAfterFirst = dsh->getUsed();
|
||||
EXPECT_LT(executionModelDSHUsedBefore, executionModelDSHUsedAfterFirst);
|
||||
|
||||
pDevQueueHw->resetDeviceQueue();
|
||||
pDevQueueHw->resetDeviceQueue();
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
size_t executionModelDSHUsedAfterSecond = dsh->getUsed();
|
||||
EXPECT_EQ(executionModelDSHUsedAfterFirst, executionModelDSHUsedAfterSecond);
|
||||
}
|
||||
size_t executionModelDSHUsedAfterSecond = dsh->getUsed();
|
||||
EXPECT_EQ(executionModelDSHUsedAfterFirst, executionModelDSHUsedAfterSecond);
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelAndNotUsedSSHWhenEnqueuedThenSSHIsNotReallocated) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
pKernel->createReflectionSurface();
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
auto ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace());
|
||||
|
||||
auto ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace());
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
auto ssh2 = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
EXPECT_EQ(ssh, ssh2);
|
||||
EXPECT_EQ(ssh->getGraphicsAllocation(), ssh2->getGraphicsAllocation());
|
||||
}
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
auto ssh2 = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
EXPECT_EQ(ssh, ssh2);
|
||||
EXPECT_EQ(ssh->getGraphicsAllocation(), ssh2->getGraphicsAllocation());
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurfaceStatesAreCopied) {
|
||||
@ -306,64 +289,61 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
|
||||
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
|
||||
size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize();
|
||||
|
||||
size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize();
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
auto ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
// prealign the ssh so that it won't need to be realigned in enqueueKernel
|
||||
// this way, we can assume the location in memory into which the surface states
|
||||
// will be copied
|
||||
ssh->align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
auto ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
// prealign the ssh so that it won't need to be realigned in enqueueKernel
|
||||
// this way, we can assume the location in memory into which the surface states
|
||||
// will be copied
|
||||
ssh->align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
// mark the assumed place for surface states
|
||||
size_t parentSshOffset = 0;
|
||||
ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
// mark the assumed place for surface states
|
||||
size_t parentSshOffset = 0;
|
||||
ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
void *blockSSH = ptrOffset(ssh->getCpuBase(), parentSshOffset + parentKernelSSHSize); // note : unaligned at this point
|
||||
|
||||
void *blockSSH = ptrOffset(ssh->getCpuBase(), parentSshOffset + parentKernelSSHSize); // note : unaligned at this point
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
ASSERT_NE(nullptr, pBlockInfo);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
|
||||
|
||||
ASSERT_NE(nullptr, pBlockInfo);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment);
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload);
|
||||
Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, nullptr);
|
||||
blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
if (blockKernel->getNumberOfBindingTableStates() > 0) {
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState);
|
||||
auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset);
|
||||
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE);
|
||||
auto dstBindingTable = reinterpret_cast<BINDING_TABLE_STATE *>(dstBlockBti);
|
||||
|
||||
Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, nullptr);
|
||||
blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
if (blockKernel->getNumberOfBindingTableStates() > 0) {
|
||||
ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState);
|
||||
auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset);
|
||||
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE);
|
||||
auto dstBindingTable = reinterpret_cast<BINDING_TABLE_STATE *>(dstBlockBti);
|
||||
|
||||
auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset);
|
||||
auto srcBindingTable = reinterpret_cast<const BINDING_TABLE_STATE *>(srcBlockBti);
|
||||
for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) {
|
||||
uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer();
|
||||
uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer();
|
||||
auto *dstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer));
|
||||
auto *srcSurfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer));
|
||||
EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE)));
|
||||
}
|
||||
|
||||
blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize());
|
||||
auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset);
|
||||
auto srcBindingTable = reinterpret_cast<const BINDING_TABLE_STATE *>(srcBlockBti);
|
||||
for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) {
|
||||
uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer();
|
||||
uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer();
|
||||
auto *dstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer));
|
||||
auto *srcSurfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer));
|
||||
EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE)));
|
||||
}
|
||||
|
||||
delete blockKernel;
|
||||
blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize());
|
||||
}
|
||||
|
||||
delete blockKernel;
|
||||
}
|
||||
}
|
||||
|
||||
@ -372,86 +352,78 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
EXPECT_NE(nullptr, pKernel->getKernelReflectionSurface());
|
||||
}
|
||||
EXPECT_NE(nullptr, pKernel->getKernelReflectionSurface());
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueIsNotReset) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
cl_queue_properties properties[3] = {0};
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
cl_queue_properties properties[3] = {0};
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
MockDeviceQueueHw<FamilyType> mockDevQueue(context, pClDevice, properties[0]);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
MockDeviceQueueHw<FamilyType> mockDevQueue(context, pClDevice, properties[0]);
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
// Acquire CS to check if reset queue was called
|
||||
mockDevQueue.acquireEMCriticalSection();
|
||||
|
||||
context->setDefaultDeviceQueue(&mockDevQueue);
|
||||
// Acquire CS to check if reset queue was called
|
||||
mockDevQueue.acquireEMCriticalSection();
|
||||
auto mockEvent = make_releaseable<UserEvent>(context);
|
||||
|
||||
auto mockEvent = make_releaseable<UserEvent>(context);
|
||||
cl_event eventBlocking = mockEvent.get();
|
||||
|
||||
cl_event eventBlocking = mockEvent.get();
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 1, &eventBlocking, nullptr);
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 1, &eventBlocking, nullptr);
|
||||
|
||||
EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree());
|
||||
}
|
||||
EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree());
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenNonBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueDSHAddressIsProgrammedInStateBaseAddressAndDSHIsMadeResident) {
|
||||
typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
|
||||
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
ASSERT_NE(nullptr, pDevQueueHw);
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
ASSERT_NE(nullptr, pDevQueueHw);
|
||||
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
|
||||
int32_t executionStamp = 0;
|
||||
auto mockCSR = new MockCsrBase<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
|
||||
pDevice->resetCommandStreamReceiver(mockCSR);
|
||||
int32_t executionStamp = 0;
|
||||
auto mockCSR = new MockCsrBase<FamilyType>(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
|
||||
pDevice->resetCommandStreamReceiver(mockCSR);
|
||||
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
|
||||
|
||||
auto &cmdStream = mockCSR->getCS(0);
|
||||
auto &cmdStream = mockCSR->getCS(0);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
hwParser.findHardwareCommands<FamilyType>();
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
hwParser.findHardwareCommands<FamilyType>();
|
||||
|
||||
auto stateBaseAddressItor = hwParser.itorStateBaseAddress;
|
||||
auto stateBaseAddressItor = hwParser.itorStateBaseAddress;
|
||||
|
||||
ASSERT_NE(hwParser.cmdList.end(), stateBaseAddressItor);
|
||||
ASSERT_NE(hwParser.cmdList.end(), stateBaseAddressItor);
|
||||
|
||||
auto *stateBaseAddress = (STATE_BASE_ADDRESS *)*stateBaseAddressItor;
|
||||
auto *stateBaseAddress = (STATE_BASE_ADDRESS *)*stateBaseAddressItor;
|
||||
|
||||
uint64_t addressProgrammed = stateBaseAddress->getDynamicStateBaseAddress();
|
||||
uint64_t addressProgrammed = stateBaseAddress->getDynamicStateBaseAddress();
|
||||
|
||||
EXPECT_EQ(addressProgrammed, pDevQueue->getDshBuffer()->getGpuAddress());
|
||||
EXPECT_EQ(addressProgrammed, pDevQueue->getDshBuffer()->getGpuAddress());
|
||||
|
||||
bool dshAllocationResident = false;
|
||||
bool dshAllocationResident = false;
|
||||
|
||||
for (auto allocation : mockCSR->madeResidentGfxAllocations) {
|
||||
if (allocation == pDevQueue->getDshBuffer()) {
|
||||
dshAllocationResident = true;
|
||||
break;
|
||||
}
|
||||
for (auto allocation : mockCSR->madeResidentGfxAllocations) {
|
||||
if (allocation == pDevQueue->getDshBuffer()) {
|
||||
dshAllocationResident = true;
|
||||
break;
|
||||
}
|
||||
EXPECT_TRUE(dshAllocationResident);
|
||||
}
|
||||
EXPECT_TRUE(dshAllocationResident);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ParentKernelEnqueueTest,
|
||||
|
@ -28,167 +28,155 @@ static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"};
|
||||
typedef ExecutionModelKernelTest ParentKernelDispatchTest;
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDeviceQueueDSHIsUsed) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
size_t dshUsedBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
|
||||
EXPECT_EQ(0u, dshUsedBefore);
|
||||
size_t dshUsedBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
|
||||
EXPECT_EQ(0u, dshUsedBefore);
|
||||
|
||||
size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
|
||||
size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
|
||||
|
||||
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
MultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
MultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
|
||||
EXPECT_EQ(0u, dshUsedAfter);
|
||||
size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
|
||||
EXPECT_EQ(0u, dshUsedAfter);
|
||||
|
||||
size_t executionModelDSHUsedAfter = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
|
||||
EXPECT_NE(executionModelDSHUsedBefore, executionModelDSHUsedAfter);
|
||||
}
|
||||
size_t executionModelDSHUsedAfter = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
|
||||
EXPECT_NE(executionModelDSHUsedBefore, executionModelDSHUsedAfter);
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenDynamicStateHeapIsRequestedThenDeviceQueueHeapIsReturned) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
auto ish = &getIndirectHeap<FamilyType, IndirectHeap::DYNAMIC_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
auto ishOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
auto ish = &getIndirectHeap<FamilyType, IndirectHeap::DYNAMIC_STATE>(*pCmdQ, multiDispatchInfo);
|
||||
auto ishOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
|
||||
EXPECT_EQ(ishOfDevQueue, ish);
|
||||
}
|
||||
EXPECT_EQ(ishOfDevQueue, ish);
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenIndirectObjectHeapIsRequestedThenDeviceQueueDSHIsReturned) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
auto ioh = &getIndirectHeap<FamilyType, IndirectHeap::INDIRECT_OBJECT>(*pCmdQ, multiDispatchInfo);
|
||||
auto dshOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
auto ioh = &getIndirectHeap<FamilyType, IndirectHeap::INDIRECT_OBJECT>(*pCmdQ, multiDispatchInfo);
|
||||
auto dshOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
|
||||
EXPECT_EQ(dshOfDevQueue, ioh);
|
||||
}
|
||||
EXPECT_EQ(dshOfDevQueue, ioh);
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDefaultCmdQIOHIsNotUsed) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
|
||||
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
|
||||
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
auto iohUsed = ioh.getUsed();
|
||||
EXPECT_EQ(0u, iohUsed);
|
||||
}
|
||||
auto iohUsed = ioh.getUsed();
|
||||
EXPECT_EQ(0u, iohUsed);
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSHSizeAccountForsBlocksSurfaceStates) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
MockMultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
|
||||
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);
|
||||
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);
|
||||
|
||||
EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace());
|
||||
EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace());
|
||||
|
||||
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
|
||||
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
|
||||
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
|
||||
|
||||
EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace());
|
||||
}
|
||||
EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace());
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSizeForParentIsAllocated) {
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
|
||||
const size_t globalOffsets[3] = {0, 0, 0};
|
||||
const size_t workItems[3] = {1, 1, 1};
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
MultiDispatchInfo multiDispatchInfo(pKernel);
|
||||
|
||||
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
ASSERT_NE(nullptr, blockedCommandsData);
|
||||
DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
multiDispatchInfo,
|
||||
CsrDependencies(),
|
||||
blockedCommandsData.get(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
CL_COMMAND_NDRANGE_KERNEL);
|
||||
ASSERT_NE(nullptr, blockedCommandsData);
|
||||
|
||||
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper<FamilyType>::getDefaultSshUsage();
|
||||
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
|
||||
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper<FamilyType>::getDefaultSshUsage();
|
||||
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
|
||||
|
||||
size_t sshUsed = blockedCommandsData->ssh->getUsed();
|
||||
size_t sshUsed = blockedCommandsData->ssh->getUsed();
|
||||
|
||||
size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) +
|
||||
pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) +
|
||||
UnitTestHelper<FamilyType>::getDefaultSshUsage();
|
||||
size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) +
|
||||
pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) +
|
||||
UnitTestHelper<FamilyType>::getDefaultSshUsage();
|
||||
|
||||
if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) {
|
||||
EXPECT_EQ(expectedSizeSSH, sshUsed);
|
||||
}
|
||||
|
||||
EXPECT_GE(minRequiredSize, sshUsed);
|
||||
// Total SSH size including EM must be greater than ssh allocated
|
||||
EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed);
|
||||
if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) {
|
||||
EXPECT_EQ(expectedSizeSSH, sshUsed);
|
||||
}
|
||||
|
||||
EXPECT_GE(minRequiredSize, sshUsed);
|
||||
// Total SSH size including EM must be greater than ssh allocated
|
||||
EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ParentKernelDispatchTest,
|
||||
|
@ -53,6 +53,8 @@ class ExecutionModelKernelTest : public ExecutionModelKernelFixture,
|
||||
public DeviceQueueFixture {
|
||||
public:
|
||||
void SetUp() override {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
|
||||
|
||||
DebugManager.flags.EnableTimestampPacket.set(0);
|
||||
ExecutionModelKernelFixture::SetUp();
|
||||
CommandQueueHwFixture::SetUp(pClDevice, 0);
|
||||
@ -60,10 +62,11 @@ class ExecutionModelKernelTest : public ExecutionModelKernelFixture,
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
|
||||
DeviceQueueFixture::TearDown();
|
||||
CommandQueueHwFixture::TearDown();
|
||||
ExecutionModelKernelFixture::TearDown();
|
||||
if (!IsSkipped()) {
|
||||
DeviceQueueFixture::TearDown();
|
||||
CommandQueueHwFixture::TearDown();
|
||||
ExecutionModelKernelFixture::TearDown();
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<KernelOperation> createBlockedCommandsData(CommandQueue &commandQueue) {
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "opencl/test/unit_test/fixtures/platform_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/program/program_from_binary.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace NEO;
|
||||
@ -19,17 +20,10 @@ class ExecutionModelKernelFixture : public ProgramFromBinaryTest,
|
||||
public PlatformFixture {
|
||||
protected:
|
||||
void SetUp() override {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
|
||||
|
||||
PlatformFixture::SetUp();
|
||||
|
||||
std::string temp;
|
||||
temp.assign(pPlatform->getClDevice(0)->getDeviceInfo().clVersion);
|
||||
|
||||
if (temp.find("OpenCL 1.2") != std::string::npos) {
|
||||
pDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr);
|
||||
pClDevice = new MockClDevice{pDevice};
|
||||
return;
|
||||
}
|
||||
|
||||
std::string options("-cl-std=CL2.0");
|
||||
this->setOptions(options);
|
||||
ProgramFromBinaryTest::SetUp();
|
||||
@ -58,25 +52,23 @@ class ExecutionModelKernelFixture : public ProgramFromBinaryTest,
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (IsSkipped()) {
|
||||
return;
|
||||
}
|
||||
if (pKernel != nullptr) {
|
||||
pKernel->release();
|
||||
}
|
||||
|
||||
std::string temp;
|
||||
temp.assign(pPlatform->getClDevice(0)->getDeviceInfo().clVersion);
|
||||
|
||||
ProgramFromBinaryTest::TearDown();
|
||||
PlatformFixture::TearDown();
|
||||
|
||||
if (temp.find("OpenCL 1.2") != std::string::npos) {
|
||||
if (pDevice != nullptr) {
|
||||
delete pDevice;
|
||||
pDevice = nullptr;
|
||||
}
|
||||
if (pClDevice != nullptr) {
|
||||
delete pClDevice;
|
||||
pClDevice = nullptr;
|
||||
}
|
||||
if (pDevice != nullptr) {
|
||||
delete pDevice;
|
||||
pDevice = nullptr;
|
||||
}
|
||||
if (pClDevice != nullptr) {
|
||||
delete pClDevice;
|
||||
pClDevice = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16,12 +16,6 @@ LKFTEST_F(LkfTest, givenLkfWhenSlmSizeIsRequiredThenReturnCorrectValue) {
|
||||
EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize);
|
||||
}
|
||||
|
||||
LKFTEST_F(LkfTest, givenLKFWhenCheckedOCLVersionThen21IsReported) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion);
|
||||
EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
|
||||
}
|
||||
|
||||
LKFTEST_F(LkfTest, givenLKFWhenCheckedSvmSupportThenNoSvmIsReported) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
EXPECT_EQ(caps.svmCapabilities, 0u);
|
||||
|
@ -14,13 +14,6 @@ using namespace NEO;
|
||||
|
||||
typedef Test<ClDeviceFixture> Gen12LpDeviceCaps;
|
||||
|
||||
GEN12LPTEST_F(Gen12LpDeviceCaps, givenGen12LpDeviceWhenQueryingDeviceInfoThenOcl30IsReported) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
auto expectedClCVersion = (pClDevice->isOcl21Conformant() ? "OpenCL C 3.0 " : "OpenCL C 1.2 ");
|
||||
EXPECT_STREQ("OpenCL 3.0 NEO ", caps.clVersion);
|
||||
EXPECT_STREQ(expectedClCVersion, caps.clCVersion);
|
||||
}
|
||||
|
||||
HWTEST2_F(Gen12LpDeviceCaps, lpSkusDontSupportFP64, IsTGLLP) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
std::string extensionString = caps.deviceExtensions;
|
||||
|
@ -7,7 +7,6 @@
|
||||
if(TESTS_BDW)
|
||||
set(IGDRCL_SRCS_tests_gen8_bdw
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_tests_bdw.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_bdw.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_bdw.cpp
|
||||
)
|
||||
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/test/unit_test/mocks/mock_device.h"
|
||||
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_source_level_debugger.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace NEO;
|
||||
struct BdwDeviceTest : public ClDeviceFixture,
|
||||
public ::testing::Test {
|
||||
void SetUp() override {
|
||||
ClDeviceFixture::SetUp();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
ClDeviceFixture::TearDown();
|
||||
}
|
||||
};
|
||||
|
||||
BDWTEST_F(BdwDeviceTest, givenBdwDeviceWhenAskedForClVersionThenReport21) {
|
||||
auto version = pClDevice->getEnabledClVersion();
|
||||
EXPECT_EQ(21u, version);
|
||||
}
|
@ -14,12 +14,6 @@ using namespace NEO;
|
||||
|
||||
typedef Test<ClDeviceFixture> BdwDeviceCaps;
|
||||
|
||||
BDWTEST_F(BdwDeviceCaps, givenBdwDeviceWhenAskedForClVersionThenReport21) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
|
||||
EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
|
||||
}
|
||||
|
||||
BDWTEST_F(BdwDeviceCaps, skuSpecificCaps) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
std::string extensionString = caps.deviceExtensions;
|
||||
|
@ -12,11 +12,6 @@ using namespace NEO;
|
||||
|
||||
typedef Test<ClDeviceFixture> DeviceTest;
|
||||
|
||||
BXTTEST_F(DeviceTest, getEnabledClVersion12Device) {
|
||||
auto version = pClDevice->getEnabledClVersion();
|
||||
EXPECT_EQ(12u, version);
|
||||
}
|
||||
|
||||
BXTTEST_F(DeviceTest, givenBxtDeviceWhenAskedForProflingTimerResolutionThen52IsReturned) {
|
||||
auto resolution = pDevice->getProfilingTimerResolution();
|
||||
EXPECT_DOUBLE_EQ(52.083, resolution);
|
||||
|
@ -12,23 +12,13 @@ using namespace NEO;
|
||||
|
||||
typedef Test<ClDeviceFixture> BxtDeviceCaps;
|
||||
|
||||
BXTTEST_F(BxtDeviceCaps, reportsOcl12) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion);
|
||||
EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
|
||||
}
|
||||
|
||||
BXTTEST_F(BxtDeviceCaps, BxtProfilingTimerResolution) {
|
||||
const auto &caps = pDevice->getDeviceInfo();
|
||||
EXPECT_EQ(52u, caps.outProfilingTimerResolution);
|
||||
}
|
||||
|
||||
BXTTEST_F(BxtDeviceCaps, BxtClVersionSupport) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
BXTTEST_F(BxtDeviceCaps, givenBxtDeviceWhenAskedFor32BitSupportThenCorrectValuesAreReturned) {
|
||||
const auto &sharedCaps = pDevice->getDeviceInfo();
|
||||
EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion);
|
||||
EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
|
||||
|
||||
auto memoryManager = pDevice->getMemoryManager();
|
||||
if (is64bit) {
|
||||
EXPECT_TRUE(memoryManager->peekForce32BitAllocations());
|
||||
|
@ -12,12 +12,6 @@ using namespace NEO;
|
||||
|
||||
typedef Test<ClDeviceFixture> CflDeviceCaps;
|
||||
|
||||
CFLTEST_F(CflDeviceCaps, reportsOcl21) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
|
||||
EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
|
||||
}
|
||||
|
||||
CFLTEST_F(CflDeviceCaps, GivenCFLWhenCheckftr64KBpagesThenTrue) {
|
||||
EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages);
|
||||
}
|
||||
|
@ -21,12 +21,6 @@ GLKTEST_F(Gen9DeviceCaps, givenGlkDeviceWhenAskedForDoubleSupportThenTrueIsRetur
|
||||
EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64);
|
||||
}
|
||||
|
||||
GLKTEST_F(Gen9DeviceCaps, GlkClVersionSupport) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion);
|
||||
EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
|
||||
}
|
||||
|
||||
GLKTEST_F(Gen9DeviceCaps, GlkIs32BitOsAllocatorAvailable) {
|
||||
const auto &caps = pDevice->getDeviceInfo();
|
||||
auto memoryManager = pDevice->getMemoryManager();
|
||||
|
@ -12,12 +12,6 @@ using namespace NEO;
|
||||
|
||||
typedef Test<ClDeviceFixture> KblDeviceCaps;
|
||||
|
||||
KBLTEST_F(KblDeviceCaps, reportsOcl21) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
|
||||
EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
|
||||
}
|
||||
|
||||
KBLTEST_F(KblDeviceCaps, GivenKBLWhenCheckftr64KBpagesThenTrue) {
|
||||
EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages);
|
||||
}
|
||||
|
@ -12,11 +12,6 @@ using namespace NEO;
|
||||
|
||||
typedef Test<ClDeviceFixture> DeviceTest;
|
||||
|
||||
SKLTEST_F(DeviceTest, getEnabledClVersion21Device) {
|
||||
auto version = pClDevice->getEnabledClVersion();
|
||||
EXPECT_EQ(21u, version);
|
||||
}
|
||||
|
||||
SKLTEST_F(DeviceTest, givenSklDeviceWhenAskedForProflingTimerResolutionThen83IsReturned) {
|
||||
auto resolution = pDevice->getProfilingTimerResolution();
|
||||
EXPECT_DOUBLE_EQ(83.333, resolution);
|
||||
|
@ -12,23 +12,13 @@ using namespace NEO;
|
||||
|
||||
typedef Test<ClDeviceFixture> SklDeviceCaps;
|
||||
|
||||
SKLTEST_F(SklDeviceCaps, reportsOcl21) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
|
||||
EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
|
||||
}
|
||||
|
||||
SKLTEST_F(SklDeviceCaps, SklProfilingTimerResolution) {
|
||||
const auto &caps = pDevice->getDeviceInfo();
|
||||
EXPECT_EQ(83u, caps.outProfilingTimerResolution);
|
||||
}
|
||||
|
||||
SKLTEST_F(SklDeviceCaps, givenSklDeviceWhenAskedFor32BitSupportThenFalseIsReturned) {
|
||||
const auto &caps = pClDevice->getDeviceInfo();
|
||||
const auto &sharedCaps = pDevice->getDeviceInfo();
|
||||
EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
|
||||
EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
|
||||
|
||||
auto memoryManager = pDevice->getMemoryManager();
|
||||
EXPECT_FALSE(memoryManager->peekForce32BitAllocations());
|
||||
EXPECT_FALSE(sharedCaps.force32BitAddressess);
|
||||
|
@ -29,23 +29,3 @@ GEN9TEST_F(Gen9PlatformCaps, allSkusSupportFP64) {
|
||||
EXPECT_EQ(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64")));
|
||||
}
|
||||
}
|
||||
|
||||
GEN9TEST_F(Gen9PlatformCaps, SKLVersion) {
|
||||
char *paramValue = new char[12];
|
||||
cl_int retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_VERSION, 12, paramValue, nullptr);
|
||||
if (pPlatform->getClDevice(0)->getHardwareInfo().platform.eProductFamily == IGFX_SKYLAKE) {
|
||||
EXPECT_STREQ(paramValue, "OpenCL 2.1 ");
|
||||
}
|
||||
EXPECT_EQ(retVal, CL_SUCCESS);
|
||||
delete[] paramValue;
|
||||
}
|
||||
|
||||
GEN9TEST_F(Gen9PlatformCaps, BXTVersion) {
|
||||
char *paramValue = new char[12];
|
||||
cl_int retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_VERSION, 12, paramValue, nullptr);
|
||||
if (pPlatform->getClDevice(0)->getHardwareInfo().platform.eProductFamily == IGFX_BROXTON) {
|
||||
EXPECT_STREQ(paramValue, "OpenCL 1.2 ");
|
||||
}
|
||||
EXPECT_EQ(retVal, CL_SUCCESS);
|
||||
delete[] paramValue;
|
||||
}
|
||||
|
@ -1057,37 +1057,37 @@ typedef ExecutionModelKernelFixture ParentKernelCommandsFromBinaryTest;
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, WhenGettingSizeRequiredForExecutionModelForSurfaceStatesThenReturnSizeOfBlocksPlusMaxBindingTableSizeForAllIdtEntriesAndSchedulerSshSize) {
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo);
|
||||
|
||||
size_t totalSize = 0;
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
BlockKernelManager *blockManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
|
||||
size_t totalSize = 0;
|
||||
|
||||
totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1; // for initial alignment
|
||||
BlockKernelManager *blockManager = pKernel->getProgram()->getBlockKernelManager();
|
||||
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
|
||||
|
||||
uint32_t maxBindingTableCount = 0;
|
||||
totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1; // for initial alignment
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
uint32_t maxBindingTableCount = 0;
|
||||
|
||||
totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize;
|
||||
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState ? pBlockInfo->patchInfo.bindingTableState->Count : 0);
|
||||
}
|
||||
|
||||
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
|
||||
|
||||
auto &scheduler = pContext->getSchedulerKernel();
|
||||
auto schedulerSshSize = scheduler.getSurfaceStateHeapSize();
|
||||
totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0);
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
|
||||
totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize;
|
||||
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
EXPECT_EQ(totalSize, HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel));
|
||||
maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState ? pBlockInfo->patchInfo.bindingTableState->Count : 0);
|
||||
}
|
||||
|
||||
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
|
||||
|
||||
auto &scheduler = pContext->getSchedulerKernel();
|
||||
auto schedulerSshSize = scheduler.getSurfaceStateHeapSize();
|
||||
totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0);
|
||||
|
||||
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
EXPECT_EQ(totalSize, HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel));
|
||||
}
|
||||
|
||||
static const char *binaryFile = "simple_block_kernel";
|
||||
|
@ -37,9 +37,7 @@ typedef ExecutionModelKernelFixture KernelReflectionSurfaceTest;
|
||||
typedef ExecutionModelKernelTest KernelReflectionSurfaceWithQueueTest;
|
||||
|
||||
TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelThenKernelReflectionSurfaceIsNull) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_EQ(nullptr, pKernel->getKernelReflectionSurface());
|
||||
}
|
||||
EXPECT_EQ(nullptr, pKernel->getKernelReflectionSurface());
|
||||
}
|
||||
|
||||
TEST_P(KernelReflectionSurfaceTest, GivenEmptyKernelInfoWhenPassedToGetCurbeParamsThenEmptyVectorIsReturned) {
|
||||
@ -486,127 +484,121 @@ TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithoutLocalMemoryParameterWh
|
||||
}
|
||||
|
||||
TEST_P(KernelReflectionSurfaceTest, WhenGettingCurbeParamsThenReturnedVectorIsSortedIncreasing) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
|
||||
EXPECT_NE(0u, blockCount);
|
||||
EXPECT_NE(0u, blockCount);
|
||||
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
|
||||
for (size_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
uint64_t tokenMask = 0;
|
||||
uint32_t firstSSHTokenIndex = 0;
|
||||
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
|
||||
for (size_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
uint64_t tokenMask = 0;
|
||||
uint32_t firstSSHTokenIndex = 0;
|
||||
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
|
||||
|
||||
if (pBlockInfo->name.find("simple_block_kernel") == std::string::npos) {
|
||||
EXPECT_LT(1u, curbeParamsForBlock.size());
|
||||
}
|
||||
if (pBlockInfo->name.find("simple_block_kernel") == std::string::npos) {
|
||||
EXPECT_LT(1u, curbeParamsForBlock.size());
|
||||
}
|
||||
|
||||
for (size_t i = 1; i < curbeParamsForBlock.size(); i++) {
|
||||
EXPECT_LE(curbeParamsForBlock[i - 1].m_parameterType, curbeParamsForBlock[i].m_parameterType);
|
||||
if (curbeParamsForBlock[i - 1].m_parameterType == curbeParamsForBlock[i].m_parameterType) {
|
||||
for (size_t i = 1; i < curbeParamsForBlock.size(); i++) {
|
||||
EXPECT_LE(curbeParamsForBlock[i - 1].m_parameterType, curbeParamsForBlock[i].m_parameterType);
|
||||
if (curbeParamsForBlock[i - 1].m_parameterType == curbeParamsForBlock[i].m_parameterType) {
|
||||
|
||||
if (curbeParamsForBlock[i - 1].m_parameterType == iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_LOCAL_WORK_SIZE) {
|
||||
EXPECT_LE(curbeParamsForBlock[i - 1].m_patchOffset, curbeParamsForBlock[i].m_patchOffset);
|
||||
} else {
|
||||
EXPECT_LE(curbeParamsForBlock[i - 1].m_sourceOffset, curbeParamsForBlock[i].m_sourceOffset);
|
||||
}
|
||||
if (curbeParamsForBlock[i - 1].m_parameterType == iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_LOCAL_WORK_SIZE) {
|
||||
EXPECT_LE(curbeParamsForBlock[i - 1].m_patchOffset, curbeParamsForBlock[i].m_patchOffset);
|
||||
} else {
|
||||
EXPECT_LE(curbeParamsForBlock[i - 1].m_sourceOffset, curbeParamsForBlock[i].m_sourceOffset);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex);
|
||||
curbeParamsForBlock.resize(0);
|
||||
}
|
||||
EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex);
|
||||
curbeParamsForBlock.resize(0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(KernelReflectionSurfaceTest, WhenGettingCurbeParamsThenReturnedVectorHasExpectedParamTypes) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
|
||||
EXPECT_NE(0u, blockCount);
|
||||
EXPECT_NE(0u, blockCount);
|
||||
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
|
||||
for (size_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
uint64_t tokenMask = 0;
|
||||
uint32_t firstSSHTokenIndex = 0;
|
||||
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
|
||||
for (size_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
uint64_t tokenMask = 0;
|
||||
uint32_t firstSSHTokenIndex = 0;
|
||||
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
|
||||
|
||||
const uint32_t bufferType = 49;
|
||||
const uint32_t imageType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 50;
|
||||
const uint32_t samplerType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 100;
|
||||
const uint32_t bufferType = 49;
|
||||
const uint32_t imageType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 50;
|
||||
const uint32_t samplerType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 100;
|
||||
|
||||
bool bufferFound = false;
|
||||
bool imageFound = false;
|
||||
bool samplerFound = false;
|
||||
bool bufferFound = false;
|
||||
bool imageFound = false;
|
||||
bool samplerFound = false;
|
||||
|
||||
if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) {
|
||||
EXPECT_LT(1u, curbeParamsForBlock.size());
|
||||
if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) {
|
||||
EXPECT_LT(1u, curbeParamsForBlock.size());
|
||||
|
||||
for (const auto &curbeParams : curbeParamsForBlock) {
|
||||
for (const auto &curbeParams : curbeParamsForBlock) {
|
||||
|
||||
switch (curbeParams.m_parameterType) {
|
||||
case bufferType:
|
||||
bufferFound = true;
|
||||
break;
|
||||
case imageType:
|
||||
imageFound = true;
|
||||
break;
|
||||
case samplerType:
|
||||
samplerFound = true;
|
||||
break;
|
||||
}
|
||||
switch (curbeParams.m_parameterType) {
|
||||
case bufferType:
|
||||
bufferFound = true;
|
||||
break;
|
||||
case imageType:
|
||||
imageFound = true;
|
||||
break;
|
||||
case samplerType:
|
||||
samplerFound = true;
|
||||
break;
|
||||
}
|
||||
|
||||
EXPECT_TRUE(bufferFound);
|
||||
EXPECT_TRUE(imageFound);
|
||||
EXPECT_TRUE(samplerFound);
|
||||
}
|
||||
EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex);
|
||||
curbeParamsForBlock.resize(0);
|
||||
|
||||
EXPECT_TRUE(bufferFound);
|
||||
EXPECT_TRUE(imageFound);
|
||||
EXPECT_TRUE(samplerFound);
|
||||
}
|
||||
EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex);
|
||||
curbeParamsForBlock.resize(0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(KernelReflectionSurfaceTest, WhenGettingCurbeParamsThenTokenMaskIsCorrect) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
|
||||
EXPECT_NE(0u, blockCount);
|
||||
EXPECT_NE(0u, blockCount);
|
||||
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
|
||||
for (size_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
for (size_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
|
||||
uint64_t tokenMask = 0;
|
||||
uint32_t firstSSHTokenIndex = 0;
|
||||
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
|
||||
uint64_t tokenMask = 0;
|
||||
uint32_t firstSSHTokenIndex = 0;
|
||||
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
|
||||
|
||||
if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) {
|
||||
EXPECT_LT(1u, curbeParamsForBlock.size());
|
||||
if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) {
|
||||
EXPECT_LT(1u, curbeParamsForBlock.size());
|
||||
|
||||
const uint64_t bufferToken = (uint64_t)1 << 63;
|
||||
const uint64_t imageToken = (uint64_t)1 << 50;
|
||||
const uint64_t samplerToken = (uint64_t)1 << 51;
|
||||
const uint64_t bufferToken = (uint64_t)1 << 63;
|
||||
const uint64_t imageToken = (uint64_t)1 << 50;
|
||||
const uint64_t samplerToken = (uint64_t)1 << 51;
|
||||
|
||||
uint64_t expectedTokens = bufferToken | imageToken | samplerToken;
|
||||
EXPECT_NE(0u, tokenMask & expectedTokens);
|
||||
}
|
||||
|
||||
curbeParamsForBlock.resize(0);
|
||||
uint64_t expectedTokens = bufferToken | imageToken | samplerToken;
|
||||
EXPECT_NE(0u, tokenMask & expectedTokens);
|
||||
}
|
||||
|
||||
curbeParamsForBlock.resize(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -755,105 +747,103 @@ TEST(KernelReflectionSurfaceTestSingle, GivenDeviceQueueKernelArgWhenObtainingKe
|
||||
}
|
||||
|
||||
TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelReflectionSurfaceThenKernelReflectionSurfaceIsCorrect) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
|
||||
EXPECT_EQ(3u, blockCount);
|
||||
EXPECT_EQ(3u, blockCount);
|
||||
|
||||
size_t maxConstantBufferSize = 0;
|
||||
size_t parentImageCount = 0;
|
||||
size_t parentSamplerCount = 0;
|
||||
size_t maxConstantBufferSize = 0;
|
||||
size_t parentImageCount = 0;
|
||||
size_t parentSamplerCount = 0;
|
||||
|
||||
if (pKernel->getKernelInfo().name == "kernel_reflection") {
|
||||
parentImageCount = 1;
|
||||
parentSamplerCount = 1;
|
||||
}
|
||||
if (pKernel->getKernelInfo().name == "kernel_reflection") {
|
||||
parentImageCount = 1;
|
||||
parentSamplerCount = 1;
|
||||
}
|
||||
|
||||
size_t samplerStateArrayAndBorderColorTotalSize = 0;
|
||||
size_t totalCurbeParamsSize = 0;
|
||||
size_t samplerStateArrayAndBorderColorTotalSize = 0;
|
||||
size_t totalCurbeParamsSize = 0;
|
||||
|
||||
std::vector<size_t> blockCurbeParamCounts(blockCount);
|
||||
std::vector<size_t> samplerStateAndBorderColorSizes(blockCount);
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
std::vector<size_t> blockCurbeParamCounts(blockCount);
|
||||
std::vector<size_t> samplerStateAndBorderColorSizes(blockCount);
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
|
||||
for (size_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
uint64_t tokenMask = 0;
|
||||
uint32_t firstSSHTokenIndex = 0;
|
||||
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
|
||||
for (size_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
uint64_t tokenMask = 0;
|
||||
uint32_t firstSSHTokenIndex = 0;
|
||||
MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());
|
||||
|
||||
blockCurbeParamCounts[i] = curbeParamsForBlock.size();
|
||||
blockCurbeParamCounts[i] = curbeParamsForBlock.size();
|
||||
|
||||
maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast<size_t>(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize));
|
||||
totalCurbeParamsSize += blockCurbeParamCounts[i];
|
||||
maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast<size_t>(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize));
|
||||
totalCurbeParamsSize += blockCurbeParamCounts[i];
|
||||
|
||||
size_t samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(pDevice->getHardwareInfo());
|
||||
samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment);
|
||||
samplerStateAndBorderColorSize += pBlockInfo->getBorderColorStateSize();
|
||||
samplerStateAndBorderColorSizes[i] = samplerStateAndBorderColorSize;
|
||||
size_t samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(pDevice->getHardwareInfo());
|
||||
samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment);
|
||||
samplerStateAndBorderColorSize += pBlockInfo->getBorderColorStateSize();
|
||||
samplerStateAndBorderColorSizes[i] = samplerStateAndBorderColorSize;
|
||||
|
||||
samplerStateArrayAndBorderColorTotalSize += alignUp(samplerStateAndBorderColorSizes[i], sizeof(void *));
|
||||
curbeParamsForBlock.clear();
|
||||
}
|
||||
samplerStateArrayAndBorderColorTotalSize += alignUp(samplerStateAndBorderColorSizes[i], sizeof(void *));
|
||||
curbeParamsForBlock.clear();
|
||||
}
|
||||
|
||||
totalCurbeParamsSize *= sizeof(IGIL_KernelCurbeParams);
|
||||
totalCurbeParamsSize *= sizeof(IGIL_KernelCurbeParams);
|
||||
|
||||
size_t expectedReflectionSurfaceSize = alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) * blockCount, sizeof(void *));
|
||||
expectedReflectionSurfaceSize += alignUp(sizeof(IGIL_KernelData), sizeof(void *)) * blockCount;
|
||||
expectedReflectionSurfaceSize += (parentSamplerCount * sizeof(IGIL_SamplerParams) + maxConstantBufferSize) * blockCount +
|
||||
totalCurbeParamsSize +
|
||||
parentImageCount * sizeof(IGIL_ImageParamters) +
|
||||
parentSamplerCount * sizeof(IGIL_ParentSamplerParams) +
|
||||
samplerStateArrayAndBorderColorTotalSize;
|
||||
size_t expectedReflectionSurfaceSize = alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) * blockCount, sizeof(void *));
|
||||
expectedReflectionSurfaceSize += alignUp(sizeof(IGIL_KernelData), sizeof(void *)) * blockCount;
|
||||
expectedReflectionSurfaceSize += (parentSamplerCount * sizeof(IGIL_SamplerParams) + maxConstantBufferSize) * blockCount +
|
||||
totalCurbeParamsSize +
|
||||
parentImageCount * sizeof(IGIL_ImageParamters) +
|
||||
parentSamplerCount * sizeof(IGIL_ParentSamplerParams) +
|
||||
samplerStateArrayAndBorderColorTotalSize;
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
auto reflectionSurface = pKernel->getKernelReflectionSurface();
|
||||
pKernel->createReflectionSurface();
|
||||
auto reflectionSurface = pKernel->getKernelReflectionSurface();
|
||||
|
||||
ASSERT_NE(nullptr, reflectionSurface);
|
||||
EXPECT_EQ(expectedReflectionSurfaceSize, reflectionSurface->getUnderlyingBufferSize());
|
||||
ASSERT_NE(nullptr, reflectionSurface);
|
||||
EXPECT_EQ(expectedReflectionSurfaceSize, reflectionSurface->getUnderlyingBufferSize());
|
||||
|
||||
IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
|
||||
IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
|
||||
|
||||
uint32_t parentImages = 0;
|
||||
uint32_t parentSamplers = 0;
|
||||
uint32_t parentImages = 0;
|
||||
uint32_t parentSamplers = 0;
|
||||
|
||||
if (pKernel->getKernelInfo().name == "kernel_reflection") {
|
||||
parentImages = 1;
|
||||
parentSamplers = 1;
|
||||
EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentSamplerParamsOffset);
|
||||
}
|
||||
if (pKernel->getKernelInfo().name == "kernel_reflection") {
|
||||
parentImages = 1;
|
||||
parentSamplers = 1;
|
||||
EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentSamplerParamsOffset);
|
||||
}
|
||||
|
||||
EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
|
||||
EXPECT_EQ(parentImages, pKernelHeader->m_ParentKernelImageCount);
|
||||
EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentImageDataOffset);
|
||||
EXPECT_EQ(parentSamplers, pKernelHeader->m_ParentSamplerCount);
|
||||
EXPECT_NE(pKernelHeader->m_ParentImageDataOffset, pKernelHeader->m_ParentSamplerParamsOffset);
|
||||
EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
|
||||
EXPECT_EQ(parentImages, pKernelHeader->m_ParentKernelImageCount);
|
||||
EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentImageDataOffset);
|
||||
EXPECT_EQ(parentSamplers, pKernelHeader->m_ParentSamplerCount);
|
||||
EXPECT_NE(pKernelHeader->m_ParentImageDataOffset, pKernelHeader->m_ParentSamplerParamsOffset);
|
||||
|
||||
// Curbe tokens
|
||||
EXPECT_NE(0u, totalCurbeParamsSize);
|
||||
// Curbe tokens
|
||||
EXPECT_NE(0u, totalCurbeParamsSize);
|
||||
|
||||
for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
|
||||
IGIL_KernelAddressData *addressData = pKernelHeader->m_data;
|
||||
EXPECT_NE(0u, addressData->m_KernelDataOffset);
|
||||
EXPECT_NE(0u, addressData->m_BTSize);
|
||||
EXPECT_NE(0u, addressData->m_SSHTokensOffset);
|
||||
EXPECT_NE(0u, addressData->m_ConstantBufferOffset);
|
||||
EXPECT_NE(0u, addressData->m_BTSoffset);
|
||||
for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
|
||||
IGIL_KernelAddressData *addressData = pKernelHeader->m_data;
|
||||
EXPECT_NE(0u, addressData->m_KernelDataOffset);
|
||||
EXPECT_NE(0u, addressData->m_BTSize);
|
||||
EXPECT_NE(0u, addressData->m_SSHTokensOffset);
|
||||
EXPECT_NE(0u, addressData->m_ConstantBufferOffset);
|
||||
EXPECT_NE(0u, addressData->m_BTSoffset);
|
||||
|
||||
IGIL_KernelData *kernelData = reinterpret_cast<IGIL_KernelData *>(ptrOffset(pKernelHeader, (size_t)(addressData->m_KernelDataOffset)));
|
||||
IGIL_KernelData *kernelData = reinterpret_cast<IGIL_KernelData *>(ptrOffset(pKernelHeader, (size_t)(addressData->m_KernelDataOffset)));
|
||||
|
||||
EXPECT_NE_VAL(0u, kernelData->m_SIMDSize);
|
||||
EXPECT_NE_VAL(0u, kernelData->m_PatchTokensMask);
|
||||
EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeParams);
|
||||
EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeTokens);
|
||||
EXPECT_NE_VAL(0u, kernelData->m_sizeOfConstantBuffer);
|
||||
EXPECT_NE_VAL(0u, kernelData->m_SIMDSize);
|
||||
EXPECT_NE_VAL(0u, kernelData->m_PatchTokensMask);
|
||||
EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeParams);
|
||||
EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeTokens);
|
||||
EXPECT_NE_VAL(0u, kernelData->m_sizeOfConstantBuffer);
|
||||
|
||||
for (uint32_t j = 0; j < kernelData->m_numberOfCurbeParams; j++) {
|
||||
EXPECT_NE_VAL(0u, kernelData->m_data[j].m_parameterType);
|
||||
}
|
||||
for (uint32_t j = 0; j < kernelData->m_numberOfCurbeParams; j++) {
|
||||
EXPECT_NE_VAL(0u, kernelData->m_data[j].m_parameterType);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1026,138 +1016,130 @@ INSTANTIATE_TEST_CASE_P(KernelReflectionSurfaceTest,
|
||||
::testing::ValuesIn(KernelNames)));
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingKernelReflectionSurfacePatchesThenCurbeIsBlocked) {
|
||||
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pPlatform->getClDevice(0));
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_NE(0u, blockCount);
|
||||
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
|
||||
EXPECT_NE(0u, blockCount);
|
||||
pKernel->createReflectionSurface();
|
||||
pKernel->patchReflectionSurface(pDevQueue, nullptr);
|
||||
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
auto *reflectionSurface = pKernel->getKernelReflectionSurface();
|
||||
ASSERT_NE(nullptr, reflectionSurface);
|
||||
void *reflectionSurfaceMemory = reflectionSurface->getUnderlyingBuffer();
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
pKernel->patchReflectionSurface(pDevQueue, nullptr);
|
||||
IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
|
||||
|
||||
auto *reflectionSurface = pKernel->getKernelReflectionSurface();
|
||||
ASSERT_NE(nullptr, reflectionSurface);
|
||||
void *reflectionSurfaceMemory = reflectionSurface->getUnderlyingBuffer();
|
||||
EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
|
||||
|
||||
IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
|
||||
for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
|
||||
EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
|
||||
IGIL_KernelAddressData *addressData = pKernelHeader->m_data;
|
||||
|
||||
for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
EXPECT_NE(0u, addressData[i].m_ConstantBufferOffset);
|
||||
|
||||
IGIL_KernelAddressData *addressData = pKernelHeader->m_data;
|
||||
void *pCurbe = ptrOffset(reflectionSurfaceMemory, (size_t)(addressData[i].m_ConstantBufferOffset));
|
||||
|
||||
EXPECT_NE(0u, addressData[i].m_ConstantBufferOffset);
|
||||
|
||||
void *pCurbe = ptrOffset(reflectionSurfaceMemory, (size_t)(addressData[i].m_ConstantBufferOffset));
|
||||
|
||||
if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface) {
|
||||
auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset);
|
||||
if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) {
|
||||
uint32_t *patchedValue = static_cast<uint32_t *>(patchedPointer);
|
||||
uint64_t patchedValue64 = *patchedValue;
|
||||
EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), patchedValue64);
|
||||
} else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) {
|
||||
uint64_t *patchedValue = static_cast<uint64_t *>(patchedPointer);
|
||||
EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), *patchedValue);
|
||||
}
|
||||
if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface) {
|
||||
auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset);
|
||||
if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) {
|
||||
uint32_t *patchedValue = static_cast<uint32_t *>(patchedPointer);
|
||||
uint64_t patchedValue64 = *patchedValue;
|
||||
EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), patchedValue64);
|
||||
} else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) {
|
||||
uint64_t *patchedValue = static_cast<uint64_t *>(patchedPointer);
|
||||
EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), *patchedValue);
|
||||
}
|
||||
}
|
||||
|
||||
if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
|
||||
auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset);
|
||||
if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) {
|
||||
if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
|
||||
auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset);
|
||||
if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) {
|
||||
uint32_t *patchedValue = static_cast<uint32_t *>(patchedPointer);
|
||||
uint64_t patchedValue64 = *patchedValue;
|
||||
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedValue64);
|
||||
} else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) {
|
||||
uint64_t *patchedValue = static_cast<uint64_t *>(patchedPointer);
|
||||
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &arg : pBlockInfo->kernelArgInfo) {
|
||||
if (arg.isDeviceQueue) {
|
||||
|
||||
auto *patchedPointer = ptrOffset(pCurbe, arg.kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
if (arg.kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) {
|
||||
uint32_t *patchedValue = static_cast<uint32_t *>(patchedPointer);
|
||||
uint64_t patchedValue64 = *patchedValue;
|
||||
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedValue64);
|
||||
} else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) {
|
||||
} else if (arg.kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) {
|
||||
uint64_t *patchedValue = static_cast<uint64_t *>(patchedPointer);
|
||||
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &arg : pBlockInfo->kernelArgInfo) {
|
||||
if (arg.isDeviceQueue) {
|
||||
|
||||
auto *patchedPointer = ptrOffset(pCurbe, arg.kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
if (arg.kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) {
|
||||
uint32_t *patchedValue = static_cast<uint32_t *>(patchedPointer);
|
||||
uint64_t patchedValue64 = *patchedValue;
|
||||
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedValue64);
|
||||
} else if (arg.kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) {
|
||||
uint64_t *patchedValue = static_cast<uint64_t *>(patchedPointer);
|
||||
EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingKernelReflectionSurfaceThenParentImageAndSamplersParamsAreSet) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
|
||||
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
|
||||
size_t blockCount = blockManager->getCount();
|
||||
EXPECT_NE(0u, blockCount);
|
||||
|
||||
EXPECT_NE(0u, blockCount);
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
|
||||
std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;
|
||||
std::unique_ptr<Image> image3d(ImageHelper<Image3dDefaults>::create(context));
|
||||
std::unique_ptr<Sampler> sampler(new MockSampler(context,
|
||||
true,
|
||||
(cl_addressing_mode)CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
(cl_filter_mode)CL_FILTER_LINEAR));
|
||||
|
||||
std::unique_ptr<Image> image3d(ImageHelper<Image3dDefaults>::create(context));
|
||||
std::unique_ptr<Sampler> sampler(new MockSampler(context,
|
||||
true,
|
||||
(cl_addressing_mode)CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
(cl_filter_mode)CL_FILTER_LINEAR));
|
||||
cl_sampler samplerCl = sampler.get();
|
||||
cl_mem imageCl = image3d.get();
|
||||
|
||||
cl_sampler samplerCl = sampler.get();
|
||||
cl_mem imageCl = image3d.get();
|
||||
if (pKernel->getKernelInfo().name == "kernel_reflection") {
|
||||
pKernel->setArgSampler(0, sizeof(cl_sampler), &samplerCl);
|
||||
pKernel->setArgImage(1, sizeof(cl_mem), &imageCl);
|
||||
}
|
||||
|
||||
if (pKernel->getKernelInfo().name == "kernel_reflection") {
|
||||
pKernel->setArgSampler(0, sizeof(cl_sampler), &samplerCl);
|
||||
pKernel->setArgImage(1, sizeof(cl_mem), &imageCl);
|
||||
}
|
||||
pKernel->createReflectionSurface();
|
||||
|
||||
pKernel->createReflectionSurface();
|
||||
auto *reflectionSurface = pKernel->getKernelReflectionSurface();
|
||||
ASSERT_NE(nullptr, reflectionSurface);
|
||||
|
||||
auto *reflectionSurface = pKernel->getKernelReflectionSurface();
|
||||
ASSERT_NE(nullptr, reflectionSurface);
|
||||
IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
|
||||
IGIL_ImageParamters *pParentImageParams = reinterpret_cast<IGIL_ImageParamters *>(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentImageDataOffset));
|
||||
IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast<IGIL_ParentSamplerParams *>(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentSamplerParamsOffset));
|
||||
|
||||
IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
|
||||
IGIL_ImageParamters *pParentImageParams = reinterpret_cast<IGIL_ImageParamters *>(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentImageDataOffset));
|
||||
IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast<IGIL_ParentSamplerParams *>(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentSamplerParamsOffset));
|
||||
memset(pParentImageParams, 0xff, sizeof(IGIL_ImageParamters) * pKernelHeader->m_ParentKernelImageCount);
|
||||
memset(pParentSamplerParams, 0xff, sizeof(IGIL_ParentSamplerParams) * pKernelHeader->m_ParentSamplerCount);
|
||||
|
||||
memset(pParentImageParams, 0xff, sizeof(IGIL_ImageParamters) * pKernelHeader->m_ParentKernelImageCount);
|
||||
memset(pParentSamplerParams, 0xff, sizeof(IGIL_ParentSamplerParams) * pKernelHeader->m_ParentSamplerCount);
|
||||
pKernel->patchReflectionSurface(pDevQueue, nullptr);
|
||||
|
||||
pKernel->patchReflectionSurface(pDevQueue, nullptr);
|
||||
EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
|
||||
|
||||
EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
|
||||
for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
|
||||
|
||||
for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
|
||||
|
||||
if (pKernelHeader->m_ParentKernelImageCount > 0) {
|
||||
uint32_t imageIndex = 0;
|
||||
for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) {
|
||||
if (arg.isImage) {
|
||||
EXPECT_EQ(arg.offsetHeap, pParentImageParams[imageIndex].m_ObjectID);
|
||||
imageIndex++;
|
||||
}
|
||||
if (pKernelHeader->m_ParentKernelImageCount > 0) {
|
||||
uint32_t imageIndex = 0;
|
||||
for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) {
|
||||
if (arg.isImage) {
|
||||
EXPECT_EQ(arg.offsetHeap, pParentImageParams[imageIndex].m_ObjectID);
|
||||
imageIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pKernelHeader->m_ParentSamplerCount > 0) {
|
||||
uint32_t samplerIndex = 0;
|
||||
for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) {
|
||||
if (arg.isSampler) {
|
||||
EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(arg.offsetHeap), pParentSamplerParams[samplerIndex].m_ObjectID);
|
||||
samplerIndex++;
|
||||
}
|
||||
if (pKernelHeader->m_ParentSamplerCount > 0) {
|
||||
uint32_t samplerIndex = 0;
|
||||
for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) {
|
||||
if (arg.isSampler) {
|
||||
EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(arg.offsetHeap), pParentSamplerParams[samplerIndex].m_ObjectID);
|
||||
samplerIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -193,11 +193,8 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsA
|
||||
}
|
||||
|
||||
TEST_P(ParentKernelFromBinaryTest, GivenParentKernelWhenGettingInstructionHeapSizeForExecutionModelThenSizeIsGreaterThanZero) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
|
||||
EXPECT_LT(0u, pKernel->getInstructionHeapSizeForExecutionModel());
|
||||
}
|
||||
EXPECT_TRUE(pKernel->isParentKernel);
|
||||
EXPECT_LT(0u, pKernel->getInstructionHeapSizeForExecutionModel());
|
||||
}
|
||||
|
||||
static const char *binaryFile = "simple_block_kernel";
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "opencl/test/unit_test/mocks/mock_platform.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_source_level_debugger.h"
|
||||
#include "opencl/test/unit_test/mocks/ult_cl_device_factory.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
@ -96,14 +97,15 @@ TEST_F(PlatformTest, WhenGetClDevicesIsCalledThenExpectedValuesAreReturned) {
|
||||
}
|
||||
|
||||
TEST_F(PlatformTest, givenSupportingCl21WhenGettingExtensionsStringThenSubgroupsIsEnabled) {
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
pPlatform->initializeWithNewDevices();
|
||||
auto compilerExtensions = pPlatform->getClDevice(0)->peekCompilerExtensions();
|
||||
|
||||
auto isIndependentForwardProgressSupported = pPlatform->getClDevice(0)->getDeviceInfo().independentForwardProgress;
|
||||
|
||||
EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string(" -cl-ext=-all,+cl")));
|
||||
if ((std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) &&
|
||||
isIndependentForwardProgressSupported) {
|
||||
if (isIndependentForwardProgressSupported) {
|
||||
EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_khr_subgroups")));
|
||||
}
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
#include "opencl/test/unit_test/program/program_from_binary.h"
|
||||
#include "opencl/test/unit_test/program/program_with_source.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
#include "test.h"
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
@ -180,87 +181,87 @@ class ProgramNonUniformTest : public ContextFixture,
|
||||
};
|
||||
|
||||
TEST_F(ProgramNonUniformTest, GivenCl21WhenExecutingKernelWithNonUniformThenEnqueueSucceeds) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) {
|
||||
CreateProgramFromBinary(pContext, &device, "kernel_data_param");
|
||||
auto mockProgram = (MockProgram *)pProgram;
|
||||
ASSERT_NE(nullptr, mockProgram);
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
mockProgram->setBuildOptions("-cl-std=CL2.1");
|
||||
retVal = mockProgram->build(
|
||||
1,
|
||||
&device,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
false);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
CreateProgramFromBinary(pContext, &device, "kernel_data_param");
|
||||
auto mockProgram = (MockProgram *)pProgram;
|
||||
ASSERT_NE(nullptr, mockProgram);
|
||||
|
||||
auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size");
|
||||
EXPECT_NE(nullptr, pKernelInfo);
|
||||
mockProgram->setBuildOptions("-cl-std=CL2.1");
|
||||
retVal = mockProgram->build(
|
||||
1,
|
||||
&device,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
false);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
// create a kernel
|
||||
auto pKernel = Kernel::create<MockKernel>(mockProgram, *pKernelInfo, &retVal);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
ASSERT_NE(nullptr, pKernel);
|
||||
auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size");
|
||||
EXPECT_NE(nullptr, pKernelInfo);
|
||||
|
||||
size_t globalWorkSize[3] = {12, 12, 12};
|
||||
size_t localWorkSize[3] = {11, 12, 1};
|
||||
// create a kernel
|
||||
auto pKernel = Kernel::create<MockKernel>(mockProgram, *pKernelInfo, &retVal);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
ASSERT_NE(nullptr, pKernel);
|
||||
|
||||
retVal = pCmdQ->enqueueKernel(
|
||||
pKernel,
|
||||
3,
|
||||
nullptr,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
size_t globalWorkSize[3] = {12, 12, 12};
|
||||
size_t localWorkSize[3] = {11, 12, 1};
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
retVal = pCmdQ->enqueueKernel(
|
||||
pKernel,
|
||||
3,
|
||||
nullptr,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
TEST_F(ProgramNonUniformTest, GivenCl20WhenExecutingKernelWithNonUniformThenEnqueueSucceeds) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.0") != std::string::npos) {
|
||||
CreateProgramFromBinary(pContext, &device, "kernel_data_param");
|
||||
auto mockProgram = pProgram;
|
||||
ASSERT_NE(nullptr, mockProgram);
|
||||
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
|
||||
|
||||
mockProgram->setBuildOptions("-cl-std=CL2.0");
|
||||
retVal = mockProgram->build(
|
||||
1,
|
||||
&device,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
false);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
CreateProgramFromBinary(pContext, &device, "kernel_data_param");
|
||||
auto mockProgram = pProgram;
|
||||
ASSERT_NE(nullptr, mockProgram);
|
||||
|
||||
auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size");
|
||||
EXPECT_NE(nullptr, pKernelInfo);
|
||||
mockProgram->setBuildOptions("-cl-std=CL2.0");
|
||||
retVal = mockProgram->build(
|
||||
1,
|
||||
&device,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
false);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
// create a kernel
|
||||
auto pKernel = Kernel::create<MockKernel>(mockProgram, *pKernelInfo, &retVal);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
ASSERT_NE(nullptr, pKernel);
|
||||
auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size");
|
||||
EXPECT_NE(nullptr, pKernelInfo);
|
||||
|
||||
size_t globalWorkSize[3] = {12, 12, 12};
|
||||
size_t localWorkSize[3] = {11, 12, 12};
|
||||
// create a kernel
|
||||
auto pKernel = Kernel::create<MockKernel>(mockProgram, *pKernelInfo, &retVal);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
ASSERT_NE(nullptr, pKernel);
|
||||
|
||||
retVal = pCmdQ->enqueueKernel(
|
||||
pKernel,
|
||||
3,
|
||||
nullptr,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
size_t globalWorkSize[3] = {12, 12, 12};
|
||||
size_t localWorkSize[3] = {11, 12, 1};
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
retVal = pCmdQ->enqueueKernel(
|
||||
pKernel,
|
||||
3,
|
||||
nullptr,
|
||||
globalWorkSize,
|
||||
localWorkSize,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
TEST_F(ProgramNonUniformTest, GivenCl12WhenExecutingKernelWithNonUniformThenInvalidWorkGroupSizeIsReturned) {
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "opencl/test/unit_test/fixtures/run_kernel_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@ -38,6 +39,7 @@ class ProgramWithBlockKernelsTest : public ContextFixture,
|
||||
device = pPlatform->getClDevice(0);
|
||||
ContextFixture::SetUp(1, &device);
|
||||
ProgramFixture::SetUp();
|
||||
REQUIRE_OCL_21_OR_SKIP(pContext);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
@ -50,85 +52,77 @@ class ProgramWithBlockKernelsTest : public ContextFixture,
|
||||
};
|
||||
|
||||
TEST_F(ProgramWithBlockKernelsTest, GivenKernelWithBlockKernelsWhenProgramIsBuildingThenKernelInfosHaveCorrectNames) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
|
||||
CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0");
|
||||
auto mockProgram = (MockProgram *)pProgram;
|
||||
ASSERT_NE(nullptr, mockProgram);
|
||||
CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0");
|
||||
auto mockProgram = (MockProgram *)pProgram;
|
||||
ASSERT_NE(nullptr, mockProgram);
|
||||
|
||||
retVal = mockProgram->build(
|
||||
1,
|
||||
&device,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
false);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
retVal = mockProgram->build(
|
||||
1,
|
||||
&device,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
false);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto kernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel");
|
||||
EXPECT_NE(nullptr, kernelInfo);
|
||||
auto kernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel");
|
||||
EXPECT_NE(nullptr, kernelInfo);
|
||||
|
||||
auto blockKernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel_dispatch_0");
|
||||
EXPECT_EQ(nullptr, blockKernelInfo);
|
||||
auto blockKernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel_dispatch_0");
|
||||
EXPECT_EQ(nullptr, blockKernelInfo);
|
||||
|
||||
std::vector<const KernelInfo *> blockKernelInfos(mockProgram->blockKernelManager->getCount());
|
||||
std::vector<const KernelInfo *> blockKernelInfos(mockProgram->blockKernelManager->getCount());
|
||||
|
||||
for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) {
|
||||
const KernelInfo *blockKernelInfo = mockProgram->blockKernelManager->getBlockKernelInfo(i);
|
||||
EXPECT_NE(nullptr, blockKernelInfo);
|
||||
blockKernelInfos[i] = blockKernelInfo;
|
||||
}
|
||||
|
||||
bool blockKernelFound = false;
|
||||
for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) {
|
||||
if (blockKernelInfos[i]->name.find("simple_block_kernel_dispatch") != std::string::npos) {
|
||||
blockKernelFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_TRUE(blockKernelFound);
|
||||
|
||||
} else {
|
||||
EXPECT_EQ(nullptr, pProgram);
|
||||
for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) {
|
||||
const KernelInfo *blockKernelInfo = mockProgram->blockKernelManager->getBlockKernelInfo(i);
|
||||
EXPECT_NE(nullptr, blockKernelInfo);
|
||||
blockKernelInfos[i] = blockKernelInfo;
|
||||
}
|
||||
|
||||
bool blockKernelFound = false;
|
||||
for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) {
|
||||
if (blockKernelInfos[i]->name.find("simple_block_kernel_dispatch") != std::string::npos) {
|
||||
blockKernelFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_TRUE(blockKernelFound);
|
||||
}
|
||||
|
||||
TEST_F(ProgramWithBlockKernelsTest, GivenKernelWithBlockKernelsWhenProgramIsLinkedThenBlockKernelsAreSeparated) {
|
||||
if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.0") != std::string::npos) {
|
||||
CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0");
|
||||
const char *buildOptions = "-cl-std=CL2.0";
|
||||
CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0");
|
||||
const char *buildOptions = "-cl-std=CL2.0";
|
||||
|
||||
overwriteBuiltInBinaryName(
|
||||
&pPlatform->getClDevice(0)->getDevice(),
|
||||
"simple_block_kernel", true);
|
||||
overwriteBuiltInBinaryName(
|
||||
&pPlatform->getClDevice(0)->getDevice(),
|
||||
"simple_block_kernel", true);
|
||||
|
||||
ASSERT_NE(nullptr, pProgram);
|
||||
ASSERT_NE(nullptr, pProgram);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
Program *programLinked = new Program(*pPlatform->peekExecutionEnvironment(), pContext, false, nullptr);
|
||||
cl_program program = pProgram;
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
Program *programLinked = new Program(*pPlatform->peekExecutionEnvironment(), pContext, false, nullptr);
|
||||
cl_program program = pProgram;
|
||||
|
||||
retVal = pProgram->compile(1, &device, buildOptions, 0, nullptr, nullptr, nullptr, nullptr);
|
||||
retVal = pProgram->compile(1, &device, buildOptions, 0, nullptr, nullptr, nullptr, nullptr);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
retVal = programLinked->link(1, &device, buildOptions, 1, &program, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
retVal = programLinked->link(1, &device, buildOptions, 1, &program, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
BlockKernelManager *blockManager = programLinked->getBlockKernelManager();
|
||||
BlockKernelManager *blockManager = programLinked->getBlockKernelManager();
|
||||
|
||||
EXPECT_NE(0u, blockManager->getCount());
|
||||
EXPECT_NE(0u, blockManager->getCount());
|
||||
|
||||
for (uint32_t i = 0; i < blockManager->getCount(); i++) {
|
||||
const KernelInfo *info = blockManager->getBlockKernelInfo(i);
|
||||
if (info->name.find("simple_block_kernel_dispatch") != std::string::npos) {
|
||||
break;
|
||||
}
|
||||
for (uint32_t i = 0; i < blockManager->getCount(); i++) {
|
||||
const KernelInfo *info = blockManager->getBlockKernelInfo(i);
|
||||
if (info->name.find("simple_block_kernel_dispatch") != std::string::npos) {
|
||||
break;
|
||||
}
|
||||
restoreBuiltInBinaryName(nullptr);
|
||||
delete programLinked;
|
||||
} else {
|
||||
EXPECT_EQ(nullptr, pProgram);
|
||||
}
|
||||
restoreBuiltInBinaryName(nullptr);
|
||||
delete programLinked;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
Reference in New Issue
Block a user