From 7d0f23bfc9d6ba857e5d22bccebbbdbfa3fd3ca4 Mon Sep 17 00:00:00 2001 From: Filip Hazubski Date: Wed, 7 Oct 2020 13:37:34 +0200 Subject: [PATCH] Enable OpenCL 3.0 by default on all devices Change-Id: Ic5e46177c957896c499b7aa6727af48105b664ac Signed-off-by: Filip Hazubski --- opencl/source/gen11/hw_info_ehl.inl | 2 +- opencl/source/gen11/hw_info_icllp.inl | 2 +- opencl/source/gen11/hw_info_lkf.inl | 2 +- opencl/source/gen8/hw_info_bdw.inl | 2 +- opencl/source/gen9/hw_info_bxt.inl | 2 +- opencl/source/gen9/hw_info_cfl.inl | 2 +- opencl/source/gen9/hw_info_glk.inl | 2 +- opencl/source/gen9/hw_info_kbl.inl | 2 +- opencl/source/gen9/hw_info_skl.inl | 2 +- .../cl_get_kernel_sub_group_info_tests.inl | 600 +++++++++--------- .../unit_test/device/device_caps_tests.cpp | 9 +- .../device_queue/device_queue_hw_tests.cpp | 161 +++-- .../enqueue_execution_model_kernel_tests.cpp | 534 ++++++++-------- .../parent_kernel_dispatch_tests.cpp | 220 +++---- .../fixtures/execution_model_fixture.h | 11 +- .../fixtures/execution_model_kernel_fixture.h | 34 +- .../gen11/lkf/test_device_caps_lkf.cpp | 6 - .../gen12lp/test_device_caps_gen12lp.inl | 7 - opencl/test/unit_test/gen8/bdw/CMakeLists.txt | 1 - .../unit_test/gen8/bdw/device_tests_bdw.cpp | 29 - .../gen8/bdw/test_device_caps_bdw.cpp | 6 - .../unit_test/gen9/bxt/device_tests_bxt.cpp | 5 - .../gen9/bxt/test_device_caps_bxt.cpp | 12 +- .../gen9/cfl/test_device_caps_cfl.cpp | 6 - .../gen9/glk/test_device_caps_glk.cpp | 6 - .../gen9/kbl/test_device_caps_kbl.cpp | 6 - .../unit_test/gen9/skl/device_tests_skl.cpp | 5 - .../gen9/skl/test_device_caps_skl.cpp | 10 - .../gen9/test_platform_caps_gen9.cpp | 20 - .../hardware_commands_helper_tests.cpp | 42 +- .../kernel_reflection_surface_tests.cpp | 484 +++++++------- .../unit_test/kernel/parent_kernel_tests.cpp | 7 +- .../unit_test/platform/platform_tests.cpp | 6 +- .../unit_test/program/program_nonuniform.cpp | 133 ++-- .../program_with_block_kernels_tests.cpp | 114 ++-- 35 files changed, 1151 
insertions(+), 1341 deletions(-) delete mode 100644 opencl/test/unit_test/gen8/bdw/device_tests_bdw.cpp diff --git a/opencl/source/gen11/hw_info_ehl.inl b/opencl/source/gen11/hw_info_ehl.inl index fee9873ae5..71f2d68cea 100644 --- a/opencl/source/gen11/hw_info_ehl.inl +++ b/opencl/source/gen11/hw_info_ehl.inl @@ -46,7 +46,7 @@ const RuntimeCapabilityTable EHL::capabilityTable{ PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency - 12, // clVersionSupport + 30, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Ehl, // aubDeviceId 1, // extraQuantityThreadsPerEU 64, // slmSize diff --git a/opencl/source/gen11/hw_info_icllp.inl b/opencl/source/gen11/hw_info_icllp.inl index 9056ce6ec8..76fdf5047f 100644 --- a/opencl/source/gen11/hw_info_icllp.inl +++ b/opencl/source/gen11/hw_info_icllp.inl @@ -47,7 +47,7 @@ const RuntimeCapabilityTable ICLLP::capabilityTable{ PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency - 21, // clVersionSupport + 30, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Icllp, // aubDeviceId 1, // extraQuantityThreadsPerEU 64, // slmSize diff --git a/opencl/source/gen11/hw_info_lkf.inl b/opencl/source/gen11/hw_info_lkf.inl index 6d0fd4f96e..57c5fab77f 100644 --- a/opencl/source/gen11/hw_info_lkf.inl +++ b/opencl/source/gen11/hw_info_lkf.inl @@ -46,7 +46,7 @@ const RuntimeCapabilityTable LKF::capabilityTable{ PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency - 12, // clVersionSupport + 30, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Lkf, // aubDeviceId 1, // extraQuantityThreadsPerEU 64, // slmSize diff --git a/opencl/source/gen8/hw_info_bdw.inl b/opencl/source/gen8/hw_info_bdw.inl index 3723ef785b..24088dbcc1 100644 --- a/opencl/source/gen8/hw_info_bdw.inl +++ b/opencl/source/gen8/hw_info_bdw.inl @@ 
-51,7 +51,7 @@ const RuntimeCapabilityTable BDW::capabilityTable{ PreemptionMode::Disabled, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency - 21, // clVersionSupport + 30, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Bdw, // aubDeviceId 0, // extraQuantityThreadsPerEU 64, // slmSize diff --git a/opencl/source/gen9/hw_info_bxt.inl b/opencl/source/gen9/hw_info_bxt.inl index b6df06e07d..0357950215 100644 --- a/opencl/source/gen9/hw_info_bxt.inl +++ b/opencl/source/gen9/hw_info_bxt.inl @@ -48,7 +48,7 @@ const RuntimeCapabilityTable BXT::capabilityTable{ PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency - 12, // clVersionSupport + 30, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Bxt, // aubDeviceId 0, // extraQuantityThreadsPerEU 64, // slmSize diff --git a/opencl/source/gen9/hw_info_cfl.inl b/opencl/source/gen9/hw_info_cfl.inl index f97856ba0d..05d5988c42 100644 --- a/opencl/source/gen9/hw_info_cfl.inl +++ b/opencl/source/gen9/hw_info_cfl.inl @@ -43,7 +43,7 @@ const RuntimeCapabilityTable CFL::capabilityTable{ PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency - 21, // clVersionSupport + 30, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Cfl, // aubDeviceId 0, // extraQuantityThreadsPerEU 64, // slmSize diff --git a/opencl/source/gen9/hw_info_glk.inl b/opencl/source/gen9/hw_info_glk.inl index a883dd874d..1daa113098 100644 --- a/opencl/source/gen9/hw_info_glk.inl +++ b/opencl/source/gen9/hw_info_glk.inl @@ -43,7 +43,7 @@ const RuntimeCapabilityTable GLK::capabilityTable{ PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency - 12, // clVersionSupport + 30, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Glk, // aubDeviceId 0, // extraQuantityThreadsPerEU 
64, // slmSize diff --git a/opencl/source/gen9/hw_info_kbl.inl b/opencl/source/gen9/hw_info_kbl.inl index 223a74d0d3..de77f94076 100644 --- a/opencl/source/gen9/hw_info_kbl.inl +++ b/opencl/source/gen9/hw_info_kbl.inl @@ -43,7 +43,7 @@ const RuntimeCapabilityTable KBL::capabilityTable{ PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency - 21, // clVersionSupport + 30, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Kbl, // aubDeviceId 0, // extraQuantityThreadsPerEU 64, // slmSize diff --git a/opencl/source/gen9/hw_info_skl.inl b/opencl/source/gen9/hw_info_skl.inl index fe743b06b8..2cc529f5d5 100644 --- a/opencl/source/gen9/hw_info_skl.inl +++ b/opencl/source/gen9/hw_info_skl.inl @@ -51,7 +51,7 @@ const RuntimeCapabilityTable SKL::capabilityTable{ PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency - 21, // clVersionSupport + 30, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Skl, // aubDeviceId 0, // extraQuantityThreadsPerEU 64, // slmSize diff --git a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl index 7c062636af..2f23be31ea 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl @@ -6,6 +6,7 @@ */ #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" +#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; @@ -91,37 +92,37 @@ INSTANTIATE_TEST_CASE_P(wgs, ::testing::ValuesIn(WorkDimensions))); TEST_P(KernelSubGroupInfoReturnSizeTest, GivenWorkGroupSizeWhenGettingMaxSubGroupSizeThenReturnIsCalculatedCorrectly) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - WorkSizeParam workSize; - size_t workDim; - std::tie(workSize, workDim) = 
GetParam(); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - memset(inputValue, 0, sizeof(inputValue)); - inputValue[0] = workSize.x; - if (workDim > 1) { - inputValue[1] = workSize.y; - } - if (workDim > 2) { - inputValue[2] = workSize.z; - } - paramValueSizeRet = 0; + WorkSizeParam workSize; + size_t workDim; + std::tie(workSize, workDim) = GetParam(); - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, - sizeof(size_t) * workDim, - inputValue, - sizeof(size_t), - paramValue, - &paramValueSizeRet); - - EXPECT_EQ(retVal, CL_SUCCESS); - - EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); - - EXPECT_EQ(maxSimdSize, paramValue[0]); + memset(inputValue, 0, sizeof(inputValue)); + inputValue[0] = workSize.x; + if (workDim > 1) { + inputValue[1] = workSize.y; } + if (workDim > 2) { + inputValue[2] = workSize.z; + } + paramValueSizeRet = 0; + + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, + sizeof(size_t) * workDim, + inputValue, + sizeof(size_t), + paramValue, + &paramValueSizeRet); + + EXPECT_EQ(retVal, CL_SUCCESS); + + EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); + + EXPECT_EQ(maxSimdSize, paramValue[0]); } typedef KernelSubGroupInfoParamFixture> KernelSubGroupInfoReturnCountTest; @@ -133,48 +134,48 @@ INSTANTIATE_TEST_CASE_P(wgs, ::testing::ValuesIn(WorkDimensions))); TEST_P(KernelSubGroupInfoReturnCountTest, GivenWorkGroupSizeWhenGettingSubGroupCountThenReturnIsCalculatedCorrectly) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - WorkSizeParam workSize; - size_t workDim; - std::tie(workSize, workDim) = GetParam(); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - memset(inputValue, 0, sizeof(inputValue)); - inputValue[0] = workSize.x; - if (workDim > 1) { - inputValue[1] = workSize.y; - } - if (workDim > 2) { - inputValue[2] = workSize.z; - } - paramValueSizeRet = 0; + WorkSizeParam workSize; + size_t workDim; + std::tie(workSize,
workDim) = GetParam(); - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, - sizeof(size_t) * workDim, - inputValue, - sizeof(size_t), - paramValue, - &paramValueSizeRet); + memset(inputValue, 0, sizeof(inputValue)); + inputValue[0] = workSize.x; + if (workDim > 1) { + inputValue[1] = workSize.y; + } + if (workDim > 2) { + inputValue[2] = workSize.z; + } + paramValueSizeRet = 0; - EXPECT_EQ(CL_SUCCESS, retVal); + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, + sizeof(size_t) * workDim, + inputValue, + sizeof(size_t), + paramValue, + &paramValueSizeRet); - EXPECT_EQ(sizeof(size_t), paramValueSizeRet); + EXPECT_EQ(CL_SUCCESS, retVal); - auto calculatedWGS = workSize.x; - if (workDim > 1) { - calculatedWGS *= workSize.y; - } - if (workDim > 2) { - calculatedWGS *= workSize.z; - } + EXPECT_EQ(sizeof(size_t), paramValueSizeRet); - if (calculatedWGS % maxSimdSize == 0) { - EXPECT_EQ(calculatedWGS / maxSimdSize, paramValue[0]); - } else { - EXPECT_EQ((calculatedWGS / maxSimdSize) + 1, paramValue[0]); - } + auto calculatedWGS = workSize.x; + if (workDim > 1) { + calculatedWGS *= workSize.y; + } + if (workDim > 2) { + calculatedWGS *= workSize.z; + } + + if (calculatedWGS % maxSimdSize == 0) { + EXPECT_EQ(calculatedWGS / maxSimdSize, paramValue[0]); + } else { + EXPECT_EQ((calculatedWGS / maxSimdSize) + 1, paramValue[0]); } } @@ -189,158 +190,158 @@ INSTANTIATE_TEST_CASE_P(sgn, ::testing::ValuesIn(WorkDimensions))); TEST_P(KernelSubGroupInfoReturnLocalSizeTest, GivenWorkGroupSizeWhenGettingLocalSizeThenReturnIsCalculatedCorrectly) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - size_t subGroupsNum; - size_t workDim; - std::tie(subGroupsNum, workDim) = GetParam(); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - inputValue[0] = subGroupsNum; + size_t subGroupsNum; + size_t workDim; + std::tie(subGroupsNum, workDim) = GetParam(); -
retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, - sizeof(size_t), - inputValue, - sizeof(size_t) * workDim, - paramValue, - &paramValueSizeRet); + inputValue[0] = subGroupsNum; - EXPECT_EQ(CL_SUCCESS, retVal); + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, + sizeof(size_t), + inputValue, + sizeof(size_t) * workDim, + paramValue, + &paramValueSizeRet); - EXPECT_EQ(sizeof(size_t) * workDim, paramValueSizeRet); + EXPECT_EQ(CL_SUCCESS, retVal); - size_t workGroupSize = subGroupsNum * largestCompiledSIMDSize; - if (workGroupSize > calculatedMaxWorkgroupSize) { - workGroupSize = 0; - } + EXPECT_EQ(sizeof(size_t) * workDim, paramValueSizeRet); - EXPECT_EQ(workGroupSize, paramValue[0]); - if (workDim > 1) { - EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[1]); - } - if (workDim > 2) { - EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[2]); - } + size_t workGroupSize = subGroupsNum * largestCompiledSIMDSize; + if (workGroupSize > calculatedMaxWorkgroupSize) { + workGroupSize = 0; + } + + EXPECT_EQ(workGroupSize, paramValue[0]); + if (workDim > 1) { + EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[1]); + } + if (workDim > 2) { + EXPECT_EQ(workGroupSize ?
1u : 0u, paramValue[2]); } } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnMaxNumberTest; TEST_F(KernelSubGroupInfoReturnMaxNumberTest, GivenWorkGroupSizeWhenGettingMaxNumSubGroupsThenReturnIsCalculatedCorrectly) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - CL_KERNEL_MAX_NUM_SUB_GROUPS, - 0, - nullptr, - sizeof(size_t), - paramValue, - &paramValueSizeRet); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); - EXPECT_EQ(paramValue[0], Math::divideAndRoundUp(calculatedMaxWorkgroupSize, largestCompiledSIMDSize)); - } + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + CL_KERNEL_MAX_NUM_SUB_GROUPS, + 0, + nullptr, + sizeof(size_t), + paramValue, + &paramValueSizeRet); + + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); + EXPECT_EQ(paramValue[0], Math::divideAndRoundUp(calculatedMaxWorkgroupSize, largestCompiledSIMDSize)); } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnCompileNumberTest; TEST_F(KernelSubGroupInfoReturnCompileNumberTest, GivenKernelWhenGettingCompileNumSubGroupThenReturnIsCalculatedCorrectly) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - CL_KERNEL_COMPILE_NUM_SUB_GROUPS, - 0, - nullptr, - sizeof(size_t), - paramValue, - &paramValueSizeRet); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); - EXPECT_EQ(paramValue[0], static_cast(pKernel->getKernelInfo().patchInfo.executionEnvironment->CompiledSubGroupsNumber)); - } + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + CL_KERNEL_COMPILE_NUM_SUB_GROUPS, + 0, + nullptr, + sizeof(size_t), + paramValue, + &paramValueSizeRet); + + EXPECT_EQ(CL_SUCCESS, retVal); +
EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); + EXPECT_EQ(paramValue[0], static_cast(pKernel->getKernelInfo().patchInfo.executionEnvironment->CompiledSubGroupsNumber)); } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnCompileSizeTest; TEST_F(KernelSubGroupInfoReturnCompileSizeTest, GivenKernelWhenGettingCompileSubGroupSizeThenReturnIsCalculatedCorrectly) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, - 0, - nullptr, - sizeof(size_t), - paramValue, - &paramValueSizeRet); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - EXPECT_EQ(CL_SUCCESS, retVal); + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, + 0, + nullptr, + sizeof(size_t), + paramValue, + &paramValueSizeRet); - EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); + EXPECT_EQ(CL_SUCCESS, retVal); - size_t requiredSubGroupSize = 0; - auto start = pKernel->getKernelInfo().attributes.find("intel_reqd_sub_group_size("); - if (start != std::string::npos) { - start += strlen("intel_reqd_sub_group_size("); - auto stop = pKernel->getKernelInfo().attributes.find(")", start); - requiredSubGroupSize = stoi(pKernel->getKernelInfo().attributes.substr(start, stop - start)); - } + EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); - EXPECT_EQ(paramValue[0], requiredSubGroupSize); + size_t requiredSubGroupSize = 0; + auto start = pKernel->getKernelInfo().attributes.find("intel_reqd_sub_group_size("); + if (start != std::string::npos) { + start += strlen("intel_reqd_sub_group_size("); + auto stop = pKernel->getKernelInfo().attributes.find(")", start); + requiredSubGroupSize = stoi(pKernel->getKernelInfo().attributes.substr(start, stop - start)); } + + EXPECT_EQ(paramValue[0], requiredSubGroupSize); } TEST_F(KernelSubGroupInfoTest, GivenNullKernelWhenGettingSubGroupInfoThenInvalidKernelErrorIsReturned) { - if
(std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - retVal = clGetKernelSubGroupInfo( - nullptr, - pClDevice, - 0, - 0, - nullptr, - 0, - nullptr, - nullptr); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - EXPECT_EQ(CL_INVALID_KERNEL, retVal); - } + retVal = clGetKernelSubGroupInfo( + nullptr, + pClDevice, + 0, + 0, + nullptr, + 0, + nullptr, + nullptr); + + EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(KernelSubGroupInfoTest, GivenNullDeviceWhenGettingSubGroupInfoThenInvalidDeviceErrorIsReturned) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - retVal = clGetKernelSubGroupInfo( - pKernel, - nullptr, - 0, - 0, - nullptr, - 0, - nullptr, - nullptr); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - EXPECT_EQ(CL_INVALID_DEVICE, retVal); - } + retVal = clGetKernelSubGroupInfo( + pKernel, + nullptr, + 0, + 0, + nullptr, + 0, + nullptr, + nullptr); + + EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(KernelSubGroupInfoTest, GivenInvalidParamNameWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - 0, - sizeof(size_t), - inputValue, - sizeof(size_t), - paramValue, - nullptr); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - EXPECT_EQ(CL_INVALID_VALUE, retVal); - } + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + 0, + sizeof(size_t), + inputValue, + sizeof(size_t), + paramValue, + nullptr); + + EXPECT_EQ(CL_INVALID_VALUE, retVal); } uint32_t /*cl_kernel_sub_group_info*/ KernelSubGroupInfoInputParams[] = { @@ -385,167 +386,166 @@ TEST_P(KernelSubGroupInfoInputParamsTest, GivenOpenClVersionLowerThan21WhenGetti } TEST_P(KernelSubGroupInfoInputParamsTest, GivenWorkDimZeroWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != 
std::string::npos) { - bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - GetParam(), - 0, - inputValue, - 0, - nullptr, - nullptr); + bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); - EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); - } + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + GetParam(), + 0, + inputValue, + 0, + nullptr, + nullptr); + + EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenIndivisibleWorkDimWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); - size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) - ? maxWorkDim - : 1; + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - GetParam(), - (sizeof(size_t) * workDim) - 1, - inputValue, - 0, - nullptr, - nullptr); + bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); + size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) + ? 
maxWorkDim + : 1; - EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); - } + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + GetParam(), + (sizeof(size_t) * workDim) - 1, + inputValue, + 0, + nullptr, + nullptr); + + EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenWorkDimGreaterThanMaxWorkDimWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); - size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) - ? maxWorkDim - : 1; + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - GetParam(), - sizeof(size_t) * (workDim + 1), - inputValue, - 0, - nullptr, - nullptr); + bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); + size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) + ? maxWorkDim + : 1; - EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); - } + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + GetParam(), + sizeof(size_t) * (workDim + 1), + inputValue, + 0, + nullptr, + nullptr); + + EXPECT_EQ(requireInput ? 
CL_INVALID_VALUE : CL_SUCCESS, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenInputValueIsNullWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); - size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || - (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) - ? maxWorkDim - : 1; + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - GetParam(), - sizeof(size_t) * (workDim), - nullptr, - 0, - nullptr, - nullptr); + bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); + size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || + (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) + ? maxWorkDim + : 1; - EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); - } + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + GetParam(), + sizeof(size_t) * (workDim), + nullptr, + 0, + nullptr, + nullptr); + + EXPECT_EQ(requireInput ? 
CL_INVALID_VALUE : CL_SUCCESS, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenParamValueSizeZeroWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - GetParam(), - sizeof(size_t), - inputValue, - 0, - paramValue, - nullptr); + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + GetParam(), + sizeof(size_t), + inputValue, + 0, + paramValue, + nullptr); - EXPECT_EQ(CL_INVALID_VALUE, retVal); - } + EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenUnalignedParamValueSizeWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1; + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - GetParam(), - sizeof(size_t), - inputValue, - (sizeof(size_t) * workDim) - 1, - paramValue, - nullptr); + size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1; - EXPECT_EQ(CL_INVALID_VALUE, retVal); - } + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + GetParam(), + sizeof(size_t), + inputValue, + (sizeof(size_t) * workDim) - 1, + paramValue, + nullptr); + + EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenTooLargeParamValueSizeWhenGettingSubGroupInfoThenCorrectRetValIsReturned) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); - size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? 
maxWorkDim : 1; + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - // paramValue size / sizeof(size_t) > MaxWorkDim - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - GetParam(), - sizeof(size_t), - inputValue, - sizeof(size_t) * (workDim + 1), - paramValue, - nullptr); + bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); + size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1; - EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal); - } + // paramValue size / sizeof(size_t) > MaxWorkDim + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + GetParam(), + sizeof(size_t), + inputValue, + sizeof(size_t) * (workDim + 1), + paramValue, + nullptr); + + EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenNullPtrForReturnWhenGettingKernelSubGroupInfoThenSuccessIsReturned) { - if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - retVal = clGetKernelSubGroupInfo( - pKernel, - pClDevice, - GetParam(), - sizeof(size_t), - inputValue, - 0, - nullptr, - nullptr); + bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); - EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal); - } + retVal = clGetKernelSubGroupInfo( + pKernel, + pClDevice, + GetParam(), + sizeof(size_t), + inputValue, + 0, + nullptr, + nullptr); + + EXPECT_EQ(requireOutputArray ? 
CL_INVALID_VALUE : CL_SUCCESS, retVal); } } // namespace ULT diff --git a/opencl/test/unit_test/device/device_caps_tests.cpp b/opencl/test/unit_test/device/device_caps_tests.cpp index 5440aa1b98..16cdfe8397 100644 --- a/opencl/test/unit_test/device/device_caps_tests.cpp +++ b/opencl/test/unit_test/device/device_caps_tests.cpp @@ -127,8 +127,9 @@ TEST_F(DeviceGetCapsTest, WhenCreatingDeviceThenCapsArePopulatedCorrectly) { EXPECT_NE(nullptr, caps.vendor); EXPECT_NE(nullptr, caps.driverVersion); EXPECT_NE(nullptr, caps.profile); - EXPECT_NE(nullptr, caps.clVersion); - EXPECT_NE(nullptr, caps.clCVersion); + EXPECT_STREQ("OpenCL 3.0 NEO ", caps.clVersion); + auto expectedClCVersion = (device->isOcl21Conformant() ? "OpenCL C 3.0 " : "OpenCL C 1.2 "); + EXPECT_STREQ(expectedClCVersion, caps.clCVersion); EXPECT_NE(0u, caps.numericClVersion); EXPECT_GT(caps.openclCAllVersions.size(), 0u); EXPECT_GT(caps.openclCFeatures.size(), 0u); @@ -1196,9 +1197,7 @@ TEST(DeviceGetCaps, givenDebugFlagToUseCertainWorkgroupSizeWhenDeviceIsCreatedIt } TEST(DeviceGetCaps, givenDebugFlagToDisableDeviceEnqueuesWhenCreatingDeviceThenDeviceQueueCapsAreSetCorrectly) { - if (defaultHwInfo->capabilityTable.clVersionSupport == 21) { - GTEST_SKIP(); - } + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceDeviceEnqueueSupport.set(0); diff --git a/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp b/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp index 1433cd6c87..4015d48e44 100644 --- a/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp +++ b/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp @@ -483,8 +483,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshOff class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture { public: void SetUp() override { - ExecutionModelKernelFixture::SetUp(); REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo); + + 
ExecutionModelKernelFixture::SetUp(); cl_queue_properties properties[5] = { CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, @@ -503,6 +504,10 @@ class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture { ASSERT_NE(nullptr, devQueue); } void TearDown() override { + if (IsSkipped()) { + return; + } + if (devQueue) { delete devQueue; } @@ -522,130 +527,122 @@ class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture { }; HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSetiingIUpIndirectStateThenDshIsNotUsed) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - EXPECT_TRUE(pKernel->isParentKernel); + EXPECT_TRUE(pKernel->isParentKernel); - pKernel->createReflectionSurface(); + pKernel->createReflectionSurface(); - auto *devQueueHw = castToObject>(devQueue); + auto *devQueueHw = castToObject>(devQueue); - ASSERT_NE(nullptr, devQueueHw); - auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - ASSERT_NE(nullptr, dsh); + ASSERT_NE(nullptr, devQueueHw); + auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); + ASSERT_NE(nullptr, dsh); - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); + size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); - auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); - auto usedBeforeSSH = ssh->getUsed(); - auto usedBeforeDSH = dsh->getUsed(); + auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); + auto usedBeforeSSH = ssh->getUsed(); + auto usedBeforeDSH = dsh->getUsed(); - devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1, false); - auto usedAfterSSH = ssh->getUsed(); - auto usedAfterDSH = dsh->getUsed(); + devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 
1, false); + auto usedAfterSSH = ssh->getUsed(); + auto usedAfterDSH = dsh->getUsed(); - EXPECT_GE(surfaceStateHeapSize, usedAfterSSH - usedBeforeSSH); + EXPECT_GE(surfaceStateHeapSize, usedAfterSSH - usedBeforeSSH); - EXPECT_EQ(0u, usedAfterDSH - usedBeforeDSH); + EXPECT_EQ(0u, usedAfterDSH - usedBeforeDSH); - alignedFree(ssh->getCpuBase()); - delete ssh; - } + alignedFree(ssh->getCpuBase()); + delete ssh; } HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenCorrectStartBlockIdIsSet) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - EXPECT_TRUE(pKernel->isParentKernel); + EXPECT_TRUE(pKernel->isParentKernel); - pKernel->createReflectionSurface(); + pKernel->createReflectionSurface(); - auto *devQueueHw = castToObject>(devQueue); - ASSERT_NE(nullptr, devQueueHw); - auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - ASSERT_NE(nullptr, dsh); + auto *devQueueHw = castToObject>(devQueue); + ASSERT_NE(nullptr, devQueueHw); + auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); + ASSERT_NE(nullptr, dsh); - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); + size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); - auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); + auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); - uint32_t parentCount = 4; + uint32_t parentCount = 4; - devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); - auto *igilQueue = reinterpret_cast(devQueueHw->getQueueBuffer()->getUnderlyingBuffer()); + devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); + auto *igilQueue = reinterpret_cast(devQueueHw->getQueueBuffer()->getUnderlyingBuffer()); - 
EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID); + EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID); - alignedFree(ssh->getCpuBase()); - delete ssh; - } + alignedFree(ssh->getCpuBase()); + delete ssh; } HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenDshValuesAreSetCorrectly) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - EXPECT_TRUE(pKernel->isParentKernel); + EXPECT_TRUE(pKernel->isParentKernel); - pKernel->createReflectionSurface(); + pKernel->createReflectionSurface(); - MockContext mockContext; - MockDeviceQueueHw *devQueueHw = new MockDeviceQueueHw(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]); - ASSERT_NE(nullptr, devQueueHw); - auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - ASSERT_NE(nullptr, dsh); + MockContext mockContext; + MockDeviceQueueHw *devQueueHw = new MockDeviceQueueHw(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]); + ASSERT_NE(nullptr, devQueueHw); + auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); + ASSERT_NE(nullptr, dsh); - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); + size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); - auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); + auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); - uint32_t parentCount = 1; + uint32_t parentCount = 1; - devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); - auto *igilQueue = reinterpret_cast(devQueueHw->getQueueBuffer()->getUnderlyingBuffer()); + devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); + auto *igilQueue = 
reinterpret_cast(devQueueHw->getQueueBuffer()->getUnderlyingBuffer()); - EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); - EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize()); - EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); - EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh); + EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); + EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize()); + EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); + EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh); - alignedFree(ssh->getCpuBase()); - delete ssh; - delete devQueueHw; - } + alignedFree(ssh->getCpuBase()); + delete ssh; + delete devQueueHw; } HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCallingSetupIndirectStateThenAllIddHaveBarriersEnabled) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - pKernel->createReflectionSurface(); + pKernel->createReflectionSurface(); - MockContext mockContext; - auto devQueueHw = std::make_unique>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]); - auto dsh = 
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); + MockContext mockContext; + auto devQueueHw = std::make_unique>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]); + auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - uint32_t parentCount = 1; + uint32_t parentCount = 1; - auto blockManager = pKernel->getProgram()->getBlockKernelManager(); - auto iddCount = blockManager->getCount(); - for (uint32_t i = 0; i < iddCount; i++) { - ((SPatchExecutionEnvironment *)blockManager->getBlockKernelInfo(i)->patchInfo.executionEnvironment)->HasBarriers = 1u; - } - - auto surfaceStateHeapSize = - HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); - auto ssh = std::make_unique(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); - - devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); - - auto iddStartPtr = static_cast(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize)); - auto iddStartIndex = parentCount; - for (uint32_t i = 0; i < iddCount; i++) { - EXPECT_TRUE(iddStartPtr[iddStartIndex + i].getBarrierEnable()); - } - - alignedFree(ssh->getCpuBase()); + auto blockManager = pKernel->getProgram()->getBlockKernelManager(); + auto iddCount = blockManager->getCount(); + for (uint32_t i = 0; i < iddCount; i++) { + ((SPatchExecutionEnvironment *)blockManager->getBlockKernelInfo(i)->patchInfo.executionEnvironment)->HasBarriers = 1u; } + + auto surfaceStateHeapSize = + HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); + auto ssh = std::make_unique(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); + + devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); + + auto iddStartPtr = static_cast(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize)); + auto iddStartIndex = parentCount; + for (uint32_t i = 0; i < iddCount; i++) { + EXPECT_TRUE(iddStartPtr[iddStartIndex + 
i].getBarrierEnable()); + } + + alignedFree(ssh->getCpuBase()); } static const char *binaryFile = "simple_block_kernel"; diff --git a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp index 90f310fa65..f44ad71ded 100644 --- a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -38,267 +38,250 @@ typedef ExecutionModelKernelTest ParentKernelEnqueueTest; HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenDeviceQueueDSHHasCorrectlyFilledInterfaceDesriptorTables) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); + DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; + const size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; - pKernel->createReflectionSurface(); + pKernel->createReflectionSurface(); - BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); - uint32_t blockCount = static_cast(blockManager->getCount()); + BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); + uint32_t blockCount = static_cast(blockManager->getCount()); - auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer(); - void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer(); + auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer(); + void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer(); - EXPECT_NE(nullptr, executionModelDsh); + EXPECT_NE(nullptr, executionModelDsh); - INTERFACE_DESCRIPTOR_DATA *idData = 
static_cast(ptrOffset(executionModelDsh, DeviceQueue::colorCalcStateSize)); + INTERFACE_DESCRIPTOR_DATA *idData = static_cast(ptrOffset(executionModelDsh, DeviceQueue::colorCalcStateSize)); - size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); - uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize; - EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore); + size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); + uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize; + EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore); - MockMultiDispatchInfo multiDispatchInfo(pKernel); + MockMultiDispatchInfo multiDispatchInfo(pKernel); - auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation(); - auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch(); + auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation(); + auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch(); + + auto &hardwareInfo = pKernel->getDevice().getHardwareInfo(); + auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + + if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { + kernelIsaAddress += pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP; + } + + pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); + + if (pKernel->getKernelInfo().name == "kernel_reflection") { + EXPECT_NE(0u, idData[0].getSamplerCount()); + EXPECT_NE(0u, idData[0].getSamplerStatePointer()); + } + + EXPECT_NE(0u, idData[0].getConstantIndirectUrbEntryReadLength()); + EXPECT_NE(0u, idData[0].getCrossThreadConstantDataReadLength()); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, idData[0].getDenormMode()); + EXPECT_EQ(static_cast(kernelIsaAddress), idData[0].getKernelStartPointer()); + 
EXPECT_EQ(static_cast(kernelIsaAddress >> 32), idData[0].getKernelStartPointerHigh()); + + const uint32_t blockFirstIndex = 1; + + for (uint32_t i = 0; i < blockCount; i++) { + const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + + ASSERT_NE(nullptr, pBlockInfo); + ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream); + ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment); + ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload); + + auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize; + + const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize; + + auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload); + auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->patchInfo.executionEnvironment->LargestCompiledSIMDSize, numChannels); + uint32_t numGrfPerThreadData = static_cast(sizePerThreadData / grfSize); + numGrfPerThreadData = std::max(numGrfPerThreadData, 1u); + + EXPECT_EQ(numGrfPerThreadData, idData[blockFirstIndex + i].getConstantIndirectUrbEntryReadLength()); + EXPECT_EQ(sizeCrossThreadData, idData[blockFirstIndex + i].getCrossThreadConstantDataReadLength()); + EXPECT_NE((uint64_t)0u, ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer()); + + uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer(); + uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch(); auto &hardwareInfo = pKernel->getDevice().getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - kernelIsaAddress += 
pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP; + expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; } - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - - if (pKernel->getKernelInfo().name == "kernel_reflection") { - EXPECT_NE(0u, idData[0].getSamplerCount()); - EXPECT_NE(0u, idData[0].getSamplerStatePointer()); - } - - EXPECT_NE(0u, idData[0].getConstantIndirectUrbEntryReadLength()); - EXPECT_NE(0u, idData[0].getCrossThreadConstantDataReadLength()); - EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, idData[0].getDenormMode()); - EXPECT_EQ(static_cast(kernelIsaAddress), idData[0].getKernelStartPointer()); - EXPECT_EQ(static_cast(kernelIsaAddress >> 32), idData[0].getKernelStartPointerHigh()); - - const uint32_t blockFirstIndex = 1; - - for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - - ASSERT_NE(nullptr, pBlockInfo); - ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream); - ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment); - ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload); - - auto grfSize = pPlatform->getClDevice(0)->getDeviceInfo().grfSize; - - const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize; - - auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload); - auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->patchInfo.executionEnvironment->LargestCompiledSIMDSize, numChannels); - uint32_t numGrfPerThreadData = static_cast(sizePerThreadData / grfSize); - numGrfPerThreadData = std::max(numGrfPerThreadData, 1u); - - EXPECT_EQ(numGrfPerThreadData, idData[blockFirstIndex + i].getConstantIndirectUrbEntryReadLength()); - EXPECT_EQ(sizeCrossThreadData, idData[blockFirstIndex + i].getCrossThreadConstantDataReadLength()); - 
EXPECT_NE((uint64_t)0u, ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer()); - - uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer(); - uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch(); - - auto &hardwareInfo = pKernel->getDevice().getHardwareInfo(); - auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); - - if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; - } - - EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress); - } + EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelWithPrivateSurfaceWhenParentKernelIsEnqueuedThenPrivateSurfaceIsMadeResident) { - if (pClDevice->areOcl21FeaturesSupported()) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; - int32_t executionStamp = 0; - auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); - pDevice->resetCommandStreamReceiver(mockCSR); + size_t offset[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; + int32_t executionStamp = 0; + auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); + pDevice->resetCommandStreamReceiver(mockCSR); - size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount(); - for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) { - if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) { - 
kernelRequiringPrivateSurface = i; - break; - } + size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount(); + for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) { + if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) { + kernelRequiringPrivateSurface = i; + break; } - - ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount()); - - GraphicsAllocation *privateSurface = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface); - - if (privateSurface == nullptr) { - privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); - pKernel->getProgram()->getBlockKernelManager()->pushPrivateSurface(privateSurface, kernelRequiringPrivateSurface); - } - - pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - - EXPECT_TRUE(privateSurface->isResident(mockCSR->getOsContext().getContextId())); } + + ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount()); + + GraphicsAllocation *privateSurface = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface); + + if (privateSurface == nullptr) { + privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); + pKernel->getProgram()->getBlockKernelManager()->pushPrivateSurface(privateSurface, kernelRequiringPrivateSurface); + } + + pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr); + + EXPECT_TRUE(privateSurface->isResident(mockCSR->getOsContext().getContextId())); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, 
GivenBlocksWithPrivateMemoryWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenPrivateAllocationIsMadeResidentWhenEventUnblocks) { - if (pClDevice->areOcl21FeaturesSupported()) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; + size_t offset[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; - auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); - auto &csr = pDevice->getUltCommandStreamReceiver(); - csr.storeMakeResidentAllocations = true; + auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; - size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount(); - for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) { - if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) { - kernelRequiringPrivateSurface = i; - break; - } + size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount(); + for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) { + if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) { + kernelRequiringPrivateSurface = i; + break; } - - ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount()); - - auto privateAllocation = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface); - - if (privateAllocation == nullptr) { - privateAllocation = csr.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize}); - blockKernelManager->pushPrivateSurface(privateAllocation, kernelRequiringPrivateSurface); - } - - auto uEvent = make_releaseable(pContext); - auto clEvent = 
static_cast(uEvent.get()); - - pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr); - - EXPECT_FALSE(csr.isMadeResident(privateAllocation)); - uEvent->setStatus(CL_COMPLETE); - EXPECT_TRUE(csr.isMadeResident(privateAllocation)); } + + ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount()); + + auto privateAllocation = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface); + + if (privateAllocation == nullptr) { + privateAllocation = csr.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize}); + blockKernelManager->pushPrivateSurface(privateAllocation, kernelRequiringPrivateSurface); + } + + auto uEvent = make_releaseable(pContext); + auto clEvent = static_cast(uEvent.get()); + + pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr); + + EXPECT_FALSE(csr.isMadeResident(privateAllocation)); + uEvent->setStatus(CL_COMPLETE); + EXPECT_TRUE(csr.isMadeResident(privateAllocation)); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelIsCalledThenBlockKernelIsaAllocationIsMadeResident) { - if (pClDevice->areOcl21FeaturesSupported()) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; + size_t offset[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; - auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); - auto &csr = pDevice->getUltCommandStreamReceiver(); - csr.storeMakeResidentAllocations = true; + auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; - pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr); + pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - auto blockCount = blockKernelManager->getCount(); - for (auto blockId = 0u; 
blockId < blockCount; blockId++) { - EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); - } + auto blockCount = blockKernelManager->getCount(); + for (auto blockId = 0u; blockId < blockCount; blockId++) { + EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelManagerFilledWithBlocksWhenMakeInternalAllocationsResidentIsCalledThenAllSurfacesAreMadeResident) { - if (pClDevice->areOcl21FeaturesSupported()) { - auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); - auto &csr = pDevice->getUltCommandStreamReceiver(); - csr.storeMakeResidentAllocations = true; + auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; - blockKernelManager->makeInternalAllocationsResident(csr); + blockKernelManager->makeInternalAllocationsResident(csr); - auto blockCount = blockKernelManager->getCount(); - for (auto blockId = 0u; blockId < blockCount; blockId++) { - EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); - } + auto blockCount = blockKernelManager->getCount(); + for (auto blockId = 0u; blockId < blockCount; blockId++) { + EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenBlockKernelIsaAllocationIsMadeResidentWhenEventUnblocks) { - if (pClDevice->areOcl21FeaturesSupported()) { - size_t offset[3] = {0, 0, 0}; - size_t gws[3] = {1, 1, 1}; + size_t offset[3] = {0, 0, 0}; + size_t gws[3] = {1, 1, 1}; - auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); - auto &csr = 
pDevice->getUltCommandStreamReceiver(); - csr.storeMakeResidentAllocations = true; + auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; - auto uEvent = make_releaseable(pContext); - auto clEvent = static_cast(uEvent.get()); + auto uEvent = make_releaseable(pContext); + auto clEvent = static_cast(uEvent.get()); - pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr); + pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr); - auto blockCount = blockKernelManager->getCount(); - for (auto blockId = 0u; blockId < blockCount; blockId++) { - EXPECT_FALSE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); - } + auto blockCount = blockKernelManager->getCount(); + for (auto blockId = 0u; blockId < blockCount; blockId++) { + EXPECT_FALSE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); + } - uEvent->setStatus(CL_COMPLETE); + uEvent->setStatus(CL_COMPLETE); - for (auto blockId = 0u; blockId < blockCount; blockId++) { - EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); - } + for (auto blockId = 0u; blockId < blockCount; blockId++) { + EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedSecondTimeThenDeviceQueueDSHIsResetToInitialOffset) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); + DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; + const 
size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; - auto dsh = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - size_t executionModelDSHUsedBefore = dsh->getUsed(); + auto dsh = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); + size_t executionModelDSHUsedBefore = dsh->getUsed(); - uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize; - EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore); + uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize; + EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore); - MockMultiDispatchInfo multiDispatchInfo(pKernel); + MockMultiDispatchInfo multiDispatchInfo(pKernel); - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); + pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - size_t executionModelDSHUsedAfterFirst = dsh->getUsed(); - EXPECT_LT(executionModelDSHUsedBefore, executionModelDSHUsedAfterFirst); + size_t executionModelDSHUsedAfterFirst = dsh->getUsed(); + EXPECT_LT(executionModelDSHUsedBefore, executionModelDSHUsedAfterFirst); - pDevQueueHw->resetDeviceQueue(); + pDevQueueHw->resetDeviceQueue(); - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); + pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - size_t executionModelDSHUsedAfterSecond = dsh->getUsed(); - EXPECT_EQ(executionModelDSHUsedAfterFirst, executionModelDSHUsedAfterSecond); - } + size_t executionModelDSHUsedAfterSecond = dsh->getUsed(); + EXPECT_EQ(executionModelDSHUsedAfterFirst, executionModelDSHUsedAfterSecond); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelAndNotUsedSSHWhenEnqueuedThenSSHIsNotReallocated) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { + const size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; - const 
size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; + pKernel->createReflectionSurface(); + MockMultiDispatchInfo multiDispatchInfo(pKernel); - pKernel->createReflectionSurface(); - MockMultiDispatchInfo multiDispatchInfo(pKernel); + auto ssh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); + ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace()); - auto ssh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace()); - - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - auto ssh2 = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - EXPECT_EQ(ssh, ssh2); - EXPECT_EQ(ssh->getGraphicsAllocation(), ssh2->getGraphicsAllocation()); - } + pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); + auto ssh2 = &getIndirectHeap(*pCmdQ, multiDispatchInfo); + EXPECT_EQ(ssh, ssh2); + EXPECT_EQ(ssh->getGraphicsAllocation(), ssh2->getGraphicsAllocation()); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurfaceStatesAreCopied) { @@ -306,64 +289,61 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { + const size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; + pKernel->createReflectionSurface(); - pKernel->createReflectionSurface(); + BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); + uint32_t blockCount = static_cast(blockManager->getCount()); - BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); - uint32_t blockCount = 
static_cast(blockManager->getCount()); + size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize(); - size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize(); + MockMultiDispatchInfo multiDispatchInfo(pKernel); - MockMultiDispatchInfo multiDispatchInfo(pKernel); + auto ssh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); + // prealign the ssh so that it won't need to be realigned in enqueueKernel + // this way, we can assume the location in memory into which the surface states + // will be coies + ssh->align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - auto ssh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - // prealign the ssh so that it won't need to be realigned in enqueueKernel - // this way, we can assume the location in memory into which the surface states - // will be coies - ssh->align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); + pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); + // mark the assumed place for surface states + size_t parentSshOffset = 0; + ssh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - // mark the assumed place for surface states - size_t parentSshOffset = 0; - ssh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); + void *blockSSH = ptrOffset(ssh->getCpuBase(), parentSshOffset + parentKernelSSHSize); // note : unaligned at this point - void *blockSSH = ptrOffset(ssh->getCpuBase(), parentSshOffset + parentKernelSSHSize); // note : unaligned at this point + for (uint32_t i = 0; i < blockCount; i++) { + const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + ASSERT_NE(nullptr, pBlockInfo); + ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream); + ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment); + ASSERT_NE(nullptr, 
pBlockInfo->patchInfo.threadPayload); - ASSERT_NE(nullptr, pBlockInfo); - ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream); - ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment); - ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload); + Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, nullptr); + blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); + if (blockKernel->getNumberOfBindingTableStates() > 0) { + ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState); + auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset); + EXPECT_EQ(0U, reinterpret_cast(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE); + auto dstBindingTable = reinterpret_cast(dstBlockBti); - Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, nullptr); - blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - if (blockKernel->getNumberOfBindingTableStates() > 0) { - ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState); - auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset); - EXPECT_EQ(0U, reinterpret_cast(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE); - auto dstBindingTable = reinterpret_cast(dstBlockBti); - - auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset); - auto srcBindingTable = reinterpret_cast(srcBlockBti); - for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) { - uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer(); - uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer(); - auto *dstSurfaceState = reinterpret_cast(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer)); - auto *srcSurfaceState = reinterpret_cast(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer)); - EXPECT_EQ(0, 
memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE))); - } - - blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize()); + auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset); + auto srcBindingTable = reinterpret_cast(srcBlockBti); + for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) { + uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer(); + uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer(); + auto *dstSurfaceState = reinterpret_cast(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer)); + auto *srcSurfaceState = reinterpret_cast(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer)); + EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE))); } - delete blockKernel; + blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize()); } + + delete blockKernel; } } @@ -372,86 +352,78 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { + const size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; + MockMultiDispatchInfo multiDispatchInfo(pKernel); + pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - MockMultiDispatchInfo multiDispatchInfo(pKernel); - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - - EXPECT_NE(nullptr, pKernel->getKernelReflectionSurface()); - } + EXPECT_NE(nullptr, pKernel->getKernelReflectionSurface()); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, 
givenBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueIsNotReset) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { + const size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; + cl_queue_properties properties[3] = {0}; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; - cl_queue_properties properties[3] = {0}; + MockMultiDispatchInfo multiDispatchInfo(pKernel); + MockDeviceQueueHw mockDevQueue(context, pClDevice, properties[0]); - MockMultiDispatchInfo multiDispatchInfo(pKernel); - MockDeviceQueueHw mockDevQueue(context, pClDevice, properties[0]); + context->setDefaultDeviceQueue(&mockDevQueue); + // Acquire CS to check if reset queue was called + mockDevQueue.acquireEMCriticalSection(); - context->setDefaultDeviceQueue(&mockDevQueue); - // Acquire CS to check if reset queue was called - mockDevQueue.acquireEMCriticalSection(); + auto mockEvent = make_releaseable(context); - auto mockEvent = make_releaseable(context); + cl_event eventBlocking = mockEvent.get(); - cl_event eventBlocking = mockEvent.get(); + pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 1, &eventBlocking, nullptr); - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 1, &eventBlocking, nullptr); - - EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree()); - } + EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree()); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenNonBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueDSHAddressIsProgrammedInStateBaseAddressAndDSHIsMadeResident) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - ASSERT_NE(nullptr, pDevQueueHw); + DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); + 
ASSERT_NE(nullptr, pDevQueueHw); - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; + const size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; - MockMultiDispatchInfo multiDispatchInfo(pKernel); + MockMultiDispatchInfo multiDispatchInfo(pKernel); - int32_t executionStamp = 0; - auto mockCSR = new MockCsrBase(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); - pDevice->resetCommandStreamReceiver(mockCSR); + int32_t executionStamp = 0; + auto mockCSR = new MockCsrBase(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); + pDevice->resetCommandStreamReceiver(mockCSR); - pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); + pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - auto &cmdStream = mockCSR->getCS(0); + auto &cmdStream = mockCSR->getCS(0); - HardwareParse hwParser; - hwParser.parseCommands(cmdStream, 0); - hwParser.findHardwareCommands(); + HardwareParse hwParser; + hwParser.parseCommands(cmdStream, 0); + hwParser.findHardwareCommands(); - auto stateBaseAddressItor = hwParser.itorStateBaseAddress; + auto stateBaseAddressItor = hwParser.itorStateBaseAddress; - ASSERT_NE(hwParser.cmdList.end(), stateBaseAddressItor); + ASSERT_NE(hwParser.cmdList.end(), stateBaseAddressItor); - auto *stateBaseAddress = (STATE_BASE_ADDRESS *)*stateBaseAddressItor; + auto *stateBaseAddress = (STATE_BASE_ADDRESS *)*stateBaseAddressItor; - uint64_t addressProgrammed = stateBaseAddress->getDynamicStateBaseAddress(); + uint64_t addressProgrammed = stateBaseAddress->getDynamicStateBaseAddress(); - EXPECT_EQ(addressProgrammed, pDevQueue->getDshBuffer()->getGpuAddress()); + EXPECT_EQ(addressProgrammed, pDevQueue->getDshBuffer()->getGpuAddress()); - bool dshAllocationResident = false; + bool dshAllocationResident = false; - for (auto allocation : mockCSR->madeResidentGfxAllocations) { - if 
(allocation == pDevQueue->getDshBuffer()) { - dshAllocationResident = true; - break; - } + for (auto allocation : mockCSR->madeResidentGfxAllocations) { + if (allocation == pDevQueue->getDshBuffer()) { + dshAllocationResident = true; + break; } - EXPECT_TRUE(dshAllocationResident); } + EXPECT_TRUE(dshAllocationResident); } INSTANTIATE_TEST_CASE_P(ParentKernelEnqueueTest, diff --git a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp index 250976c400..2f0bbe1a05 100644 --- a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp +++ b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp @@ -28,167 +28,155 @@ static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"}; typedef ExecutionModelKernelTest ParentKernelDispatchTest; HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDeviceQueueDSHIsUsed) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); + DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - KernelOperation *blockedCommandsData = nullptr; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; + KernelOperation *blockedCommandsData = nullptr; + const size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; - pKernel->createReflectionSurface(); + pKernel->createReflectionSurface(); - size_t dshUsedBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed(); - EXPECT_EQ(0u, dshUsedBefore); + size_t dshUsedBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed(); + EXPECT_EQ(0u, dshUsedBefore); - size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); + size_t executionModelDSHUsedBefore = 
pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); - DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); - MultiDispatchInfo multiDispatchInfo(pKernel); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData, - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); + DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); + MultiDispatchInfo multiDispatchInfo(pKernel); + multiDispatchInfo.push(dispatchInfo); + HardwareInterface::dispatchWalker( + *pCmdQ, + multiDispatchInfo, + CsrDependencies(), + blockedCommandsData, + nullptr, + nullptr, + nullptr, + nullptr, + CL_COMMAND_NDRANGE_KERNEL); - size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed(); - EXPECT_EQ(0u, dshUsedAfter); + size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed(); + EXPECT_EQ(0u, dshUsedAfter); - size_t executionModelDSHUsedAfter = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); - EXPECT_NE(executionModelDSHUsedBefore, executionModelDSHUsedAfter); - } + size_t executionModelDSHUsedAfter = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); + EXPECT_NE(executionModelDSHUsedBefore, executionModelDSHUsedAfter); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenDynamicStateHeapIsRequestedThenDeviceQueueHeapIsReturned) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); + DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - MockMultiDispatchInfo multiDispatchInfo(pKernel); - auto ish = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - auto ishOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); + MockMultiDispatchInfo multiDispatchInfo(pKernel); + auto ish = 
&getIndirectHeap(*pCmdQ, multiDispatchInfo); + auto ishOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - EXPECT_EQ(ishOfDevQueue, ish); - } + EXPECT_EQ(ishOfDevQueue, ish); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenIndirectObjectHeapIsRequestedThenDeviceQueueDSHIsReturned) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); + DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); - MockMultiDispatchInfo multiDispatchInfo(pKernel); - auto ioh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); - auto dshOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); + MockMultiDispatchInfo multiDispatchInfo(pKernel); + auto ioh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); + auto dshOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); - EXPECT_EQ(dshOfDevQueue, ioh); - } + EXPECT_EQ(dshOfDevQueue, ioh); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDefaultCmdQIOHIsNotUsed) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - KernelOperation *blockedCommandsData = nullptr; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; + KernelOperation *blockedCommandsData = nullptr; + const size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; - MockMultiDispatchInfo multiDispatchInfo(pKernel); + MockMultiDispatchInfo multiDispatchInfo(pKernel); - auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); + auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); - DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - 
blockedCommandsData, - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); + DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); + multiDispatchInfo.push(dispatchInfo); + HardwareInterface::dispatchWalker( + *pCmdQ, + multiDispatchInfo, + CsrDependencies(), + blockedCommandsData, + nullptr, + nullptr, + nullptr, + nullptr, + CL_COMMAND_NDRANGE_KERNEL); - auto iohUsed = ioh.getUsed(); - EXPECT_EQ(0u, iohUsed); - } + auto iohUsed = ioh.getUsed(); + EXPECT_EQ(0u, iohUsed); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSHSizeAccountForsBlocksSurfaceStates) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - KernelOperation *blockedCommandsData = nullptr; - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; + KernelOperation *blockedCommandsData = nullptr; + const size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; - MockMultiDispatchInfo multiDispatchInfo(pKernel); - DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData, - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); + MockMultiDispatchInfo multiDispatchInfo(pKernel); + DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); + multiDispatchInfo.push(dispatchInfo); + HardwareInterface::dispatchWalker( + *pCmdQ, + multiDispatchInfo, + CsrDependencies(), + blockedCommandsData, + nullptr, + nullptr, + nullptr, + nullptr, + CL_COMMAND_NDRANGE_KERNEL); - auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); + auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); - EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace()); + 
EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace()); - size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); - size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); + size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); + size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); - EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace()); - } + EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace()); } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSizeForParentIsAllocated) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); - const size_t globalOffsets[3] = {0, 0, 0}; - const size_t workItems[3] = {1, 1, 1}; + auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); + const size_t globalOffsets[3] = {0, 0, 0}; + const size_t workItems[3] = {1, 1, 1}; - MultiDispatchInfo multiDispatchInfo(pKernel); + MultiDispatchInfo multiDispatchInfo(pKernel); - DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); - multiDispatchInfo.push(dispatchInfo); - HardwareInterface::dispatchWalker( - *pCmdQ, - multiDispatchInfo, - CsrDependencies(), - blockedCommandsData.get(), - nullptr, - nullptr, - nullptr, - nullptr, - CL_COMMAND_NDRANGE_KERNEL); - ASSERT_NE(nullptr, blockedCommandsData); + DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); + multiDispatchInfo.push(dispatchInfo); + HardwareInterface::dispatchWalker( + *pCmdQ, + multiDispatchInfo, + CsrDependencies(), + 
blockedCommandsData.get(), + nullptr, + nullptr, + nullptr, + nullptr, + CL_COMMAND_NDRANGE_KERNEL); + ASSERT_NE(nullptr, blockedCommandsData); - size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper::getDefaultSshUsage(); - size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); + size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper::getDefaultSshUsage(); + size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); - size_t sshUsed = blockedCommandsData->ssh->getUsed(); + size_t sshUsed = blockedCommandsData->ssh->getUsed(); - size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) + - pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) + - UnitTestHelper::getDefaultSshUsage(); + size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) + + pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) + + UnitTestHelper::getDefaultSshUsage(); - if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) { - EXPECT_EQ(expectedSizeSSH, sshUsed); - } - - EXPECT_GE(minRequiredSize, sshUsed); - // Total SSH size including EM must be greater then ssh allocated - EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed); + if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) { + EXPECT_EQ(expectedSizeSSH, sshUsed); } + + EXPECT_GE(minRequiredSize, sshUsed); + // Total SSH size including EM must be greater then ssh allocated + EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed); } INSTANTIATE_TEST_CASE_P(ParentKernelDispatchTest, diff --git a/opencl/test/unit_test/fixtures/execution_model_fixture.h 
b/opencl/test/unit_test/fixtures/execution_model_fixture.h index f1a9a2a796..d2585bf894 100644 --- a/opencl/test/unit_test/fixtures/execution_model_fixture.h +++ b/opencl/test/unit_test/fixtures/execution_model_fixture.h @@ -53,6 +53,8 @@ class ExecutionModelKernelTest : public ExecutionModelKernelFixture, public DeviceQueueFixture { public: void SetUp() override { + REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo); + DebugManager.flags.EnableTimestampPacket.set(0); ExecutionModelKernelFixture::SetUp(); CommandQueueHwFixture::SetUp(pClDevice, 0); @@ -60,10 +62,11 @@ class ExecutionModelKernelTest : public ExecutionModelKernelFixture, } void TearDown() override { - - DeviceQueueFixture::TearDown(); - CommandQueueHwFixture::TearDown(); - ExecutionModelKernelFixture::TearDown(); + if (!IsSkipped()) { + DeviceQueueFixture::TearDown(); + CommandQueueHwFixture::TearDown(); + ExecutionModelKernelFixture::TearDown(); + } } std::unique_ptr createBlockedCommandsData(CommandQueue &commandQueue) { diff --git a/opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h b/opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h index 8b7db7755b..1b3ffa5640 100644 --- a/opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h +++ b/opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h @@ -11,6 +11,7 @@ #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/program/program_from_binary.h" +#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "test.h" using namespace NEO; @@ -19,17 +20,10 @@ class ExecutionModelKernelFixture : public ProgramFromBinaryTest, public PlatformFixture { protected: void SetUp() override { + REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo); + PlatformFixture::SetUp(); - std::string temp; - temp.assign(pPlatform->getClDevice(0)->getDeviceInfo().clVersion); - - if (temp.find("OpenCL 1.2") != std::string::npos) { - pDevice = 
MockDevice::createWithNewExecutionEnvironment(nullptr); - pClDevice = new MockClDevice{pDevice}; - return; - } - std::string options("-cl-std=CL2.0"); this->setOptions(options); ProgramFromBinaryTest::SetUp(); @@ -58,25 +52,23 @@ class ExecutionModelKernelFixture : public ProgramFromBinaryTest, } void TearDown() override { + if (IsSkipped()) { + return; + } if (pKernel != nullptr) { pKernel->release(); } - std::string temp; - temp.assign(pPlatform->getClDevice(0)->getDeviceInfo().clVersion); - ProgramFromBinaryTest::TearDown(); PlatformFixture::TearDown(); - if (temp.find("OpenCL 1.2") != std::string::npos) { - if (pDevice != nullptr) { - delete pDevice; - pDevice = nullptr; - } - if (pClDevice != nullptr) { - delete pClDevice; - pClDevice = nullptr; - } + if (pDevice != nullptr) { + delete pDevice; + pDevice = nullptr; + } + if (pClDevice != nullptr) { + delete pClDevice; + pClDevice = nullptr; } } diff --git a/opencl/test/unit_test/gen11/lkf/test_device_caps_lkf.cpp b/opencl/test/unit_test/gen11/lkf/test_device_caps_lkf.cpp index 9684224b06..2ed85c0095 100644 --- a/opencl/test/unit_test/gen11/lkf/test_device_caps_lkf.cpp +++ b/opencl/test/unit_test/gen11/lkf/test_device_caps_lkf.cpp @@ -16,12 +16,6 @@ LKFTEST_F(LkfTest, givenLkfWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } -LKFTEST_F(LkfTest, givenLKFWhenCheckedOCLVersionThen21IsReported) { - const auto &caps = pClDevice->getDeviceInfo(); - EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion); - EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); -} - LKFTEST_F(LkfTest, givenLKFWhenCheckedSvmSupportThenNoSvmIsReported) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_EQ(caps.svmCapabilities, 0u); diff --git a/opencl/test/unit_test/gen12lp/test_device_caps_gen12lp.inl b/opencl/test/unit_test/gen12lp/test_device_caps_gen12lp.inl index 2bd58463a2..0a50bcffc8 100644 --- a/opencl/test/unit_test/gen12lp/test_device_caps_gen12lp.inl +++ 
b/opencl/test/unit_test/gen12lp/test_device_caps_gen12lp.inl @@ -14,13 +14,6 @@ using namespace NEO; typedef Test Gen12LpDeviceCaps; -GEN12LPTEST_F(Gen12LpDeviceCaps, givenGen12LpDeviceWhenQueryingDeviceInfoThenOcl30IsReported) { - const auto &caps = pClDevice->getDeviceInfo(); - auto expectedClCVersion = (pClDevice->isOcl21Conformant() ? "OpenCL C 3.0 " : "OpenCL C 1.2 "); - EXPECT_STREQ("OpenCL 3.0 NEO ", caps.clVersion); - EXPECT_STREQ(expectedClCVersion, caps.clCVersion); -} - HWTEST2_F(Gen12LpDeviceCaps, lpSkusDontSupportFP64, IsTGLLP) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; diff --git a/opencl/test/unit_test/gen8/bdw/CMakeLists.txt b/opencl/test/unit_test/gen8/bdw/CMakeLists.txt index 5ed198a848..ba8f8a2375 100644 --- a/opencl/test/unit_test/gen8/bdw/CMakeLists.txt +++ b/opencl/test/unit_test/gen8/bdw/CMakeLists.txt @@ -7,7 +7,6 @@ if(TESTS_BDW) set(IGDRCL_SRCS_tests_gen8_bdw ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt - ${CMAKE_CURRENT_SOURCE_DIR}/device_tests_bdw.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_bdw.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_bdw.cpp ) diff --git a/opencl/test/unit_test/gen8/bdw/device_tests_bdw.cpp b/opencl/test/unit_test/gen8/bdw/device_tests_bdw.cpp deleted file mode 100644 index 4b8c84347a..0000000000 --- a/opencl/test/unit_test/gen8/bdw/device_tests_bdw.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2018-2020 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/test/unit_test/mocks/mock_device.h" - -#include "opencl/test/unit_test/fixtures/cl_device_fixture.h" -#include "opencl/test/unit_test/mocks/mock_source_level_debugger.h" -#include "test.h" - -using namespace NEO; -struct BdwDeviceTest : public ClDeviceFixture, - public ::testing::Test { - void SetUp() override { - ClDeviceFixture::SetUp(); - } - - void TearDown() override { - ClDeviceFixture::TearDown(); - } -}; - -BDWTEST_F(BdwDeviceTest, 
givenBdwDeviceWhenAskedForClVersionThenReport21) { - auto version = pClDevice->getEnabledClVersion(); - EXPECT_EQ(21u, version); -} diff --git a/opencl/test/unit_test/gen8/bdw/test_device_caps_bdw.cpp b/opencl/test/unit_test/gen8/bdw/test_device_caps_bdw.cpp index 625d41d7b4..2763c8822e 100644 --- a/opencl/test/unit_test/gen8/bdw/test_device_caps_bdw.cpp +++ b/opencl/test/unit_test/gen8/bdw/test_device_caps_bdw.cpp @@ -14,12 +14,6 @@ using namespace NEO; typedef Test BdwDeviceCaps; -BDWTEST_F(BdwDeviceCaps, givenBdwDeviceWhenAskedForClVersionThenReport21) { - const auto &caps = pClDevice->getDeviceInfo(); - EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); - EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); -} - BDWTEST_F(BdwDeviceCaps, skuSpecificCaps) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; diff --git a/opencl/test/unit_test/gen9/bxt/device_tests_bxt.cpp b/opencl/test/unit_test/gen9/bxt/device_tests_bxt.cpp index 1370d1786c..2e8be4eb0b 100644 --- a/opencl/test/unit_test/gen9/bxt/device_tests_bxt.cpp +++ b/opencl/test/unit_test/gen9/bxt/device_tests_bxt.cpp @@ -12,11 +12,6 @@ using namespace NEO; typedef Test DeviceTest; -BXTTEST_F(DeviceTest, getEnabledClVersion12Device) { - auto version = pClDevice->getEnabledClVersion(); - EXPECT_EQ(12u, version); -} - BXTTEST_F(DeviceTest, givenBxtDeviceWhenAskedForProflingTimerResolutionThen52IsReturned) { auto resolution = pDevice->getProfilingTimerResolution(); EXPECT_DOUBLE_EQ(52.083, resolution); diff --git a/opencl/test/unit_test/gen9/bxt/test_device_caps_bxt.cpp b/opencl/test/unit_test/gen9/bxt/test_device_caps_bxt.cpp index ded084c0a4..0c01378f0a 100644 --- a/opencl/test/unit_test/gen9/bxt/test_device_caps_bxt.cpp +++ b/opencl/test/unit_test/gen9/bxt/test_device_caps_bxt.cpp @@ -12,23 +12,13 @@ using namespace NEO; typedef Test BxtDeviceCaps; -BXTTEST_F(BxtDeviceCaps, reportsOcl12) { - const auto &caps = pClDevice->getDeviceInfo(); - EXPECT_STREQ("OpenCL 1.2 
NEO ", caps.clVersion); - EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); -} - BXTTEST_F(BxtDeviceCaps, BxtProfilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(52u, caps.outProfilingTimerResolution); } -BXTTEST_F(BxtDeviceCaps, BxtClVersionSupport) { - const auto &caps = pClDevice->getDeviceInfo(); +BXTTEST_F(BxtDeviceCaps, givenBxtDeviceWhenAskedFor32BitSupportThenCorrectValuesAreReturned) { const auto &sharedCaps = pDevice->getDeviceInfo(); - EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion); - EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); - auto memoryManager = pDevice->getMemoryManager(); if (is64bit) { EXPECT_TRUE(memoryManager->peekForce32BitAllocations()); diff --git a/opencl/test/unit_test/gen9/cfl/test_device_caps_cfl.cpp b/opencl/test/unit_test/gen9/cfl/test_device_caps_cfl.cpp index 10e9ec4622..6b12eb08ad 100644 --- a/opencl/test/unit_test/gen9/cfl/test_device_caps_cfl.cpp +++ b/opencl/test/unit_test/gen9/cfl/test_device_caps_cfl.cpp @@ -12,12 +12,6 @@ using namespace NEO; typedef Test CflDeviceCaps; -CFLTEST_F(CflDeviceCaps, reportsOcl21) { - const auto &caps = pClDevice->getDeviceInfo(); - EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); - EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); -} - CFLTEST_F(CflDeviceCaps, GivenCFLWhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } diff --git a/opencl/test/unit_test/gen9/glk/test_device_caps_glk.cpp b/opencl/test/unit_test/gen9/glk/test_device_caps_glk.cpp index 48d7314d82..60c488edcf 100644 --- a/opencl/test/unit_test/gen9/glk/test_device_caps_glk.cpp +++ b/opencl/test/unit_test/gen9/glk/test_device_caps_glk.cpp @@ -21,12 +21,6 @@ GLKTEST_F(Gen9DeviceCaps, givenGlkDeviceWhenAskedForDoubleSupportThenTrueIsRetur EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64); } -GLKTEST_F(Gen9DeviceCaps, GlkClVersionSupport) { - const auto &caps = pClDevice->getDeviceInfo(); - EXPECT_STREQ("OpenCL 1.2 NEO ", 
caps.clVersion); - EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); -} - GLKTEST_F(Gen9DeviceCaps, GlkIs32BitOsAllocatorAvailable) { const auto &caps = pDevice->getDeviceInfo(); auto memoryManager = pDevice->getMemoryManager(); diff --git a/opencl/test/unit_test/gen9/kbl/test_device_caps_kbl.cpp b/opencl/test/unit_test/gen9/kbl/test_device_caps_kbl.cpp index f13e32757d..478f854938 100644 --- a/opencl/test/unit_test/gen9/kbl/test_device_caps_kbl.cpp +++ b/opencl/test/unit_test/gen9/kbl/test_device_caps_kbl.cpp @@ -12,12 +12,6 @@ using namespace NEO; typedef Test KblDeviceCaps; -KBLTEST_F(KblDeviceCaps, reportsOcl21) { - const auto &caps = pClDevice->getDeviceInfo(); - EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); - EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); -} - KBLTEST_F(KblDeviceCaps, GivenKBLWhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } diff --git a/opencl/test/unit_test/gen9/skl/device_tests_skl.cpp b/opencl/test/unit_test/gen9/skl/device_tests_skl.cpp index 4b86a8a54e..dac3329c8d 100644 --- a/opencl/test/unit_test/gen9/skl/device_tests_skl.cpp +++ b/opencl/test/unit_test/gen9/skl/device_tests_skl.cpp @@ -12,11 +12,6 @@ using namespace NEO; typedef Test DeviceTest; -SKLTEST_F(DeviceTest, getEnabledClVersion21Device) { - auto version = pClDevice->getEnabledClVersion(); - EXPECT_EQ(21u, version); -} - SKLTEST_F(DeviceTest, givenSklDeviceWhenAskedForProflingTimerResolutionThen83IsReturned) { auto resolution = pDevice->getProfilingTimerResolution(); EXPECT_DOUBLE_EQ(83.333, resolution); diff --git a/opencl/test/unit_test/gen9/skl/test_device_caps_skl.cpp b/opencl/test/unit_test/gen9/skl/test_device_caps_skl.cpp index 957d283ff0..7a8d72bcb2 100644 --- a/opencl/test/unit_test/gen9/skl/test_device_caps_skl.cpp +++ b/opencl/test/unit_test/gen9/skl/test_device_caps_skl.cpp @@ -12,23 +12,13 @@ using namespace NEO; typedef Test SklDeviceCaps; -SKLTEST_F(SklDeviceCaps, reportsOcl21) { - const auto &caps = 
pClDevice->getDeviceInfo(); - EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); - EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); -} - SKLTEST_F(SklDeviceCaps, SklProfilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(83u, caps.outProfilingTimerResolution); } SKLTEST_F(SklDeviceCaps, givenSklDeviceWhenAskedFor32BitSupportThenFalseIsReturned) { - const auto &caps = pClDevice->getDeviceInfo(); const auto &sharedCaps = pDevice->getDeviceInfo(); - EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); - EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); - auto memoryManager = pDevice->getMemoryManager(); EXPECT_FALSE(memoryManager->peekForce32BitAllocations()); EXPECT_FALSE(sharedCaps.force32BitAddressess); diff --git a/opencl/test/unit_test/gen9/test_platform_caps_gen9.cpp b/opencl/test/unit_test/gen9/test_platform_caps_gen9.cpp index e3ef95957d..5d1ab7eb14 100644 --- a/opencl/test/unit_test/gen9/test_platform_caps_gen9.cpp +++ b/opencl/test/unit_test/gen9/test_platform_caps_gen9.cpp @@ -29,23 +29,3 @@ GEN9TEST_F(Gen9PlatformCaps, allSkusSupportFP64) { EXPECT_EQ(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } } - -GEN9TEST_F(Gen9PlatformCaps, SKLVersion) { - char *paramValue = new char[12]; - cl_int retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_VERSION, 12, paramValue, nullptr); - if (pPlatform->getClDevice(0)->getHardwareInfo().platform.eProductFamily == IGFX_SKYLAKE) { - EXPECT_STREQ(paramValue, "OpenCL 2.1 "); - } - EXPECT_EQ(retVal, CL_SUCCESS); - delete[] paramValue; -} - -GEN9TEST_F(Gen9PlatformCaps, BXTVersion) { - char *paramValue = new char[12]; - cl_int retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_VERSION, 12, paramValue, nullptr); - if (pPlatform->getClDevice(0)->getHardwareInfo().platform.eProductFamily == IGFX_BROXTON) { - EXPECT_STREQ(paramValue, "OpenCL 1.2 "); - } - EXPECT_EQ(retVal, CL_SUCCESS); - delete[] paramValue; -} diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp 
b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 812e9fda18..08ebebf7a5 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -1057,37 +1057,37 @@ typedef ExecutionModelKernelFixture ParentKernelCommandsFromBinaryTest; HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, WhenGettingSizeRequiredForExecutionModelForSurfaceStatesThenReturnSizeOfBlocksPlusMaxBindingTableSizeForAllIdtEntriesAndSchedulerSshSize) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - EXPECT_TRUE(pKernel->isParentKernel); + REQUIRE_DEVICE_ENQUEUE_OR_SKIP(defaultHwInfo); - size_t totalSize = 0; + EXPECT_TRUE(pKernel->isParentKernel); - BlockKernelManager *blockManager = pKernel->getProgram()->getBlockKernelManager(); - uint32_t blockCount = static_cast(blockManager->getCount()); + size_t totalSize = 0; - totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1; // for initial alignment + BlockKernelManager *blockManager = pKernel->getProgram()->getBlockKernelManager(); + uint32_t blockCount = static_cast(blockManager->getCount()); - uint32_t maxBindingTableCount = 0; + totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1; // for initial alignment - for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + uint32_t maxBindingTableCount = 0; - totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize; - totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - - maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState ? 
pBlockInfo->patchInfo.bindingTableState->Count : 0); - } - - totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; - - auto &scheduler = pContext->getSchedulerKernel(); - auto schedulerSshSize = scheduler.getSurfaceStateHeapSize(); - totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0); + for (uint32_t i = 0; i < blockCount; i++) { + const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize; totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - EXPECT_EQ(totalSize, HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel)); + maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState ? pBlockInfo->patchInfo.bindingTableState->Count : 0); } + + totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; + + auto &scheduler = pContext->getSchedulerKernel(); + auto schedulerSshSize = scheduler.getSurfaceStateHeapSize(); + totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? 
BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0); + + totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); + + EXPECT_EQ(totalSize, HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel)); } static const char *binaryFile = "simple_block_kernel"; diff --git a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp index 2d3773bce9..707f9ee0c0 100644 --- a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp @@ -37,9 +37,7 @@ typedef ExecutionModelKernelFixture KernelReflectionSurfaceTest; typedef ExecutionModelKernelTest KernelReflectionSurfaceWithQueueTest; TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelThenKernelReflectionSurfaceIsNull) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - EXPECT_EQ(nullptr, pKernel->getKernelReflectionSurface()); - } + EXPECT_EQ(nullptr, pKernel->getKernelReflectionSurface()); } TEST_P(KernelReflectionSurfaceTest, GivenEmptyKernelInfoWhenPassedToGetCurbeParamsThenEmptyVectorIsReturned) { @@ -486,127 +484,121 @@ TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithoutLocalMemoryParameterWh } TEST_P(KernelReflectionSurfaceTest, WhenGettingCurbeParamsThenReturnedVectorIsSortedIncreasing) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - EXPECT_TRUE(pKernel->isParentKernel); + EXPECT_TRUE(pKernel->isParentKernel); - BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); - size_t blockCount = blockManager->getCount(); + BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); + size_t blockCount = blockManager->getCount(); - EXPECT_NE(0u, blockCount); + EXPECT_NE(0u, blockCount); - std::vector curbeParamsForBlock; + std::vector curbeParamsForBlock; - for (size_t i = 
0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - uint64_t tokenMask = 0; - uint32_t firstSSHTokenIndex = 0; - MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); + for (size_t i = 0; i < blockCount; i++) { + const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + uint64_t tokenMask = 0; + uint32_t firstSSHTokenIndex = 0; + MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); - if (pBlockInfo->name.find("simple_block_kernel") == std::string::npos) { - EXPECT_LT(1u, curbeParamsForBlock.size()); - } + if (pBlockInfo->name.find("simple_block_kernel") == std::string::npos) { + EXPECT_LT(1u, curbeParamsForBlock.size()); + } - for (size_t i = 1; i < curbeParamsForBlock.size(); i++) { - EXPECT_LE(curbeParamsForBlock[i - 1].m_parameterType, curbeParamsForBlock[i].m_parameterType); - if (curbeParamsForBlock[i - 1].m_parameterType == curbeParamsForBlock[i].m_parameterType) { + for (size_t i = 1; i < curbeParamsForBlock.size(); i++) { + EXPECT_LE(curbeParamsForBlock[i - 1].m_parameterType, curbeParamsForBlock[i].m_parameterType); + if (curbeParamsForBlock[i - 1].m_parameterType == curbeParamsForBlock[i].m_parameterType) { - if (curbeParamsForBlock[i - 1].m_parameterType == iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_LOCAL_WORK_SIZE) { - EXPECT_LE(curbeParamsForBlock[i - 1].m_patchOffset, curbeParamsForBlock[i].m_patchOffset); - } else { - EXPECT_LE(curbeParamsForBlock[i - 1].m_sourceOffset, curbeParamsForBlock[i].m_sourceOffset); - } + if (curbeParamsForBlock[i - 1].m_parameterType == iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_LOCAL_WORK_SIZE) { + EXPECT_LE(curbeParamsForBlock[i - 1].m_patchOffset, curbeParamsForBlock[i].m_patchOffset); + } else { + EXPECT_LE(curbeParamsForBlock[i - 1].m_sourceOffset, 
curbeParamsForBlock[i].m_sourceOffset); } } - EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex); - curbeParamsForBlock.resize(0); } + EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex); + curbeParamsForBlock.resize(0); } } TEST_P(KernelReflectionSurfaceTest, WhenGettingCurbeParamsThenReturnedVectorHasExpectedParamTypes) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - EXPECT_TRUE(pKernel->isParentKernel); + EXPECT_TRUE(pKernel->isParentKernel); - BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); - size_t blockCount = blockManager->getCount(); + BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); + size_t blockCount = blockManager->getCount(); - EXPECT_NE(0u, blockCount); + EXPECT_NE(0u, blockCount); - std::vector curbeParamsForBlock; + std::vector curbeParamsForBlock; - for (size_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - uint64_t tokenMask = 0; - uint32_t firstSSHTokenIndex = 0; - MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); + for (size_t i = 0; i < blockCount; i++) { + const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + uint64_t tokenMask = 0; + uint32_t firstSSHTokenIndex = 0; + MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); - const uint32_t bufferType = 49; - const uint32_t imageType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 50; - const uint32_t samplerType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 100; + const uint32_t bufferType = 49; + const uint32_t imageType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 50; + const uint32_t 
samplerType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 100; - bool bufferFound = false; - bool imageFound = false; - bool samplerFound = false; + bool bufferFound = false; + bool imageFound = false; + bool samplerFound = false; - if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) { - EXPECT_LT(1u, curbeParamsForBlock.size()); + if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) { + EXPECT_LT(1u, curbeParamsForBlock.size()); - for (const auto &curbeParams : curbeParamsForBlock) { + for (const auto &curbeParams : curbeParamsForBlock) { - switch (curbeParams.m_parameterType) { - case bufferType: - bufferFound = true; - break; - case imageType: - imageFound = true; - break; - case samplerType: - samplerFound = true; - break; - } + switch (curbeParams.m_parameterType) { + case bufferType: + bufferFound = true; + break; + case imageType: + imageFound = true; + break; + case samplerType: + samplerFound = true; + break; } - - EXPECT_TRUE(bufferFound); - EXPECT_TRUE(imageFound); - EXPECT_TRUE(samplerFound); } - EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex); - curbeParamsForBlock.resize(0); + + EXPECT_TRUE(bufferFound); + EXPECT_TRUE(imageFound); + EXPECT_TRUE(samplerFound); } + EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex); + curbeParamsForBlock.resize(0); } } TEST_P(KernelReflectionSurfaceTest, WhenGettingCurbeParamsThenTokenMaskIsCorrect) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - EXPECT_TRUE(pKernel->isParentKernel); + EXPECT_TRUE(pKernel->isParentKernel); - BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); - size_t blockCount = blockManager->getCount(); + BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); + size_t blockCount = blockManager->getCount(); - EXPECT_NE(0u, blockCount); + 
EXPECT_NE(0u, blockCount); - std::vector curbeParamsForBlock; + std::vector curbeParamsForBlock; - for (size_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + for (size_t i = 0; i < blockCount; i++) { + const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - uint64_t tokenMask = 0; - uint32_t firstSSHTokenIndex = 0; - MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); + uint64_t tokenMask = 0; + uint32_t firstSSHTokenIndex = 0; + MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); - if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) { - EXPECT_LT(1u, curbeParamsForBlock.size()); + if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) { + EXPECT_LT(1u, curbeParamsForBlock.size()); - const uint64_t bufferToken = (uint64_t)1 << 63; - const uint64_t imageToken = (uint64_t)1 << 50; - const uint64_t samplerToken = (uint64_t)1 << 51; + const uint64_t bufferToken = (uint64_t)1 << 63; + const uint64_t imageToken = (uint64_t)1 << 50; + const uint64_t samplerToken = (uint64_t)1 << 51; - uint64_t expectedTokens = bufferToken | imageToken | samplerToken; - EXPECT_NE(0u, tokenMask & expectedTokens); - } - - curbeParamsForBlock.resize(0); + uint64_t expectedTokens = bufferToken | imageToken | samplerToken; + EXPECT_NE(0u, tokenMask & expectedTokens); } + + curbeParamsForBlock.resize(0); } } @@ -755,105 +747,103 @@ TEST(KernelReflectionSurfaceTestSingle, GivenDeviceQueueKernelArgWhenObtainingKe } TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelReflectionSurfaceThenKernelReflectionSurfaceIsCorrect) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - EXPECT_TRUE(pKernel->isParentKernel); + 
EXPECT_TRUE(pKernel->isParentKernel); - BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); - size_t blockCount = blockManager->getCount(); + BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); + size_t blockCount = blockManager->getCount(); - EXPECT_EQ(3u, blockCount); + EXPECT_EQ(3u, blockCount); - size_t maxConstantBufferSize = 0; - size_t parentImageCount = 0; - size_t parentSamplerCount = 0; + size_t maxConstantBufferSize = 0; + size_t parentImageCount = 0; + size_t parentSamplerCount = 0; - if (pKernel->getKernelInfo().name == "kernel_reflection") { - parentImageCount = 1; - parentSamplerCount = 1; - } + if (pKernel->getKernelInfo().name == "kernel_reflection") { + parentImageCount = 1; + parentSamplerCount = 1; + } - size_t samplerStateArrayAndBorderColorTotalSize = 0; - size_t totalCurbeParamsSize = 0; + size_t samplerStateArrayAndBorderColorTotalSize = 0; + size_t totalCurbeParamsSize = 0; - std::vector blockCurbeParamCounts(blockCount); - std::vector samplerStateAndBorderColorSizes(blockCount); - std::vector curbeParamsForBlock; + std::vector blockCurbeParamCounts(blockCount); + std::vector samplerStateAndBorderColorSizes(blockCount); + std::vector curbeParamsForBlock; - for (size_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - uint64_t tokenMask = 0; - uint32_t firstSSHTokenIndex = 0; - MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); + for (size_t i = 0; i < blockCount; i++) { + const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + uint64_t tokenMask = 0; + uint32_t firstSSHTokenIndex = 0; + MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); - blockCurbeParamCounts[i] = curbeParamsForBlock.size(); + blockCurbeParamCounts[i] = 
curbeParamsForBlock.size(); - maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize)); - totalCurbeParamsSize += blockCurbeParamCounts[i]; + maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize)); + totalCurbeParamsSize += blockCurbeParamCounts[i]; - size_t samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(pDevice->getHardwareInfo()); - samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment); - samplerStateAndBorderColorSize += pBlockInfo->getBorderColorStateSize(); - samplerStateAndBorderColorSizes[i] = samplerStateAndBorderColorSize; + size_t samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(pDevice->getHardwareInfo()); + samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment); + samplerStateAndBorderColorSize += pBlockInfo->getBorderColorStateSize(); + samplerStateAndBorderColorSizes[i] = samplerStateAndBorderColorSize; - samplerStateArrayAndBorderColorTotalSize += alignUp(samplerStateAndBorderColorSizes[i], sizeof(void *)); - curbeParamsForBlock.clear(); - } + samplerStateArrayAndBorderColorTotalSize += alignUp(samplerStateAndBorderColorSizes[i], sizeof(void *)); + curbeParamsForBlock.clear(); + } - totalCurbeParamsSize *= sizeof(IGIL_KernelCurbeParams); + totalCurbeParamsSize *= sizeof(IGIL_KernelCurbeParams); - size_t expectedReflectionSurfaceSize = alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) * blockCount, sizeof(void *)); - expectedReflectionSurfaceSize += alignUp(sizeof(IGIL_KernelData), sizeof(void *)) * blockCount; - expectedReflectionSurfaceSize += (parentSamplerCount * sizeof(IGIL_SamplerParams) + maxConstantBufferSize) * blockCount + - totalCurbeParamsSize + - parentImageCount * sizeof(IGIL_ImageParamters) + - 
parentSamplerCount * sizeof(IGIL_ParentSamplerParams) + - samplerStateArrayAndBorderColorTotalSize; + size_t expectedReflectionSurfaceSize = alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) * blockCount, sizeof(void *)); + expectedReflectionSurfaceSize += alignUp(sizeof(IGIL_KernelData), sizeof(void *)) * blockCount; + expectedReflectionSurfaceSize += (parentSamplerCount * sizeof(IGIL_SamplerParams) + maxConstantBufferSize) * blockCount + + totalCurbeParamsSize + + parentImageCount * sizeof(IGIL_ImageParamters) + + parentSamplerCount * sizeof(IGIL_ParentSamplerParams) + + samplerStateArrayAndBorderColorTotalSize; - pKernel->createReflectionSurface(); - auto reflectionSurface = pKernel->getKernelReflectionSurface(); + pKernel->createReflectionSurface(); + auto reflectionSurface = pKernel->getKernelReflectionSurface(); - ASSERT_NE(nullptr, reflectionSurface); - EXPECT_EQ(expectedReflectionSurfaceSize, reflectionSurface->getUnderlyingBufferSize()); + ASSERT_NE(nullptr, reflectionSurface); + EXPECT_EQ(expectedReflectionSurfaceSize, reflectionSurface->getUnderlyingBufferSize()); - IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); + IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); - uint32_t parentImages = 0; - uint32_t parentSamplers = 0; + uint32_t parentImages = 0; + uint32_t parentSamplers = 0; - if (pKernel->getKernelInfo().name == "kernel_reflection") { - parentImages = 1; - parentSamplers = 1; - EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentSamplerParamsOffset); - } + if (pKernel->getKernelInfo().name == "kernel_reflection") { + parentImages = 1; + parentSamplers = 1; + EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentSamplerParamsOffset); + } - EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels); - EXPECT_EQ(parentImages, pKernelHeader->m_ParentKernelImageCount); - EXPECT_LT(sizeof(IGIL_KernelDataHeader), 
pKernelHeader->m_ParentImageDataOffset); - EXPECT_EQ(parentSamplers, pKernelHeader->m_ParentSamplerCount); - EXPECT_NE(pKernelHeader->m_ParentImageDataOffset, pKernelHeader->m_ParentSamplerParamsOffset); + EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels); + EXPECT_EQ(parentImages, pKernelHeader->m_ParentKernelImageCount); + EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentImageDataOffset); + EXPECT_EQ(parentSamplers, pKernelHeader->m_ParentSamplerCount); + EXPECT_NE(pKernelHeader->m_ParentImageDataOffset, pKernelHeader->m_ParentSamplerParamsOffset); - // Curbe tokens - EXPECT_NE(0u, totalCurbeParamsSize); + // Curbe tokens + EXPECT_NE(0u, totalCurbeParamsSize); - for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) { - IGIL_KernelAddressData *addressData = pKernelHeader->m_data; - EXPECT_NE(0u, addressData->m_KernelDataOffset); - EXPECT_NE(0u, addressData->m_BTSize); - EXPECT_NE(0u, addressData->m_SSHTokensOffset); - EXPECT_NE(0u, addressData->m_ConstantBufferOffset); - EXPECT_NE(0u, addressData->m_BTSoffset); + for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) { + IGIL_KernelAddressData *addressData = pKernelHeader->m_data; + EXPECT_NE(0u, addressData->m_KernelDataOffset); + EXPECT_NE(0u, addressData->m_BTSize); + EXPECT_NE(0u, addressData->m_SSHTokensOffset); + EXPECT_NE(0u, addressData->m_ConstantBufferOffset); + EXPECT_NE(0u, addressData->m_BTSoffset); - IGIL_KernelData *kernelData = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)(addressData->m_KernelDataOffset))); + IGIL_KernelData *kernelData = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)(addressData->m_KernelDataOffset))); - EXPECT_NE_VAL(0u, kernelData->m_SIMDSize); - EXPECT_NE_VAL(0u, kernelData->m_PatchTokensMask); - EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeParams); - EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeTokens); - EXPECT_NE_VAL(0u, kernelData->m_sizeOfConstantBuffer); + EXPECT_NE_VAL(0u, kernelData->m_SIMDSize); + EXPECT_NE_VAL(0u, 
kernelData->m_PatchTokensMask); + EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeParams); + EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeTokens); + EXPECT_NE_VAL(0u, kernelData->m_sizeOfConstantBuffer); - for (uint32_t j = 0; j < kernelData->m_numberOfCurbeParams; j++) { - EXPECT_NE_VAL(0u, kernelData->m_data[j].m_parameterType); - } + for (uint32_t j = 0; j < kernelData->m_numberOfCurbeParams; j++) { + EXPECT_NE_VAL(0u, kernelData->m_data[j].m_parameterType); } } } @@ -1026,138 +1016,130 @@ INSTANTIATE_TEST_CASE_P(KernelReflectionSurfaceTest, ::testing::ValuesIn(KernelNames))); HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingKernelReflectionSurfacePatchesThenCurbeIsBlocked) { - REQUIRE_DEVICE_ENQUEUE_OR_SKIP(pPlatform->getClDevice(0)); + BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); + size_t blockCount = blockManager->getCount(); - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { + EXPECT_NE(0u, blockCount); - BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); - size_t blockCount = blockManager->getCount(); + std::vector curbeParamsForBlock; - EXPECT_NE(0u, blockCount); + pKernel->createReflectionSurface(); + pKernel->patchReflectionSurface(pDevQueue, nullptr); - std::vector curbeParamsForBlock; + auto *reflectionSurface = pKernel->getKernelReflectionSurface(); + ASSERT_NE(nullptr, reflectionSurface); + void *reflectionSurfaceMemory = reflectionSurface->getUnderlyingBuffer(); - pKernel->createReflectionSurface(); - pKernel->patchReflectionSurface(pDevQueue, nullptr); + IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); - auto *reflectionSurface = pKernel->getKernelReflectionSurface(); - ASSERT_NE(nullptr, reflectionSurface); - void *reflectionSurfaceMemory = reflectionSurface->getUnderlyingBuffer(); + EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels); - IGIL_KernelDataHeader 
*pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); + for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) { + const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels); + IGIL_KernelAddressData *addressData = pKernelHeader->m_data; - for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); + EXPECT_NE(0u, addressData[i].m_ConstantBufferOffset); - IGIL_KernelAddressData *addressData = pKernelHeader->m_data; + void *pCurbe = ptrOffset(reflectionSurfaceMemory, (size_t)(addressData[i].m_ConstantBufferOffset)); - EXPECT_NE(0u, addressData[i].m_ConstantBufferOffset); - - void *pCurbe = ptrOffset(reflectionSurfaceMemory, (size_t)(addressData[i].m_ConstantBufferOffset)); - - if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface) { - auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset); - if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) { - uint32_t *patchedValue = static_cast(patchedPointer); - uint64_t patchedValue64 = *patchedValue; - EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), patchedValue64); - } else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) { - uint64_t *patchedValue = static_cast(patchedPointer); - EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), *patchedValue); - } + if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface) { + auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset); + if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) { + uint32_t *patchedValue = static_cast(patchedPointer); + uint64_t patchedValue64 = *patchedValue; + 
EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), patchedValue64); + } else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) { + uint64_t *patchedValue = static_cast(patchedPointer); + EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), *patchedValue); } + } - if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { - auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset); - if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) { + if (pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { + auto *patchedPointer = ptrOffset(pCurbe, pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset); + if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint32_t)) { + uint32_t *patchedValue = static_cast(patchedPointer); + uint64_t patchedValue64 = *patchedValue; + EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedValue64); + } else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) { + uint64_t *patchedValue = static_cast(patchedPointer); + EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue); + } + } + + for (const auto &arg : pBlockInfo->kernelArgInfo) { + if (arg.isDeviceQueue) { + + auto *patchedPointer = ptrOffset(pCurbe, arg.kernelArgPatchInfoVector[0].crossthreadOffset); + if (arg.kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { uint32_t *patchedValue = static_cast(patchedPointer); uint64_t patchedValue64 = *patchedValue; EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedValue64); - } else if (pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize == sizeof(uint64_t)) { + } else if (arg.kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { uint64_t *patchedValue = 
static_cast(patchedPointer); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue); } } - - for (const auto &arg : pBlockInfo->kernelArgInfo) { - if (arg.isDeviceQueue) { - - auto *patchedPointer = ptrOffset(pCurbe, arg.kernelArgPatchInfoVector[0].crossthreadOffset); - if (arg.kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { - uint32_t *patchedValue = static_cast(patchedPointer); - uint64_t patchedValue64 = *patchedValue; - EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), patchedValue64); - } else if (arg.kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { - uint64_t *patchedValue = static_cast(patchedPointer); - EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), *patchedValue); - } - } - } } } } HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingKernelReflectionSurfaceThenParentImageAndSamplersParamsAreSet) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { + BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); + size_t blockCount = blockManager->getCount(); - BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); - size_t blockCount = blockManager->getCount(); + EXPECT_NE(0u, blockCount); - EXPECT_NE(0u, blockCount); + std::vector curbeParamsForBlock; - std::vector curbeParamsForBlock; + std::unique_ptr image3d(ImageHelper::create(context)); + std::unique_ptr sampler(new MockSampler(context, + true, + (cl_addressing_mode)CL_ADDRESS_CLAMP_TO_EDGE, + (cl_filter_mode)CL_FILTER_LINEAR)); - std::unique_ptr image3d(ImageHelper::create(context)); - std::unique_ptr sampler(new MockSampler(context, - true, - (cl_addressing_mode)CL_ADDRESS_CLAMP_TO_EDGE, - (cl_filter_mode)CL_FILTER_LINEAR)); + cl_sampler samplerCl = sampler.get(); + cl_mem imageCl = image3d.get(); - cl_sampler samplerCl = sampler.get(); - cl_mem imageCl = image3d.get(); + if (pKernel->getKernelInfo().name == "kernel_reflection") { + 
pKernel->setArgSampler(0, sizeof(cl_sampler), &samplerCl); + pKernel->setArgImage(1, sizeof(cl_mem), &imageCl); + } - if (pKernel->getKernelInfo().name == "kernel_reflection") { - pKernel->setArgSampler(0, sizeof(cl_sampler), &samplerCl); - pKernel->setArgImage(1, sizeof(cl_mem), &imageCl); - } + pKernel->createReflectionSurface(); - pKernel->createReflectionSurface(); + auto *reflectionSurface = pKernel->getKernelReflectionSurface(); + ASSERT_NE(nullptr, reflectionSurface); - auto *reflectionSurface = pKernel->getKernelReflectionSurface(); - ASSERT_NE(nullptr, reflectionSurface); + IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); + IGIL_ImageParamters *pParentImageParams = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentImageDataOffset)); + IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentSamplerParamsOffset)); - IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); - IGIL_ImageParamters *pParentImageParams = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentImageDataOffset)); - IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentSamplerParamsOffset)); + memset(pParentImageParams, 0xff, sizeof(IGIL_ImageParamters) * pKernelHeader->m_ParentKernelImageCount); + memset(pParentSamplerParams, 0xff, sizeof(IGIL_ParentSamplerParams) * pKernelHeader->m_ParentSamplerCount); - memset(pParentImageParams, 0xff, sizeof(IGIL_ImageParamters) * pKernelHeader->m_ParentKernelImageCount); - memset(pParentSamplerParams, 0xff, sizeof(IGIL_ParentSamplerParams) * pKernelHeader->m_ParentSamplerCount); + pKernel->patchReflectionSurface(pDevQueue, nullptr); - pKernel->patchReflectionSurface(pDevQueue, nullptr); + EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels); - EXPECT_EQ(blockCount, 
pKernelHeader->m_numberOfKernels); + for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) { - for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) { - - if (pKernelHeader->m_ParentKernelImageCount > 0) { - uint32_t imageIndex = 0; - for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) { - if (arg.isImage) { - EXPECT_EQ(arg.offsetHeap, pParentImageParams[imageIndex].m_ObjectID); - imageIndex++; - } + if (pKernelHeader->m_ParentKernelImageCount > 0) { + uint32_t imageIndex = 0; + for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) { + if (arg.isImage) { + EXPECT_EQ(arg.offsetHeap, pParentImageParams[imageIndex].m_ObjectID); + imageIndex++; } } + } - if (pKernelHeader->m_ParentSamplerCount > 0) { - uint32_t samplerIndex = 0; - for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) { - if (arg.isSampler) { - EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(arg.offsetHeap), pParentSamplerParams[samplerIndex].m_ObjectID); - samplerIndex++; - } + if (pKernelHeader->m_ParentSamplerCount > 0) { + uint32_t samplerIndex = 0; + for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) { + if (arg.isSampler) { + EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(arg.offsetHeap), pParentSamplerParams[samplerIndex].m_ObjectID); + samplerIndex++; } } } diff --git a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp index a101d0f416..d245de0fe4 100644 --- a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp @@ -193,11 +193,8 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsA } TEST_P(ParentKernelFromBinaryTest, GivenParentKernelWhenGettingInstructionHeapSizeForExecutionModelThenSizeIsGreaterThanZero) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - EXPECT_TRUE(pKernel->isParentKernel); - - EXPECT_LT(0u, 
pKernel->getInstructionHeapSizeForExecutionModel()); - } + EXPECT_TRUE(pKernel->isParentKernel); + EXPECT_LT(0u, pKernel->getInstructionHeapSizeForExecutionModel()); } static const char *binaryFile = "simple_block_kernel"; diff --git a/opencl/test/unit_test/platform/platform_tests.cpp b/opencl/test/unit_test/platform/platform_tests.cpp index 5c37e69f14..ed1952517e 100644 --- a/opencl/test/unit_test/platform/platform_tests.cpp +++ b/opencl/test/unit_test/platform/platform_tests.cpp @@ -26,6 +26,7 @@ #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_source_level_debugger.h" #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h" +#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -96,14 +97,15 @@ TEST_F(PlatformTest, WhenGetClDevicesIsCalledThenExpectedValuesAreReturned) { } TEST_F(PlatformTest, givenSupportingCl21WhenGettingExtensionsStringThenSubgroupsIsEnabled) { + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); + pPlatform->initializeWithNewDevices(); auto compilerExtensions = pPlatform->getClDevice(0)->peekCompilerExtensions(); auto isIndependentForwardProgressSupported = pPlatform->getClDevice(0)->getDeviceInfo().independentForwardProgress; EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string(" -cl-ext=-all,+cl"))); - if ((std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) && - isIndependentForwardProgressSupported) { + if (isIndependentForwardProgressSupported) { EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_khr_subgroups"))); } } diff --git a/opencl/test/unit_test/program/program_nonuniform.cpp b/opencl/test/unit_test/program/program_nonuniform.cpp index 90448186da..139bf1b4b5 100644 --- a/opencl/test/unit_test/program/program_nonuniform.cpp +++ b/opencl/test/unit_test/program/program_nonuniform.cpp @@ -20,6 +20,7 @@ #include 
"opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "opencl/test/unit_test/program/program_with_source.h" +#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "test.h" #include "gmock/gmock.h" @@ -180,87 +181,87 @@ class ProgramNonUniformTest : public ContextFixture, }; TEST_F(ProgramNonUniformTest, GivenCl21WhenExecutingKernelWithNonUniformThenEnqueueSucceeds) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { - CreateProgramFromBinary(pContext, &device, "kernel_data_param"); - auto mockProgram = (MockProgram *)pProgram; - ASSERT_NE(nullptr, mockProgram); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - mockProgram->setBuildOptions("-cl-std=CL2.1"); - retVal = mockProgram->build( - 1, - &device, - nullptr, - nullptr, - nullptr, - false); - EXPECT_EQ(CL_SUCCESS, retVal); + CreateProgramFromBinary(pContext, &device, "kernel_data_param"); + auto mockProgram = (MockProgram *)pProgram; + ASSERT_NE(nullptr, mockProgram); - auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size"); - EXPECT_NE(nullptr, pKernelInfo); + mockProgram->setBuildOptions("-cl-std=CL2.1"); + retVal = mockProgram->build( + 1, + &device, + nullptr, + nullptr, + nullptr, + false); + EXPECT_EQ(CL_SUCCESS, retVal); - // create a kernel - auto pKernel = Kernel::create(mockProgram, *pKernelInfo, &retVal); - ASSERT_EQ(CL_SUCCESS, retVal); - ASSERT_NE(nullptr, pKernel); + auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size"); + EXPECT_NE(nullptr, pKernelInfo); - size_t globalWorkSize[3] = {12, 12, 12}; - size_t localWorkSize[3] = {11, 12, 1}; + // create a kernel + auto pKernel = Kernel::create(mockProgram, *pKernelInfo, &retVal); + ASSERT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(nullptr, pKernel); - retVal = pCmdQ->enqueueKernel( - pKernel, - 3, - nullptr, - globalWorkSize, - localWorkSize, - 0, - nullptr, - nullptr); - 
EXPECT_EQ(CL_SUCCESS, retVal); + size_t globalWorkSize[3] = {12, 12, 12}; + size_t localWorkSize[3] = {11, 12, 1}; - delete pKernel; - } + retVal = pCmdQ->enqueueKernel( + pKernel, + 3, + nullptr, + globalWorkSize, + localWorkSize, + 0, + nullptr, + nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + + delete pKernel; } TEST_F(ProgramNonUniformTest, GivenCl20WhenExecutingKernelWithNonUniformThenEnqueueSucceeds) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.0") != std::string::npos) { - CreateProgramFromBinary(pContext, &device, "kernel_data_param"); - auto mockProgram = pProgram; - ASSERT_NE(nullptr, mockProgram); + REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - mockProgram->setBuildOptions("-cl-std=CL2.0"); - retVal = mockProgram->build( - 1, - &device, - nullptr, - nullptr, - nullptr, - false); - EXPECT_EQ(CL_SUCCESS, retVal); + CreateProgramFromBinary(pContext, &device, "kernel_data_param"); + auto mockProgram = pProgram; + ASSERT_NE(nullptr, mockProgram); - auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size"); - EXPECT_NE(nullptr, pKernelInfo); + mockProgram->setBuildOptions("-cl-std=CL2.0"); + retVal = mockProgram->build( + 1, + &device, + nullptr, + nullptr, + nullptr, + false); + EXPECT_EQ(CL_SUCCESS, retVal); - // create a kernel - auto pKernel = Kernel::create(mockProgram, *pKernelInfo, &retVal); - ASSERT_EQ(CL_SUCCESS, retVal); - ASSERT_NE(nullptr, pKernel); + auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size"); + EXPECT_NE(nullptr, pKernelInfo); - size_t globalWorkSize[3] = {12, 12, 12}; - size_t localWorkSize[3] = {11, 12, 12}; + // create a kernel + auto pKernel = Kernel::create(mockProgram, *pKernelInfo, &retVal); + ASSERT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(nullptr, pKernel); - retVal = pCmdQ->enqueueKernel( - pKernel, - 3, - nullptr, - globalWorkSize, - localWorkSize, - 0, - nullptr, - nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); + size_t globalWorkSize[3] = {12, 12, 12}; 
+ size_t localWorkSize[3] = {11, 12, 1}; - delete pKernel; - } + retVal = pCmdQ->enqueueKernel( + pKernel, + 3, + nullptr, + globalWorkSize, + localWorkSize, + 0, + nullptr, + nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + + delete pKernel; } TEST_F(ProgramNonUniformTest, GivenCl12WhenExecutingKernelWithNonUniformThenInvalidWorkGroupSizeIsReturned) { diff --git a/opencl/test/unit_test/program/program_with_block_kernels_tests.cpp b/opencl/test/unit_test/program/program_with_block_kernels_tests.cpp index 7ba57e69b4..2a1543e6bf 100644 --- a/opencl/test/unit_test/program/program_with_block_kernels_tests.cpp +++ b/opencl/test/unit_test/program/program_with_block_kernels_tests.cpp @@ -16,6 +16,7 @@ #include "opencl/test/unit_test/fixtures/run_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_program.h" +#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include @@ -38,6 +39,7 @@ class ProgramWithBlockKernelsTest : public ContextFixture, device = pPlatform->getClDevice(0); ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); + REQUIRE_OCL_21_OR_SKIP(pContext); } void TearDown() override { @@ -50,85 +52,77 @@ class ProgramWithBlockKernelsTest : public ContextFixture, }; TEST_F(ProgramWithBlockKernelsTest, GivenKernelWithBlockKernelsWhenProgramIsBuildingThenKernelInfosHaveCorrectNames) { - if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { - CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0"); - auto mockProgram = (MockProgram *)pProgram; - ASSERT_NE(nullptr, mockProgram); + CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0"); + auto mockProgram = (MockProgram *)pProgram; + ASSERT_NE(nullptr, mockProgram); - retVal = mockProgram->build( - 1, - &device, - nullptr, - nullptr, - nullptr, - false); - EXPECT_EQ(CL_SUCCESS, retVal); + retVal = mockProgram->build( + 1, + 
&device, + nullptr, + nullptr, + nullptr, + false); + EXPECT_EQ(CL_SUCCESS, retVal); - auto kernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel"); - EXPECT_NE(nullptr, kernelInfo); + auto kernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel"); + EXPECT_NE(nullptr, kernelInfo); - auto blockKernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel_dispatch_0"); - EXPECT_EQ(nullptr, blockKernelInfo); + auto blockKernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel_dispatch_0"); + EXPECT_EQ(nullptr, blockKernelInfo); - std::vector blockKernelInfos(mockProgram->blockKernelManager->getCount()); + std::vector blockKernelInfos(mockProgram->blockKernelManager->getCount()); - for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) { - const KernelInfo *blockKernelInfo = mockProgram->blockKernelManager->getBlockKernelInfo(i); - EXPECT_NE(nullptr, blockKernelInfo); - blockKernelInfos[i] = blockKernelInfo; - } - - bool blockKernelFound = false; - for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) { - if (blockKernelInfos[i]->name.find("simple_block_kernel_dispatch") != std::string::npos) { - blockKernelFound = true; - break; - } - } - - EXPECT_TRUE(blockKernelFound); - - } else { - EXPECT_EQ(nullptr, pProgram); + for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) { + const KernelInfo *blockKernelInfo = mockProgram->blockKernelManager->getBlockKernelInfo(i); + EXPECT_NE(nullptr, blockKernelInfo); + blockKernelInfos[i] = blockKernelInfo; } + + bool blockKernelFound = false; + for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) { + if (blockKernelInfos[i]->name.find("simple_block_kernel_dispatch") != std::string::npos) { + blockKernelFound = true; + break; + } + } + + EXPECT_TRUE(blockKernelFound); } TEST_F(ProgramWithBlockKernelsTest, GivenKernelWithBlockKernelsWhenProgramIsLinkedThenBlockKernelsAreSeparated) { - if 
(std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.0") != std::string::npos) { - CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0"); - const char *buildOptions = "-cl-std=CL2.0"; + CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0"); + const char *buildOptions = "-cl-std=CL2.0"; - overwriteBuiltInBinaryName( - &pPlatform->getClDevice(0)->getDevice(), - "simple_block_kernel", true); + overwriteBuiltInBinaryName( + &pPlatform->getClDevice(0)->getDevice(), + "simple_block_kernel", true); - ASSERT_NE(nullptr, pProgram); + ASSERT_NE(nullptr, pProgram); - EXPECT_EQ(CL_SUCCESS, retVal); - Program *programLinked = new Program(*pPlatform->peekExecutionEnvironment(), pContext, false, nullptr); - cl_program program = pProgram; + EXPECT_EQ(CL_SUCCESS, retVal); + Program *programLinked = new Program(*pPlatform->peekExecutionEnvironment(), pContext, false, nullptr); + cl_program program = pProgram; - retVal = pProgram->compile(1, &device, buildOptions, 0, nullptr, nullptr, nullptr, nullptr); + retVal = pProgram->compile(1, &device, buildOptions, 0, nullptr, nullptr, nullptr, nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(CL_SUCCESS, retVal); - retVal = programLinked->link(1, &device, buildOptions, 1, &program, nullptr, nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); + retVal = programLinked->link(1, &device, buildOptions, 1, &program, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); - BlockKernelManager *blockManager = programLinked->getBlockKernelManager(); + BlockKernelManager *blockManager = programLinked->getBlockKernelManager(); - EXPECT_NE(0u, blockManager->getCount()); + EXPECT_NE(0u, blockManager->getCount()); - for (uint32_t i = 0; i < blockManager->getCount(); i++) { - const KernelInfo *info = blockManager->getBlockKernelInfo(i); - if (info->name.find("simple_block_kernel_dispatch") != std::string::npos) { - break; - } + for (uint32_t i = 0; i < blockManager->getCount(); 
i++) { + const KernelInfo *info = blockManager->getBlockKernelInfo(i); + if (info->name.find("simple_block_kernel_dispatch") != std::string::npos) { + break; } - restoreBuiltInBinaryName(nullptr); - delete programLinked; - } else { - EXPECT_EQ(nullptr, pProgram); } + restoreBuiltInBinaryName(nullptr); + delete programLinked; } + } // namespace NEO