// Review commentary for this patch (text ahead of the first `diff --git` header; the patch body is unchanged apart from two comment typos).
// What the patch text shows:
//  - core/helpers/simd_helper.h (new): getSimdConfig(simdSize) casts `simdSize >> 4` to WALKER_TYPE::SIMD_SIZE,
//    substituting `32 >> 4` when simdSize == 1.
//  - core/unit_tests/helpers/simd_helper_tests.inl (new) plus gen8/gen9/gen11/gen12lp wrappers and CMake entries:
//    a shared TestBodyImpl() checks inputs 32, 16, 8 and 1 against SIMD32, SIMD16, SIMD8 and SIMD32.
//  - runtime/command_queue/gpgpu_walker_bdw_plus.inl: setSimdSize() now takes its value from getSimdConfig()
//    instead of a direct `simd >> 4`.
//  - runtime/command_queue/local_id_gen.cpp/.h: generateLocalIDs() calls generateLocalIDsForSimdOne() for any simd
//    other than 32/16/8; that function writes k, j, i at indices 0..2 of each slot and advances by sizeof(GRF).
//    The 32/16/8 selector in the getThreadsPerWG hunk yields 0 (was 3) for other values, and
//    getPerThreadSizeLocalIDs() multiplies by 1 instead of numChannels when simd == 1.
//  - runtime/program/kernel_info.h: getMaxSimdSize() also returns 1 when LargestCompiledSIMDSize == 1.
// NOTE(review): angle-bracket content looks lost in this copy (`template` with no parameter list, `static_cast(`
//   with no target type, `#include` with no header, `std::array` with no arguments) - confirm against the original commit.
// NOTE(review): the else-branch in generateLocalIDs() now routes every simd value other than 32/16/8 to the
//   SIMD1 path - presumably callers only pass 1; verify.
// NOTE(review): in the added getMaxSimd test, oldExcEnv is saved and restored although the visible body never
//   reassigns patchInfo.executionEnvironment - looks redundant; confirm.
// NOTE(review): `iOpenCL::SPatchThreadPayload threadPayload;` in the added walker test has no initializer -
//   confirm setGpgpuWalkerThreadData() does not read it.
diff --git a/core/helpers/simd_helper.h b/core/helpers/simd_helper.h new file mode 100644 index 0000000000..0736aa33f5 --- /dev/null +++ b/core/helpers/simd_helper.h @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2017-2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include + +template +constexpr typename WALKER_TYPE::SIMD_SIZE getSimdConfig(uint32_t simdSize) { + return static_cast((simdSize == 1) ? (32 >> 4) : (simdSize >> 4)); +} diff --git a/core/unit_tests/gen11/CMakeLists.txt b/core/unit_tests/gen11/CMakeLists.txt index b9590031f1..fd3b044067 100644 --- a/core/unit_tests/gen11/CMakeLists.txt +++ b/core/unit_tests/gen11/CMakeLists.txt @@ -7,6 +7,7 @@ set(NEO_CORE_TESTS_GEN11 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/preamble_tests_gen11.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/simd_helper_tests_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_preamble_gen11.cpp ) set_property(GLOBAL PROPERTY NEO_CORE_TESTS_GEN11 ${NEO_CORE_TESTS_GEN11}) diff --git a/core/unit_tests/gen11/simd_helper_tests_gen11.cpp b/core/unit_tests/gen11/simd_helper_tests_gen11.cpp new file mode 100644 index 0000000000..44e871bcc8 --- /dev/null +++ b/core/unit_tests/gen11/simd_helper_tests_gen11.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2017-2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "core/unit_tests/helpers/simd_helper_tests.inl" + +using namespace NEO; + +using TestSimdConfigSet = ::testing::Test; + +GEN11TEST_F(TestSimdConfigSet, GivenSimdSizeWhenGetSimdConfigCalledThenCorrectEnumReturnedGen11) { + GivenSimdSizeWhenGetSimdConfigCalledThenCorrectEnumReturned::TestBodyImpl(); +} \ No newline at end of file diff --git a/core/unit_tests/gen12lp/CMakeLists.txt b/core/unit_tests/gen12lp/CMakeLists.txt index d5f630d853..30b017ceed 100644 --- a/core/unit_tests/gen12lp/CMakeLists.txt +++ b/core/unit_tests/gen12lp/CMakeLists.txt @@ -6,6 +6,7 @@ set(NEO_CORE_TESTS_GEN12LP ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + 
${CMAKE_CURRENT_SOURCE_DIR}/simd_helper_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_preamble_gen12lp.cpp ) set_property(GLOBAL PROPERTY NEO_CORE_TESTS_GEN12LP ${NEO_CORE_TESTS_GEN12LP}) diff --git a/core/unit_tests/gen12lp/simd_helper_tests_gen12lp.cpp b/core/unit_tests/gen12lp/simd_helper_tests_gen12lp.cpp new file mode 100644 index 0000000000..3ad3826d49 --- /dev/null +++ b/core/unit_tests/gen12lp/simd_helper_tests_gen12lp.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2017-2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "core/unit_tests/helpers/simd_helper_tests.inl" + +using namespace NEO; + +using TestSimdConfigSet = ::testing::Test; + +GEN12LPTEST_F(TestSimdConfigSet, GivenSimdSizeWhenGetSimdConfigCalledThenCorrectEnumReturnedGen12LP) { + GivenSimdSizeWhenGetSimdConfigCalledThenCorrectEnumReturned::TestBodyImpl(); +} \ No newline at end of file diff --git a/core/unit_tests/gen8/CMakeLists.txt b/core/unit_tests/gen8/CMakeLists.txt index 7331c3c671..513e09e956 100644 --- a/core/unit_tests/gen8/CMakeLists.txt +++ b/core/unit_tests/gen8/CMakeLists.txt @@ -7,6 +7,7 @@ set(NEO_CORE_TESTS_GEN8 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_preamble_gen8.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/simd_helper_tests_gen8.cpp ) set_property(GLOBAL PROPERTY NEO_CORE_TESTS_GEN8 ${NEO_CORE_TESTS_GEN8}) add_subdirectories() \ No newline at end of file diff --git a/core/unit_tests/gen8/simd_helper_tests_gen8.cpp b/core/unit_tests/gen8/simd_helper_tests_gen8.cpp new file mode 100644 index 0000000000..2c6eaed886 --- /dev/null +++ b/core/unit_tests/gen8/simd_helper_tests_gen8.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2017-2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "core/unit_tests/helpers/simd_helper_tests.inl" + +using namespace NEO; + +using TestSimdConfigSet = ::testing::Test; + +GEN8TEST_F(TestSimdConfigSet, GivenSimdSizeWhenGetSimdConfigCalledThenCorrectEnumReturnedGen8) { + 
GivenSimdSizeWhenGetSimdConfigCalledThenCorrectEnumReturned::TestBodyImpl(); +} diff --git a/core/unit_tests/gen9/CMakeLists.txt b/core/unit_tests/gen9/CMakeLists.txt index d94c795333..c9694c8a6a 100644 --- a/core/unit_tests/gen9/CMakeLists.txt +++ b/core/unit_tests/gen9/CMakeLists.txt @@ -7,6 +7,7 @@ set(NEO_CORE_TESTS_GEN9 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/preamble_tests_gen9.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/simd_helper_tests_gen9.cpp ) set_property(GLOBAL PROPERTY NEO_CORE_TESTS_GEN9 ${NEO_CORE_TESTS_GEN9}) add_subdirectories() \ No newline at end of file diff --git a/core/unit_tests/gen9/simd_helper_tests_gen9.cpp b/core/unit_tests/gen9/simd_helper_tests_gen9.cpp new file mode 100644 index 0000000000..e5bb3585d8 --- /dev/null +++ b/core/unit_tests/gen9/simd_helper_tests_gen9.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2017-2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "core/unit_tests/helpers/simd_helper_tests.inl" + +using namespace NEO; + +using TestSimdConfigSet = ::testing::Test; + +GEN9TEST_F(TestSimdConfigSet, GivenSimdSizeWhenGetSimdConfigCalledThenCorrectEnumReturnedGen9) { + GivenSimdSizeWhenGetSimdConfigCalledThenCorrectEnumReturned::TestBodyImpl(); +} \ No newline at end of file diff --git a/core/unit_tests/helpers/CMakeLists.txt b/core/unit_tests/helpers/CMakeLists.txt index 13722e6c56..be325e5dbd 100644 --- a/core/unit_tests/helpers/CMakeLists.txt +++ b/core/unit_tests/helpers/CMakeLists.txt @@ -11,6 +11,7 @@ set(NEO_CORE_HELPERS_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/hash_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_leak_listener.h ${CMAKE_CURRENT_SOURCE_DIR}/memory_management.h + ${CMAKE_CURRENT_SOURCE_DIR}/simd_helper_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/string_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/string_to_hash_tests.cpp ) diff --git a/core/unit_tests/helpers/simd_helper_tests.inl b/core/unit_tests/helpers/simd_helper_tests.inl new file mode 100644 index 0000000000..99d368d710 
--- /dev/null +++ b/core/unit_tests/helpers/simd_helper_tests.inl @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2017-2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "core/helpers/simd_helper.h" +#include "test.h" + +namespace NEO { + +template +class GivenSimdSizeWhenGetSimdConfigCalledThenCorrectEnumReturned { + public: + static void TestBodyImpl() { + uint32_t simd = 32; + auto result = getSimdConfig(simd); + EXPECT_EQ(result, WALKER_TYPE::SIMD_SIZE::SIMD_SIZE_SIMD32); + + simd = 16; + result = getSimdConfig(simd); + EXPECT_EQ(result, WALKER_TYPE::SIMD_SIZE::SIMD_SIZE_SIMD16); + + simd = 8; + result = getSimdConfig(simd); + EXPECT_EQ(result, WALKER_TYPE::SIMD_SIZE::SIMD_SIZE_SIMD8); + + simd = 1; + result = getSimdConfig(simd); + EXPECT_EQ(result, WALKER_TYPE::SIMD_SIZE::SIMD_SIZE_SIMD32); + } +}; +} // namespace NEO diff --git a/runtime/command_queue/gpgpu_walker_bdw_plus.inl b/runtime/command_queue/gpgpu_walker_bdw_plus.inl index 160a52609a..b9f88fbed9 100644 --- a/runtime/command_queue/gpgpu_walker_bdw_plus.inl +++ b/runtime/command_queue/gpgpu_walker_bdw_plus.inl @@ -6,6 +6,7 @@ */ #pragma once +#include "core/helpers/simd_helper.h" #include "runtime/command_queue/gpgpu_walker_base.inl" #include "runtime/helpers/engine_node_helper.h" @@ -43,7 +44,7 @@ inline size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData( walkerCmd->setRightExecutionMask(static_cast(executionMask)); walkerCmd->setBottomExecutionMask(static_cast(0xffffffff)); - walkerCmd->setSimdSize(static_cast(simd >> 4)); + walkerCmd->setSimdSize(getSimdConfig>(simd)); walkerCmd->setThreadGroupIdStartingX(static_cast(startWorkGroups[0])); walkerCmd->setThreadGroupIdStartingY(static_cast(startWorkGroups[1])); diff --git a/runtime/command_queue/local_id_gen.cpp b/runtime/command_queue/local_id_gen.cpp index 0f6ec6b604..42f5a078f5 100644 --- a/runtime/command_queue/local_id_gen.cpp +++ b/runtime/command_queue/local_id_gen.cpp @@ -52,8 +52,10 @@ void generateLocalIDs(void *buffer, 
uint16_t simd, const std::array LocalIDHelper::generateSimd32(buffer, localWorkgroupSize, threadsPerWorkGroup, dimensionsOrder); } else if (simd == 16) { LocalIDHelper::generateSimd16(buffer, localWorkgroupSize, threadsPerWorkGroup, dimensionsOrder); - } else { + } else if (simd == 8) { LocalIDHelper::generateSimd8(buffer, localWorkgroupSize, threadsPerWorkGroup, dimensionsOrder); + } else { + generateLocalIDsForSimdOne(buffer, localWorkgroupSize, dimensionsOrder); } } @@ -114,4 +116,23 @@ inline void generateLocalIDsWithLayoutForImages(void *b, const std::array &localWorkgroupSize, + const std::array &dimensionsOrder) { + uint32_t xDimNum = dimensionsOrder[0]; + uint32_t yDimNum = dimensionsOrder[1]; + uint32_t zDimNum = dimensionsOrder[2]; + + for (int i = 0; i < localWorkgroupSize[zDimNum]; i++) { + for (int j = 0; j < localWorkgroupSize[yDimNum]; j++) { + for (int k = 0; k < localWorkgroupSize[xDimNum]; k++) { + static_cast(b)[0] = k; + static_cast(b)[1] = j; + static_cast(b)[2] = i; + b = ptrOffset(b, sizeof(GRF)); + } + } + } +} + } // namespace NEO diff --git a/runtime/command_queue/local_id_gen.h b/runtime/command_queue/local_id_gen.h index 479af5a319..8da11df9ef 100644 --- a/runtime/command_queue/local_id_gen.h +++ b/runtime/command_queue/local_id_gen.h @@ -34,14 +34,16 @@ inline size_t getThreadsPerWG(uint32_t simd, size_t lws) { ? 5 : simd == 16 ? 4 - : 3; + : simd == 8 + ? 3 + : 0; return result; } inline size_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t numChannels = 3) { auto numGRFSPerThread = getGRFsPerThread(simd); - auto returnSize = numChannels * numGRFSPerThread * sizeof(GRF); + auto returnSize = numGRFSPerThread * sizeof(GRF) * (simd == 1 ? 
1 : numChannels); returnSize = std::max(returnSize, sizeof(GRF)); return returnSize; } @@ -68,4 +70,7 @@ void generateLocalIDs(void *buffer, uint16_t simd, const std::array void generateLocalIDsWithLayoutForImages(void *b, const std::array &localWorkgroupSize, uint16_t simd); bool isCompatibleWithLayoutForImages(const std::array &localWorkgroupSize, const std::array &dimensionsOrder, uint16_t simd); + +void generateLocalIDsForSimdOne(void *b, const std::array &localWorkgroupSize, + const std::array &dimensionsOrder); } // namespace NEO \ No newline at end of file diff --git a/runtime/program/kernel_info.h b/runtime/program/kernel_info.h index 90a4fb90ad..652d0a99ee 100644 --- a/runtime/program/kernel_info.h +++ b/runtime/program/kernel_info.h @@ -169,7 +169,7 @@ struct KernelInfo { size_t getBorderColorOffset() const; unsigned int getMaxSimdSize() const { const auto executionEnvironment = patchInfo.executionEnvironment; - if (executionEnvironment == nullptr) { + if (executionEnvironment == nullptr || executionEnvironment->LargestCompiledSIMDSize == 1) { return 1; } diff --git a/unit_tests/command_queue/dispatch_walker_tests.cpp b/unit_tests/command_queue/dispatch_walker_tests.cpp index 8af4dc77ba..d5539ffc96 100644 --- a/unit_tests/command_queue/dispatch_walker_tests.cpp +++ b/unit_tests/command_queue/dispatch_walker_tests.cpp @@ -124,6 +124,27 @@ HWTEST_F(DispatchWalkerTest, WhenGettingComputeDimensionsThenCorrectNumberOfDime EXPECT_EQ(3u, computeDimensions(workItems3D)); } +HWTEST_F(DispatchWalkerTest, givenSimd1WhenSetGpgpuWalkerThreadDataThenSimdInWalkerIsSetTo32Value) { + uint32_t pCmdBuffer[1024]; + MockGraphicsAllocation gfxAllocation((void *)pCmdBuffer, sizeof(pCmdBuffer)); + LinearStream linearStream(&gfxAllocation); + + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + WALKER_TYPE *computeWalker = static_cast(linearStream.getSpace(sizeof(WALKER_TYPE))); + *computeWalker = FamilyType::cmdInitGpgpuWalker; + + size_t globalOffsets[] = {0, 0, 0}; + 
size_t startWorkGroups[] = {0, 0, 0}; + size_t numWorkGroups[] = {1, 1, 1}; + size_t localWorkSizesIn[] = {32, 1, 1}; + uint32_t simd = 1; + iOpenCL::SPatchThreadPayload threadPayload; + + GpgpuWalkerHelper::setGpgpuWalkerThreadData( + computeWalker, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, threadPayload, 5u); + EXPECT_EQ(computeWalker->getSimdSize(), 32 >> 4); +} + HWTEST_F(DispatchWalkerTest, WhenDispatchingWalkerThenCommandStreamMemoryIsntChanged) { MockKernel kernel(program.get(), kernelInfo, *pDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); diff --git a/unit_tests/command_queue/local_id_tests.cpp b/unit_tests/command_queue/local_id_tests.cpp index d038736eeb..6a119a9b34 100644 --- a/unit_tests/command_queue/local_id_tests.cpp +++ b/unit_tests/command_queue/local_id_tests.cpp @@ -59,6 +59,12 @@ TEST(LocalID, PerThreadSizeLocalIDs_SIMD32) { EXPECT_EQ(6 * sizeof(GRF), getPerThreadSizeLocalIDs(simd)); } +TEST(LocalID, PerThreadSizeLocalIDs_SIMD1) { + uint32_t simd = 1; + + EXPECT_EQ(sizeof(GRF), getPerThreadSizeLocalIDs(simd)); +} + struct LocalIDFixture : public ::testing::TestWithParam> { void SetUp() override { simd = std::get<0>(GetParam()); diff --git a/unit_tests/helpers/per_thread_data_tests.cpp b/unit_tests/helpers/per_thread_data_tests.cpp index ef78a9bdd2..cec1b6360b 100644 --- a/unit_tests/helpers/per_thread_data_tests.cpp +++ b/unit_tests/helpers/per_thread_data_tests.cpp @@ -252,3 +252,50 @@ TEST(PerThreadDataTest, generateLocalIDs) { alignedFree(buffer); alignedFree(reference); } + +TEST(PerThreadDataTest, givenSimdEqualOneWhenSetingLocalIdsInPerThreadDataThenIdsAreSetInCorrectOrder) { + uint32_t simd = 1; + uint32_t numChannels = 3; + uint32_t localWorkSize = 24; + + size_t localWorkSizes[3] = {3, 4, 2}; + + auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, numChannels, localWorkSize); + + auto sizeOverSizedBuffer = sizePerThreadDataTotal * 4; + auto buffer = 
static_cast(alignedMalloc(sizeOverSizedBuffer, 16)); + memset(buffer, 0, sizeOverSizedBuffer); + + // Setup reference filled with zeros + auto reference = static_cast(alignedMalloc(sizePerThreadDataTotal, 16)); + memset(reference, 0, sizePerThreadDataTotal); + + LinearStream stream(buffer, sizeOverSizedBuffer / 2); + PerThreadDataHelper::sendPerThreadData( + stream, + simd, + numChannels, + localWorkSizes, + {{0, 1, 2}}, + false); + + auto bufferPtr = buffer; + for (uint16_t i = 0; i < localWorkSizes[2]; i++) { + for (uint16_t j = 0; j < localWorkSizes[1]; j++) { + for (uint16_t k = 0; k < localWorkSizes[0]; k++) { + uint16_t ids[] = {k, j, i}; + int result = memcmp(bufferPtr, ids, sizeof(uint16_t) * 3); + EXPECT_EQ(0, result); + bufferPtr += sizeof(GRF); + } + } + } + // Check if buffer overrun happened, only first sizePerThreadDataTotal bytes can be overwritten, following should be same as reference. + for (auto i = sizePerThreadDataTotal; i < sizeOverSizedBuffer; i += sizePerThreadDataTotal) { + int result = memcmp(buffer + i, reference, sizePerThreadDataTotal); + EXPECT_EQ(0, result); + } + + alignedFree(buffer); + alignedFree(reference); +} diff --git a/unit_tests/kernel/kernel_tests.cpp b/unit_tests/kernel/kernel_tests.cpp index aa9b85d1b1..0167bbf6f0 100644 --- a/unit_tests/kernel/kernel_tests.cpp +++ b/unit_tests/kernel/kernel_tests.cpp @@ -2163,6 +2163,16 @@ TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturns1WhenExecutionEnvironmen this->pKernelInfo->patchInfo.executionEnvironment = oldExcEnv; } +TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturns1WhenLargestCompilledSimdSizeEqualOne) { + + executionEnvironment.LargestCompiledSIMDSize = 1; + + auto oldExcEnv = this->pKernelInfo->patchInfo.executionEnvironment; + + EXPECT_EQ(1U, this->pKernelInfo->getMaxSimdSize()); + this->pKernelInfo->patchInfo.executionEnvironment = oldExcEnv; +} + TEST_F(KernelExecutionEnvironmentTest, getMaxRequiredWorkGroupSizeWhenCompiledWorkGroupSizeIsZero) { auto 
maxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX;