From 6cc9b9d1258a11fdce1f02c47f0e25b7bfbedde6 Mon Sep 17 00:00:00 2001 From: Michal Mrozek Date: Tue, 4 Feb 2020 17:58:41 +0100 Subject: [PATCH] Add choose max row size parameter for local id generation. Change-Id: I77185b6c114092859c742236a4dfef01deb9ea21 --- runtime/command_queue/local_id_gen.cpp | 14 ++--- runtime/command_queue/local_id_gen.h | 12 ++-- runtime/command_queue/local_id_gen.inl | 6 +- runtime/command_queue/local_id_gen_avx2.cpp | 8 +-- runtime/command_queue/local_id_gen_sse4.cpp | 8 +-- .../command_queue/flattened_id_tests.cpp | 25 ++++---- unit_tests/command_queue/local_id_tests.cpp | 57 +++++++++++++------ unit_tests/helpers/unit_test_helper.h | 2 + unit_tests/helpers/unit_test_helper.inl | 5 +- 9 files changed, 83 insertions(+), 54 deletions(-) diff --git a/runtime/command_queue/local_id_gen.cpp b/runtime/command_queue/local_id_gen.cpp index 835796d579..df2dd8441b 100644 --- a/runtime/command_queue/local_id_gen.cpp +++ b/runtime/command_queue/local_id_gen.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2019 Intel Corporation + * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -26,9 +26,9 @@ const uint16_t initialLocalID[] = { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; // Lookup table for generating LocalIDs based on the SIMD of the kernel -void (*LocalIDHelper::generateSimd8)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder) = generateLocalIDsSimd; -void (*LocalIDHelper::generateSimd16)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder) = generateLocalIDsSimd; -void (*LocalIDHelper::generateSimd32)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder) = generateLocalIDsSimd; +void (*LocalIDHelper::generateSimd8)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize) = generateLocalIDsSimd; +void (*LocalIDHelper::generateSimd16)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize) = generateLocalIDsSimd; +void (*LocalIDHelper::generateSimd32)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize) = generateLocalIDsSimd; // Initialize the lookup table based on CPU capabilities LocalIDHelper::LocalIDHelper() { @@ -49,11 +49,11 @@ void generateLocalIDs(void *buffer, uint16_t simd, const std::array if (useLayoutForImages) { generateLocalIDsWithLayoutForImages(buffer, localWorkgroupSize, simd); } else if (simd == 32) { - LocalIDHelper::generateSimd32(buffer, localWorkgroupSize, threadsPerWorkGroup, dimensionsOrder); + LocalIDHelper::generateSimd32(buffer, localWorkgroupSize, threadsPerWorkGroup, dimensionsOrder, grfSize != 32); } else if (simd == 16) { - LocalIDHelper::generateSimd16(buffer, localWorkgroupSize, threadsPerWorkGroup, dimensionsOrder); + LocalIDHelper::generateSimd16(buffer, localWorkgroupSize, threadsPerWorkGroup, dimensionsOrder, grfSize != 32); } else if (simd == 8) { - LocalIDHelper::generateSimd8(buffer, localWorkgroupSize, threadsPerWorkGroup, dimensionsOrder); + LocalIDHelper::generateSimd8(buffer, localWorkgroupSize, threadsPerWorkGroup, dimensionsOrder, grfSize != 32); } else { generateLocalIDsForSimdOne(buffer, localWorkgroupSize, dimensionsOrder, grfSize); } diff --git a/runtime/command_queue/local_id_gen.h b/runtime/command_queue/local_id_gen.h index 832fde5160..72e98d5fa9 100644 --- a/runtime/command_queue/local_id_gen.h +++ b/runtime/command_queue/local_id_gen.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2019 Intel Corporation + * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -43,9 +43,9 @@ inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32 } struct LocalIDHelper { - static void (*generateSimd8)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); - static void (*generateSimd16)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); - static void (*generateSimd32)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); + static void (*generateSimd8)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); + static void (*generateSimd16)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); + static void (*generateSimd32)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); static LocalIDHelper initializer; @@ -57,7 +57,7 @@ extern const uint16_t initialLocalID[]; template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, - const std::array &dimensionsOrder); + const std::array &dimensionsOrder, bool chooseMaxRowSize); void generateLocalIDs(void *buffer, uint16_t simd, const std::array &localWorkgroupSize, const std::array &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize); @@ -67,4 +67,4 @@ bool isCompatibleWithLayoutForImages(const std::array &localWorkgro void generateLocalIDsForSimdOne(void *b, const std::array &localWorkgroupSize, const std::array &dimensionsOrder, uint32_t grfSize); -} // namespace NEO \ No newline at end of file +} // namespace NEO diff --git a/runtime/command_queue/local_id_gen.inl b/runtime/command_queue/local_id_gen.inl index f45e7470c0..6fb099d7dd 100644 --- a/runtime/command_queue/local_id_gen.inl +++ b/runtime/command_queue/local_id_gen.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2019 Intel Corporation + * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -13,7 +13,7 @@ namespace NEO { template inline void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, - const std::array &dimensionsOrder) { + const std::array &dimensionsOrder, bool chooseMaxRowSize) { const int passes = simd / Vec::numChannels; int pass = 0; @@ -27,7 +27,7 @@ inline void generateLocalIDsSimd(void *b, const std::array &localWo auto zero = Vec::zero(); auto one = Vec::one(); - const auto threadSkipSize = (simd == 32 ? 32 : 16) * sizeof(uint16_t); + const auto threadSkipSize = ((simd == 32 || chooseMaxRowSize) ? 32 : 16) * sizeof(uint16_t); Vec vSimdX(simd); Vec vSimdY = zero; Vec vSimdZ = zero; diff --git a/runtime/command_queue/local_id_gen_avx2.cpp b/runtime/command_queue/local_id_gen_avx2.cpp index 9cb09ef4ac..73782c18c1 100644 --- a/runtime/command_queue/local_id_gen_avx2.cpp +++ b/runtime/command_queue/local_id_gen_avx2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2019 Intel Corporation + * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,7 +12,7 @@ #include namespace NEO { -template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); -template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); +template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); +template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); } // namespace NEO -#endif \ No newline at end of file +#endif diff --git a/runtime/command_queue/local_id_gen_sse4.cpp b/runtime/command_queue/local_id_gen_sse4.cpp index 98dad3f119..7b45b6f487 100644 --- a/runtime/command_queue/local_id_gen_sse4.cpp +++ b/runtime/command_queue/local_id_gen_sse4.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2019 Intel Corporation + * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,7 +11,7 @@ #include namespace NEO { -template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); -template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); -template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder); +template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); +template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); +template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); } // namespace NEO diff --git a/unit_tests/command_queue/flattened_id_tests.cpp b/unit_tests/command_queue/flattened_id_tests.cpp index e06db332e1..4e945d91bb 100644 --- a/unit_tests/command_queue/flattened_id_tests.cpp +++ b/unit_tests/command_queue/flattened_id_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2019 Intel Corporation + * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,11 +7,12 @@ #include "core/helpers/basic_math.h" #include "core/helpers/string.h" - -#include "gtest/gtest.h" +#include "test.h" +#include "unit_tests/helpers/unit_test_helper.h" #include #include +using namespace NEO; union GRF { float fRegs[8]; @@ -110,7 +111,7 @@ void generateFlattenedIDs(void *buffer, uint32_t simd, uint32_t lwsX, uint32_t l memcpy_s(buffer, copySize, pSrc, copySize); } -struct FlattenedIDFixture : public ::testing::TestWithParam> { +struct FlattenedIDFixture : ::testing::TestWithParam> { void SetUp() override { simd = std::get<0>(GetParam()); localWorkSizeX = std::get<1>(GetParam()); @@ -127,12 +128,12 @@ struct FlattenedIDFixture : public ::testing::TestWithParam::useFullRowForLocalIdsGeneration); } -TEST_P(FlattenedIDFixture, checkAllWorkItemsCovered) { +HWTEST_P(FlattenedIDFixture, checkAllWorkItemsCovered) { generateFlattenedIDs(buffer, simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ); - validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ); + validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); } TEST_P(FlattenedIDFixture, sizeCalculationLocalIDs) { diff --git a/unit_tests/command_queue/local_id_tests.cpp b/unit_tests/command_queue/local_id_tests.cpp index c12c4502e9..a768da4391 100644 --- a/unit_tests/command_queue/local_id_tests.cpp +++ b/unit_tests/command_queue/local_id_tests.cpp @@ -9,8 +9,8 @@ #include "core/helpers/basic_math.h" #include "core/helpers/ptr_math.h" #include "runtime/command_queue/local_id_gen.h" - -#include "gtest/gtest.h" +#include "test.h" +#include "unit_tests/helpers/unit_test_helper.h" #include #include @@ -68,8 +68,31 @@ TEST(LocalID, GivenSimd1WhenGettingPerThreadSizeLocalIdsThenValueIsEqualGrfSize) EXPECT_EQ(grfSize, getPerThreadSizeLocalIDs(simd, grfSize)); } +TEST(LocalID, givenVariadicGrfSizeWhenLocalSizesAreEmittedTheyUseFullRowSize) { + auto localIdsPtr = allocateAlignedMemory(3 * 64u, MemoryConstants::cacheLineSize); -struct LocalIDFixture : public ::testing::TestWithParam> { + uint16_t *localIdsView = reinterpret_cast(localIdsPtr.get()); + std::array localSizes = {2u, 2u, 1u}; + std::array dimensionsOrder = {0u, 1u, 2u}; + + generateLocalIDs(localIdsPtr.get(), 16u, localSizes, dimensionsOrder, false, 64u); + EXPECT_EQ(localIdsView[0], 0u); + EXPECT_EQ(localIdsView[1], 1u); + EXPECT_EQ(localIdsView[2], 0u); + EXPECT_EQ(localIdsView[3], 1u); + + EXPECT_EQ(localIdsView[32], 0u); + EXPECT_EQ(localIdsView[33], 0u); + EXPECT_EQ(localIdsView[34], 1u); + EXPECT_EQ(localIdsView[35], 1u); + + EXPECT_EQ(localIdsView[64], 0u); + EXPECT_EQ(localIdsView[65], 0u); + EXPECT_EQ(localIdsView[66], 0u); + EXPECT_EQ(localIdsView[67], 0u); +} + +struct LocalIDFixture : ::testing::TestWithParam> { void SetUp() override { simd = std::get<0>(GetParam()); grfSize = std::get<1>(GetParam()); @@ -93,11 +116,11 @@ struct LocalIDFixture : public ::testing::TestWithParam{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, std::array{{0, 1, 2}}, false, grfSize); - validateIDWithinLimits(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ); + validateIDWithinLimits(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); } -TEST_P(LocalIDFixture, WhenGeneratingLocalIdsThenAllWorkItemsCovered) { +HWTEST_P(LocalIDFixture, WhenGeneratingLocalIdsThenAllWorkItemsCovered) { generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, std::array{{0, 1, 2}}, false, grfSize); - validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ); + validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); } -TEST_P(LocalIDFixture, WhenWalkOrderIsXyzThenProperLocalIdsAreGenerated) { +HWTEST_P(LocalIDFixture, WhenWalkOrderIsXyzThenProperLocalIdsAreGenerated) { auto dimensionsOrder = std::array{{0, 1, 2}}; generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, dimensionsOrder, false, grfSize); - validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ); + validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder); } -TEST_P(LocalIDFixture, WhenWalkOrderIsYxzThenProperLocalIdsAreGenerated) { +HWTEST_P(LocalIDFixture, WhenWalkOrderIsYxzThenProperLocalIdsAreGenerated) { auto dimensionsOrder = std::array{{1, 0, 2}}; generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, dimensionsOrder, false, grfSize); - validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ); + validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder); } -TEST_P(LocalIDFixture, WhenWalkOrderIsZyxThenProperLocalIdsAreGenerated) { +HWTEST_P(LocalIDFixture, WhenWalkOrderIsZyxThenProperLocalIdsAreGenerated) { auto dimensionsOrder = std::array{{2, 1, 0}}; generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, dimensionsOrder, false, grfSize); - validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ); + validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder); } diff --git a/unit_tests/helpers/unit_test_helper.h b/unit_tests/helpers/unit_test_helper.h index 2324008998..c326fa5107 100644 --- a/unit_tests/helpers/unit_test_helper.h +++ b/unit_tests/helpers/unit_test_helper.h @@ -42,5 +42,7 @@ struct UnitTestHelper { static const uint32_t smallestTestableSimdSize; static const AuxTranslationMode requiredAuxTranslationMode; + + static const bool useFullRowForLocalIdsGeneration; }; } // namespace NEO diff --git a/unit_tests/helpers/unit_test_helper.inl b/unit_tests/helpers/unit_test_helper.inl index e7b3331c5a..b06247a262 100644 --- a/unit_tests/helpers/unit_test_helper.inl +++ b/unit_tests/helpers/unit_test_helper.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -69,4 +69,7 @@ const uint32_t UnitTestHelper::smallestTestableSimdSize = 8; template const AuxTranslationMode UnitTestHelper::requiredAuxTranslationMode = AuxTranslationMode::Builtin; +template +const bool UnitTestHelper::useFullRowForLocalIdsGeneration = false; + } // namespace NEO