From c53c09da45ef27556741179fed775021bed6ca5c Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Thu, 16 Aug 2018 10:23:07 +0200 Subject: [PATCH] Limit local work sizes where local ids limit is applied. Change-Id: Id9a84d6a7d4530344771f48fd278cff9ab2dd927 --- Jenkinsfile | 2 +- runtime/command_queue/local_id_gen.cpp | 9 +- unit_tests/command_queue/local_id_tests.cpp | 200 +++++++++++++------- 3 files changed, 136 insertions(+), 75 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index c472f8fbbf..5564250d2b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,4 +1,4 @@ #!groovy neoDependenciesRev='790647-1082' strategy='EQUAL' -allowedCD=283 +allowedCD=296 diff --git a/runtime/command_queue/local_id_gen.cpp b/runtime/command_queue/local_id_gen.cpp index 9cae96dbd0..fb8404cff3 100644 --- a/runtime/command_queue/local_id_gen.cpp +++ b/runtime/command_queue/local_id_gen.cpp @@ -72,13 +72,12 @@ void generateLocalIDs(void *buffer, uint16_t simd, const std::array } bool isCompatibleWith4x4Layout(const std::array &localWorkgroupSize, const std::array &dimensionsOrder, uint16_t simd) { - uint8_t rowWidth = simd == 32u ? 32u : 16u; - uint8_t xDelta = simd == 8u ? 2u : 4u; - uint8_t yDelta = simd == 8u || localWorkgroupSize.at(1) == 4u ? 4u : rowWidth / xDelta; + //limit support to 8x4x1 and 8x8x1 LWS return dimensionsOrder.at(0) == 0 && dimensionsOrder.at(1) == 1 && - (localWorkgroupSize.at(0) & (xDelta - 1)) == 0 && - (localWorkgroupSize.at(1) & (yDelta - 1)) == 0; + localWorkgroupSize[2] == 1 && + localWorkgroupSize[0] == 8 && + (localWorkgroupSize[1] == 4 || localWorkgroupSize[1] == 8); } inline void generateLocalIDsWith4x4Layout(void *b, const std::array &localWorkgroupSize, uint16_t simd) { diff --git a/unit_tests/command_queue/local_id_tests.cpp b/unit_tests/command_queue/local_id_tests.cpp index 0da0ce918f..e4fd4808a7 100644 --- a/unit_tests/command_queue/local_id_tests.cpp +++ b/unit_tests/command_queue/local_id_tests.cpp @@ -298,8 +298,14 @@ TEST_P(LocalIDFixture, sizeCalculationLocalIDs) { using LocalIds4x4LayoutTest = ::testing::TestWithParam; -TEST(LocalIds4x4LayoutTest, given4x4x1LocalWorkSizeWithDefaultDimensionsOrderWhenCheck2x4CompatibilityThenReturnTrue) { - std::array localWorkSize{{4u, 4u, 1u}}; +TEST(LocalIds4x4LayoutTest, given8x4x1LocalWorkSizeWithDefaultDimensionsOrderWhenCheck2x4CompatibilityThenReturnTrue) { + std::array localWorkSize{{8u, 4u, 1u}}; + std::array dimensionsOrder = {{0u, 1u, 2u}}; + EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, 16)); +} + +TEST(LocalIds4x4LayoutTest, given8x8x1LocalWorkSizeWithDefaultDimensionsOrderWhenCheck2x4CompatibilityThenReturnTrue) { + std::array localWorkSize{{8u, 8u, 1u}}; std::array dimensionsOrder = {{0u, 1u, 2u}}; EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, 16)); } @@ -366,32 +372,34 @@ TEST_P(LocalIds4x4LayoutTest, givenLWS4x4x2WhenGenerateLocalIdsWithKernelWithOnl auto alignedMemory = allocateAlignedMemory(size, 32); auto buffer = reinterpret_cast(alignedMemory.get()); memset(buffer, 0xff, size); - EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)); - generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true); + if (isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)) { + EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)); + generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true); - auto numRows = elemsInBuffer / rowWidth; - auto numGrfs = numRows / 3u; + auto numRows = elemsInBuffer / rowWidth; + auto numGrfs = numRows / 3u; - for (auto i = 0u; i < numGrfs; i++) { + for (auto i = 0u; i < numGrfs; i++) { - // validate X row - uint16_t baseX = buffer[i * 3 * rowWidth]; - uint16_t currentX = baseX; - for (int j = 1; j < simd; j++) { - currentX = baseX + ((currentX + 1) & (xDelta - 1)); - EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX); - } + // validate X row + uint16_t baseX = buffer[i * 3 * rowWidth]; + uint16_t currentX = baseX; + for (int j = 1; j < simd; j++) { + currentX = baseX + ((currentX + 1) & (xDelta - 1)); + EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX); + } - // validate Y row - for (int j = 0; j < simd; j++) { - uint16_t expectedY = ((j / xDelta) & 0b11); - EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY); - } + // validate Y row + for (int j = 0; j < simd; j++) { + uint16_t expectedY = ((j / xDelta) & 0b11); + EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY); + } - // validate Z row - for (int j = 0; j < simd; j++) { - uint16_t expectedZ = 2 * i / numGrfs + j / (simd / zDelta); //early grow Z - EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ); + // validate Z row + for (int j = 0; j < simd; j++) { + uint16_t expectedZ = 2 * i / numGrfs + j / (simd / zDelta); //early grow Z + EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ); + } } } } @@ -410,36 +418,88 @@ TEST_P(LocalIds4x4LayoutTest, givenLWS8x4x2WhenGenerateLocalIdsWithKernelWithOnl auto alignedMemory = allocateAlignedMemory(size, 32); auto buffer = reinterpret_cast(alignedMemory.get()); memset(buffer, 0xff, size); - EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)); - generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true); + if (isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)) { + EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)); + generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true); - auto numRows = elemsInBuffer / rowWidth; - auto numGrfs = numRows / 3u; + auto numRows = elemsInBuffer / rowWidth; + auto numGrfs = numRows / 3u; - for (auto i = 0u; i < numGrfs; i++) { + for (auto i = 0u; i < numGrfs; i++) { - // validate X row - uint16_t baseX = buffer[i * 3 * rowWidth]; - uint16_t currentX = baseX; - for (int j = 1; j < simd; j++) { - if (j == 16) { - //early grow X - baseX += xDelta; + // validate X row + uint16_t baseX = buffer[i * 3 * rowWidth]; + uint16_t currentX = baseX; + for (int j = 1; j < simd; j++) { + if (j == 16) { + //early grow X + baseX += xDelta; + } + currentX = baseX + ((currentX + 1) & (xDelta - 1)); + EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX); } - currentX = baseX + ((currentX + 1) & (xDelta - 1)); - EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX); - } - // validate Y row - for (int j = 0; j < simd; j++) { - uint16_t expectedY = ((j / xDelta) & 0b11); - EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY); - } + // validate Y row + for (int j = 0; j < simd; j++) { + uint16_t expectedY = ((j / xDelta) & 0b11); + EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY); + } - // validate Z row - for (int j = 0; j < simd; j++) { - uint16_t expectedZ = 2 * i / numGrfs; - EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ); + // validate Z row + for (int j = 0; j < simd; j++) { + uint16_t expectedZ = 2 * i / numGrfs; + EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ); + } + } + } +} + +TEST_P(LocalIds4x4LayoutTest, givenLWS8x4x1WhenGenerateLocalIdsWithKernelWithOnlyImagesThenApplies4x4Layout) { + uint16_t simd = GetParam(); + uint8_t rowWidth = simd == 32 ? 32 : 16; + uint16_t xDelta = simd == 8u ? 2u : 4u; + std::array localWorkSize{8u, 4u, 1u}; + auto dimensionsOrder = std::array{{0u, 1u, 2u}}; + auto elemsInBuffer = 3u * localWorkSize.at(0) * localWorkSize.at(1) * localWorkSize.at(2); + if (simd == 8u) { + elemsInBuffer *= 2; + } + auto size = elemsInBuffer * sizeof(uint16_t); + auto alignedMemory = allocateAlignedMemory(size, 32); + auto buffer = reinterpret_cast(alignedMemory.get()); + memset(buffer, 0xff, size); + if (isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)) { + EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)); + generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true); + + auto numRows = elemsInBuffer / rowWidth; + auto numGrfs = numRows / 3u; + + for (auto i = 0u; i < numGrfs; i++) { + + // validate X row + uint16_t baseX = buffer[i * 3 * rowWidth]; + uint16_t currentX = baseX; + for (int j = 1; j < simd; j++) { + if (j == 16) { + //early grow X + baseX += xDelta; + } + currentX = baseX + ((currentX + 1) & (xDelta - 1)); + EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX); + } + + // validate Y row + for (int j = 0; j < simd; j++) { + uint16_t expectedY = ((j / xDelta) & 0b11); + EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY); + } + + // validate Z row + for (int j = 0; j < simd; j++) { + uint16_t expectedZ = 0; + EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ); + } } } } @@ -458,33 +518,35 @@ TEST_P(LocalIds4x4LayoutTest, givenLWS8x8x2WhenGenerateLocalIdsWithKernelWithOnl auto alignedMemory = allocateAlignedMemory(size, 32); auto buffer = reinterpret_cast(alignedMemory.get()); memset(buffer, 0xff, size); - EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)); - generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true); + if (isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)) { + EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)); + generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true); - auto numRows = elemsInBuffer / rowWidth; - auto numGrfs = numRows / 3u; + auto numRows = elemsInBuffer / rowWidth; + auto numGrfs = numRows / 3u; - for (auto i = 0u; i < numGrfs; i++) { + for (auto i = 0u; i < numGrfs; i++) { - // validate X row - uint16_t baseX = buffer[i * 3 * rowWidth]; - uint16_t currentX = baseX; - for (int j = 1; j < simd; j++) { - currentX = baseX + ((currentX + 1) & (xDelta - 1)); - EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX); - } + // validate X row + uint16_t baseX = buffer[i * 3 * rowWidth]; + uint16_t currentX = baseX; + for (int j = 1; j < simd; j++) { + currentX = baseX + ((currentX + 1) & (xDelta - 1)); + EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX); + } - // validate Y row - uint16_t baseY = buffer[i * 3 * rowWidth + rowWidth]; - for (int j = 0; j < simd; j++) { - uint16_t expectedY = baseY + ((j / xDelta) & 0b111); - EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY); - } + // validate Y row + uint16_t baseY = buffer[i * 3 * rowWidth + rowWidth]; + for (int j = 0; j < simd; j++) { + uint16_t expectedY = baseY + ((j / xDelta) & 0b111); + EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY); + } - // validate Z row - for (int j = 0; j < simd; j++) { - uint16_t expectedZ = 2 * i / numGrfs; - EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ); + // validate Z row + for (int j = 0; j < simd; j++) { + uint16_t expectedZ = 2 * i / numGrfs; + EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ); + } } } }