Fix generation of local IDs for image layout with local workgroup size 12x12x1

Change-Id: Ib723b132b570d8cfb3f72f32ddadde869607c354
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2018-08-22 10:06:53 +00:00
committed by sys_ocldev
parent 88c5d3f5c2
commit 6286f245a1
2 changed files with 28 additions and 15 deletions

View File

@@ -86,36 +86,42 @@ inline void generateLocalIDsWithLayoutForImages(void *b, const std::array<uint16
uint8_t xDelta = simd == 8u ? 2u : 4u; // difference between corresponding values in consecutive X rows
uint8_t yDelta = (simd == 8u || localWorkgroupSize.at(1) == 4u) ? 4u : rowWidth / xDelta; // difference between corresponding values in consecutive Y rows
bool earlyGrowX = localWorkgroupSize.at(1) == yDelta &&
simd == 32u &&
localWorkgroupSize.at(0) > xDelta;
auto buffer = reinterpret_cast<uint16_t *>(b);
uint16_t offset = 0u;
auto numGrfs = (localWorkgroupSize.at(0) * localWorkgroupSize.at(1) * localWorkgroupSize.at(2) + (simd - 1)) / simd;
uint8_t xMask = simd == 8u ? 0b1 : 0b11;
uint16_t x = 0u;
uint16_t y = 0u;
for (auto grfId = 0; grfId < numGrfs; grfId++) {
auto rowX = buffer + offset;
auto rowY = buffer + offset + rowWidth;
auto rowZ = buffer + offset + 2 * rowWidth;
uint16_t extraX = 0u;
uint16_t extraY = 0u;
for (uint8_t i = 0u; i < simd; i++) {
if (i == yDelta * xDelta && earlyGrowX) {
x += xDelta;
if (i > 0) {
extraX++;
if (extraX == xDelta) {
extraX = 0u;
}
if ((i & xMask) == 0) {
extraY++;
if (y + extraY == localWorkgroupSize.at(1)) {
extraY = 0;
x += xDelta;
}
}
}
if (x == localWorkgroupSize.at(0)) {
x = 0u;
y += yDelta;
if (y == localWorkgroupSize.at(1)) {
if (y >= localWorkgroupSize.at(1)) {
y = 0u;
}
}
rowX[i] = (x + (i & (xDelta - 1)));
rowY[i] = (y + i / xDelta);
if (rowY[i] >= localWorkgroupSize.at(1)) {
rowY[i] -= localWorkgroupSize.at(1);
}
rowX[i] = x + extraX;
rowY[i] = y + extraY;
rowZ[i] = 0u;
}
x += xDelta;

View File

@@ -320,15 +320,19 @@ struct LocalIdsLayoutForImagesTest : ::testing::TestWithParam<std::tuple<uint16_
generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true);
}
void validateGRF() {
uint32_t totalLocalIds = localWorkSize.at(0) * localWorkSize.at(1);
auto numRows = elemsInBuffer / rowWidth;
auto numGrfs = numRows / 3u;
for (auto i = 0u; i < numGrfs; i++) {
// validate X row
uint16_t baseX = buffer[i * 3 * rowWidth];
uint16_t baseY = buffer[i * 3 * rowWidth + rowWidth];
uint16_t currentX = baseX;
for (int j = 1; j < simd; j++) {
if (simd == 32u && localWorkSize.at(1) == 4u && j == 16u) {
if (simd * i + j == totalLocalIds)
break;
if (simd == 32u && baseY + 8u > localWorkSize.at(1) && j == 16u) {
baseX += xDelta;
if (baseX == localWorkSize.at(0)) {
baseX = 0;
@@ -339,17 +343,20 @@ struct LocalIdsLayoutForImagesTest : ::testing::TestWithParam<std::tuple<uint16_
}
// validate Y row
uint16_t baseY = buffer[i * 3 * rowWidth + rowWidth];
for (int j = 0; j < simd; j++) {
if (simd * i + j == totalLocalIds)
break;
uint16_t expectedY = baseY + ((j / xDelta) & 0b111);
if (expectedY >= localWorkSize.at(1)) {
expectedY -= localWorkSize.at(1);
expectedY -= (localWorkSize.at(1) - baseY);
}
EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
}
// validate Z row
for (int j = 0; j < simd; j++) {
if (simd * i + j == totalLocalIds)
break;
EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], 0u);
}
}