Removing grf size from kernel descriptor

Change-Id: I62954d5de596410a463459cc185ff950871fcd36
This commit is contained in:
Jaroslaw Chodor 2020-04-24 09:30:03 +02:00 committed by sys_ocldev
parent ad75f97fdc
commit a8269f55f9
8 changed files with 114 additions and 5 deletions

View File

@ -209,7 +209,7 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION;
}
auto grfSize = kernelImmData->getDescriptor().kernelAttributes.grfSize;
auto grfSize = this->module->getDevice()->getHwInfo().capabilityTable.grfSize;
uint32_t perThreadDataSizeForWholeThreadGroupNeeded =
static_cast<uint32_t>(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(
kernelImmData->getDescriptor().kernelAttributes.simdSize, grfSize, numChannels, itemsInGroup));

View File

@ -26,6 +26,8 @@ set(L0_MOCKS_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_module.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_module.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_sampler.h
)

View File

@ -34,14 +34,17 @@ struct WhiteBox<::L0::KernelImmutableData> : public ::L0::KernelImmutableData {
template <>
struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
using BaseClass = ::L0::KernelImp;
using BaseClass::BaseClass;
using ::L0::KernelImp::createPrintfBuffer;
using ::L0::KernelImp::crossThreadData;
using ::L0::KernelImp::crossThreadDataSize;
using ::L0::KernelImp::groupSize;
using ::L0::KernelImp::kernelImmData;
using ::L0::KernelImp::module;
using ::L0::KernelImp::numThreadsPerThreadGroup;
using ::L0::KernelImp::perThreadDataForWholeThreadGroup;
using ::L0::KernelImp::perThreadDataSize;
using ::L0::KernelImp::perThreadDataSizeForWholeThreadGroup;
using ::L0::KernelImp::printfBuffer;
using ::L0::KernelImp::residencyContainer;
using ::L0::KernelImp::unifiedMemoryControls;
@ -55,8 +58,8 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
};
template <>
struct Mock<::L0::Kernel> : public ::L0::KernelImp {
using BaseClass = ::L0::KernelImp;
struct Mock<::L0::Kernel> : public WhiteBox<::L0::Kernel> {
using BaseClass = WhiteBox<::L0::Kernel>;
Mock() : BaseClass(nullptr) {
NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens;

View File

@ -0,0 +1,18 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mock_module.h"
using ::testing::Return;
namespace L0 {
namespace ult {
// Forwards all arguments to the white-box module base and installs a default
// expectation: getMaxGroupSize() may be called any number of times and
// reports 256 each time.
Mock<Module>::Mock(::L0::Device *device, NEO::Device *neoDevice, ModuleBuildLog *moduleBuildLog)
    : WhiteBox(device, neoDevice, moduleBuildLog) {
    EXPECT_CALL(*this, getMaxGroupSize)
        .WillRepeatedly(::testing::Return(256u));
}
} // namespace ult
} // namespace L0

View File

@ -0,0 +1,52 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "level_zero/core/source/module/module_imp.h"
#include "level_zero/core/test/unit_tests/mock.h"
#include "level_zero/core/test/unit_tests/white_box.h"
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Winconsistent-missing-override"
#endif
namespace L0 {
namespace ult {
// WhiteBox specialization for modules: a thin struct deriving publicly from
// the concrete ModuleImp implementation so tests can work with it directly.
template <>
struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
using BaseClass = ::L0::ModuleImp;
// Inherit every ModuleImp constructor unchanged.
using BaseClass::BaseClass;
};
// Inside L0::ult the unqualified name `Module` refers to the white-box
// specialization above rather than to ::L0::Module.
using Module = WhiteBox<::L0::Module>;
// gMock-based mock of the white-box module. Each MOCK_* line below declares a
// mocked member function with the given name and signature.
// NOTE(review): these are presumably overrides of ModuleImp virtuals; the
// surrounding clang pragma silences -Winconsistent-missing-override, which
// suggests the generated declarations do not carry `override` - confirm.
template <>
struct Mock<Module> : public Module {
// A mock cannot be default-constructed; the three-argument constructor
// declared below (defined out of line) must be used.
Mock() = delete;
Mock(::L0::Device *device, NEO::Device *neoDevice, ModuleBuildLog *moduleBuildLog);
MOCK_METHOD2(createKernel,
ze_result_t(const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction));
MOCK_METHOD0(destroy, ze_result_t());
MOCK_METHOD2(getFunctionPointer, ze_result_t(const char *pKernelName, void **pfnFunction));
MOCK_METHOD2(getNativeBinary, ze_result_t(size_t *pSize, uint8_t *pModuleNativeBinary));
MOCK_CONST_METHOD1(getKernelImmutableData, const L0::KernelImmutableData *(const char *functionName));
MOCK_CONST_METHOD0(getMaxGroupSize, uint32_t());
MOCK_METHOD2(getKernelNames, ze_result_t(uint32_t *pCount, const char **pNames));
MOCK_METHOD2(getGlobalPointer, ze_result_t(const char *pGlobalName, void **pPtr));
MOCK_CONST_METHOD0(isDebugEnabled, bool());
};
} // namespace ult
} // namespace L0
#if defined(__clang__)
#pragma clang diagnostic pop
#endif

View File

@ -5,12 +5,48 @@
*
*/
#include "shared/test/unit_test/mocks/mock_device.h"
#include "test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
namespace ult {
using KernelImpSetGroupSizeTest = Test<DeviceFixture>;
HWTEST_F(KernelImpSetGroupSizeTest, WhenCalculatingLocalIdsThenGrfSizeIsTakenFromCapabilityTable) {
    // Mock kernel with SIMD size 1, attached to a mock module built on the fixture's devices.
    Mock<Kernel> mockKernel;
    Mock<Module> mockModule(this->device, this->neoDevice, nullptr);
    mockKernel.descriptor.kernelAttributes.simdSize = 1;
    mockKernel.module = &mockModule;

    // Reference GRF size read from the device capability table.
    const auto grfSize = mockModule.getDevice()->getHwInfo().capabilityTable.grfSize;
    const uint32_t groupSize[3] = {2, 3, 5};
    const uint32_t itemsInGroup = groupSize[0] * groupSize[1] * groupSize[2];

    EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]));

    // The test expects one thread per work-item and one GRF of per-thread data for each thread.
    EXPECT_EQ(itemsInGroup, mockKernel.numThreadsPerThreadGroup);
    EXPECT_EQ(grfSize * itemsInGroup, mockKernel.perThreadDataSizeForWholeThreadGroup);
    // Abort before inspecting the buffer if it is smaller than the range read below.
    ASSERT_LE(grfSize * itemsInGroup, mockKernel.perThreadDataSizeForWholeThreadGroup);

    using LocalIdT = unsigned short;
    const auto threadOffsetInLocalIds = grfSize / sizeof(LocalIdT);
    const auto *generatedLocalIds = reinterpret_cast<LocalIdT *>(mockKernel.perThreadDataForWholeThreadGroup);

    // Visit threads in x-fastest, then y, then z order and decode the expected ids from the thread index.
    const uint32_t itemsPerSlice = groupSize[0] * groupSize[1];
    for (uint32_t threadId = 0; threadId < itemsInGroup; ++threadId) {
        const uint32_t x = threadId % groupSize[0];
        const uint32_t y = (threadId / groupSize[0]) % groupSize[1];
        const uint32_t z = threadId / itemsPerSlice;
        const auto *threadLocalIds = generatedLocalIds + threadId * threadOffsetInLocalIds;

        EXPECT_EQ(x, threadLocalIds[0]) << " thread : " << threadId;
        EXPECT_EQ(y, threadLocalIds[1]) << " thread : " << threadId;
        EXPECT_EQ(z, threadLocalIds[2]) << " thread : " << threadId;
    }
}
} // namespace ult
} // namespace L0

View File

@ -60,7 +60,6 @@ struct KernelDescriptor final {
uint8_t gpuPointerSize = 0;
uint8_t simdSize = 8;
uint8_t grfSize = 32;
uint8_t numLocalIdChannels = 3;
bool supportsBuffersBiggerThan4Gb() const {

View File

@ -36,7 +36,6 @@ TEST(KernelDescriptor, WhenDefaultInitializedThenValuesAreCleared) {
EXPECT_EQ(2U, desc.kernelAttributes.workgroupDimensionsOrder[2]);
EXPECT_EQ(0U, desc.kernelAttributes.gpuPointerSize);
EXPECT_EQ(8U, desc.kernelAttributes.simdSize);
EXPECT_EQ(32U, desc.kernelAttributes.grfSize);
EXPECT_EQ(3U, desc.kernelAttributes.numLocalIdChannels);
EXPECT_EQ(0U, desc.entryPoints.skipPerThreadDataLoad);