fix(l0): check for largeGRF when computing maxWorkGroupSize

Sizing context (PVC):
When using LargeGRF (a.k.a. GRF256) there are only 4 HW threads per EU
(instead of the default 8). Together with SIMD16, this means there can
be at most 64 work-items per EU (4 threads x 16 lanes). With 8 EUs per
subslice this gives 512 work-items on a single subslice. For correct
intra-WG synchronization, all work-items (WIs) of a work-group must be
executed on the same subslice (to access the same SLM, where the
synchronization primitives are stored). Thus, with SIMD16 and LargeGRF
the work-group size must not exceed 512 (PVC example).

So far `maxWorkGroupSize` has been taken solely from the DeviceInfo
structure, both in `ModuleTranslationUnit::processUnpackedBinary()` and
in `ModuleImp::initialize()`. This approach does not take kernel
parameters (LargeGRF) into account, so it allows submitting a kernel
that uses LargeGRF with SIMD16 and a work-group size of 1024. That
leads to a hang.

Fix the `.maxWorkGroupSize` computation so that it takes the kernel
parameters into consideration.

Add new tests (for discrete platforms >= XeHP), adapt the existing
ones, and fix minor cosmetic issues along the way.

Similar check for OCL:
https://github.com/intel/compute-runtime/blob/master/opencl/source/command_queue/enqueue_kernel.h#L130

Related-To: NEO-7684
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
Maciej Bielski
2023-01-31 12:56:03 +00:00
committed by Compute-Runtime-Automation
parent a3aa7a1326
commit 2778043d67
17 changed files with 360 additions and 57 deletions

View File

@@ -282,7 +282,11 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
Vec3<size_t> groupSize{groupSizeX, groupSizeY, groupSizeZ};
auto itemsInGroup = Math::computeTotalElementsCount(groupSize);
if (itemsInGroup > module->getMaxGroupSize()) {
const NEO::KernelDescriptor &kernelDescriptor = kernelImmData->getDescriptor();
if (auto maxGroupSize = module->getMaxGroupSize(kernelDescriptor); itemsInGroup > maxGroupSize) {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
"Requested work-group size (%lu) exceeds maximum value (%u) for the kernel \"%s\" \n",
itemsInGroup, maxGroupSize, kernelDescriptor.kernelMetadata.kernelName.c_str());
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION;
}
@@ -290,7 +294,6 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
this->groupSize[0] = groupSizeX;
this->groupSize[1] = groupSizeY;
this->groupSize[2] = groupSizeZ;
const NEO::KernelDescriptor &kernelDescriptor = kernelImmData->getDescriptor();
for (uint32_t i = 0u; i < 3u; i++) {
if (kernelDescriptor.kernelAttributes.requiredWorkgroupSize[i] != 0 &&
kernelDescriptor.kernelAttributes.requiredWorkgroupSize[i] != this->groupSize[i]) {
@@ -349,14 +352,15 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
uint32_t globalSizeZ, uint32_t *groupSizeX,
uint32_t *groupSizeY, uint32_t *groupSizeZ) {
size_t retGroupSize[3] = {};
auto maxWorkGroupSize = module->getMaxGroupSize();
auto simd = kernelImmData->getDescriptor().kernelAttributes.simdSize;
const auto &kernelDescriptor = this->getImmutableData()->getDescriptor();
auto maxWorkGroupSize = module->getMaxGroupSize(kernelDescriptor);
auto simd = kernelDescriptor.kernelAttributes.simdSize;
size_t workItems[3] = {globalSizeX, globalSizeY, globalSizeZ};
uint32_t dim = (globalSizeY > 1U) ? 2 : 1U;
dim = (globalSizeZ > 1U) ? 3 : dim;
if (NEO::DebugManager.flags.EnableComputeWorkSizeND.get()) {
auto usesImages = getImmutableData()->getDescriptor().kernelAttributes.flags.usesImages;
auto usesImages = kernelDescriptor.kernelAttributes.flags.usesImages;
auto neoDevice = module->getDevice()->getNEODevice();
const auto &deviceInfo = neoDevice->getDeviceInfo();
uint32_t numThreadsPerSubSlice = (uint32_t)deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU;
@@ -367,9 +371,9 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(),
NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelDescriptor.kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(),
neoDevice->getRootDeviceEnvironment(), numThreadsPerSubSlice, localMemSize,
usesImages, false, kernelImmData->getDescriptor().kernelAttributes.flags.requiresDisabledEUFusion);
usesImages, false, kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion);
NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim);
} else {
if (1U == dim) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -15,6 +15,10 @@
struct _ze_module_handle_t {};
namespace NEO {
struct KernelDescriptor;
}
namespace L0 {
struct Device;
struct ModuleBuildLog;
@@ -48,7 +52,7 @@ struct Module : _ze_module_handle_t {
virtual const KernelImmutableData *getKernelImmutableData(const char *kernelName) const = 0;
virtual const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const = 0;
virtual uint32_t getMaxGroupSize() const = 0;
virtual uint32_t getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const = 0;
virtual bool isDebugEnabled() const = 0;
virtual bool shouldAllocatePrivateMemoryPerDispatch() const = 0;
virtual uint32_t getProfileFlags() const = 0;

View File

@@ -26,8 +26,10 @@
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/kernel_helpers.h"
#include "shared/source/helpers/string.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
@@ -367,7 +369,6 @@ ze_result_t ModuleTranslationUnit::processUnpackedBinary() {
size_t slmAvailable = 0U;
NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants;
slmAvailable = static_cast<size_t>(device->getDeviceInfo().localMemSize);
deviceInfoConstants.maxWorkGroupSize = static_cast<uint32_t>(device->getDeviceInfo().maxWorkGroupSize);
deviceInfoConstants.computeUnitsUsedForScratch = static_cast<uint32_t>(device->getDeviceInfo().computeUnitsUsedForScratch);
deviceInfoConstants.slmWindowSize = static_cast<uint32_t>(device->getDeviceInfo().localMemSize);
if (NEO::requiresLocalMemoryWindowVA(programInfo)) {
@@ -390,6 +391,7 @@ ze_result_t ModuleTranslationUnit::processUnpackedBinary() {
}
for (auto &kernelInfo : this->programInfo.kernelInfos) {
deviceInfoConstants.maxWorkGroupSize = gfxCoreHelper.calculateMaxWorkGroupSize(kernelInfo->kernelDescriptor, static_cast<uint32_t>(device->getDeviceInfo().maxWorkGroupSize));
kernelInfo->apply(deviceInfoConstants);
}
@@ -617,7 +619,7 @@ ze_result_t ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neo
registerElfInDebuggerL0();
this->maxGroupSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().maxWorkGroupSize);
this->defaultMaxGroupSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().maxWorkGroupSize);
checkIfPrivateMemoryPerDispatchIsNeeded();
@@ -709,6 +711,10 @@ const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *kernelN
return nullptr;
}
// Returns the maximum work-group size allowed for the given kernel.
// The decision is delegated to the device's GfxCoreHelper, which receives the
// kernel descriptor together with the module-wide default limit
// (defaultMaxGroupSize) and returns the limit to enforce for that kernel.
uint32_t ModuleImp::getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const {
    auto &&gfxCoreHelper = this->device->getGfxCoreHelper();
    return gfxCoreHelper.calculateMaxWorkGroupSize(kernelDescriptor, this->defaultMaxGroupSize);
}
void ModuleImp::createBuildOptions(const char *pBuildFlags, std::string &apiOptions, std::string &internalBuildOptions) {
if (pBuildFlags != nullptr) {
std::string buildFlags(pBuildFlags);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -20,9 +20,11 @@
#include <string>
namespace NEO {
struct KernelDescriptor;
namespace Debug {
struct Segments;
}
} // namespace Debug
} // namespace NEO
namespace L0 {
@@ -115,7 +117,7 @@ struct ModuleImp : public Module {
const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; }
uint32_t getMaxGroupSize() const override { return maxGroupSize; }
uint32_t getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const override;
void createBuildOptions(const char *pBuildFlags, std::string &buildOptions, std::string &internalBuildOptions);
bool moveOptLevelOption(std::string &dstOptionsSet, std::string &srcOptionSet);
@@ -160,7 +162,7 @@ struct ModuleImp : public Module {
std::unique_ptr<ModuleTranslationUnit> translationUnit;
ModuleBuildLog *moduleBuildLog = nullptr;
NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr;
uint32_t maxGroupSize = 0U;
uint32_t defaultMaxGroupSize = 0U;
std::vector<std::unique_ptr<KernelImmutableData>> kernelImmDatas;
NEO::Linker::RelocatedSymbolsMap symbols;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -10,6 +10,8 @@
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
#include "gtest/gtest.h"
namespace L0 {
@@ -55,7 +57,8 @@ ModuleImmutableDataFixture::MockModule::MockModule(L0::Device *device,
L0::ModuleType type,
uint32_t perHwThreadPrivateMemorySize,
MockImmutableData *inMockKernelImmData) : ModuleImp(device, moduleBuildLog, type), mockKernelImmData(inMockKernelImmData) {
mockKernelImmData->setDevice(device);
this->mockKernelImmData->setDevice(device);
this->translationUnit.reset(new MockModuleTranslationUnit(this->translationUnit.get()));
}
void ModuleImmutableDataFixture::MockModule::checkIfPrivateMemoryPerDispatchIsNeeded() {
@@ -110,8 +113,8 @@ void ModuleImmutableDataFixture::tearDown() {
DeviceFixture::tearDown();
}
Module *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
L0::Module *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
auto module = new ProxyModuleImp(device, moduleBuildLog, type);
*result = module->initialize(desc, device->getNEODevice());

View File

@@ -51,9 +51,9 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
struct MockModule : public L0::ModuleImp {
using ModuleImp::allocatePrivateMemoryPerDispatch;
using ModuleImp::defaultMaxGroupSize;
using ModuleImp::getKernelImmutableDataVector;
using ModuleImp::kernelImmDatas;
using ModuleImp::maxGroupSize;
using ModuleImp::translationUnit;
using ModuleImp::type;
@@ -124,8 +124,8 @@ struct ModuleFixture : public DeviceFixture {
return kernelImmDatas;
}
static Module *create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
static L0::Module *create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
};
void setUp();

View File

@@ -1,11 +1,13 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/compiler_interface/external_functions.h"
#include "shared/source/program/kernel_info.h"
#include "shared/test/common/mocks/mock_cif.h"
#include "shared/test/common/mocks/mock_compiler_interface.h"
#include "shared/test/common/test_macros/mock_method_macros.h"
@@ -17,6 +19,52 @@
namespace L0 {
namespace ult {
// Test double for L0::ModuleTranslationUnit.
// processUnpackedBinary is declared through the ADDMETHOD mock macro, and
// compileGenBinary is short-circuited when an unpacked device binary is
// already present.
struct MockModuleTranslationUnit : public L0::ModuleTranslationUnit {
using BaseClass = L0::ModuleTranslationUnit;
// Creates an empty mock translation unit bound to `device`.
MockModuleTranslationUnit(L0::Device *device) : BaseClass{device} {}
// Builds a mock from an existing translation unit: it is bound to the same
// device and every member listed below is swapped with `orig`, so `orig` is
// left holding this object's freshly initialized values.
MockModuleTranslationUnit(L0::ModuleTranslationUnit *orig) : BaseClass{orig->device} {
std::swap(this->globalConstBuffer, orig->globalConstBuffer);
std::swap(this->globalVarBuffer, orig->globalVarBuffer);
std::swap(this->programInfo, orig->programInfo);
std::swap(this->options, orig->options);
std::swap(this->shouldSuppressRebuildWarning, orig->shouldSuppressRebuildWarning);
std::swap(this->buildLog, orig->buildLog);
std::swap(this->irBinary, orig->irBinary);
std::swap(this->irBinarySize, orig->irBinarySize);
std::swap(this->unpackedDeviceBinary, orig->unpackedDeviceBinary);
std::swap(this->unpackedDeviceBinarySize, orig->unpackedDeviceBinarySize);
std::swap(this->packedDeviceBinary, orig->packedDeviceBinary);
std::swap(this->packedDeviceBinarySize, orig->packedDeviceBinarySize);
std::swap(this->debugData, orig->debugData);
std::swap(this->debugDataSize, orig->debugDataSize);
std::swap(this->alignedvIsas, orig->alignedvIsas);
std::swap(this->specConstantsValues, orig->specConstantsValues);
std::swap(this->isBuiltIn, orig->isBuiltIn);
}
// NOTE(review): presumably expands to an overridable processUnpackedBinary()
// plus the processUnpackedBinaryCallBase / processUnpackedBinaryCalled members
// that the tests toggle and inspect — confirm against the ADDMETHOD definition.
ADDMETHOD(processUnpackedBinary, ze_result_t, true, ZE_RESULT_SUCCESS, (), ());
// Reports success without compiling when both the unpacked device binary and
// its size are set; otherwise forwards to the base-class implementation.
ze_result_t compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) override {
if (unpackedDeviceBinarySize && unpackedDeviceBinary) {
return ZE_RESULT_SUCCESS;
} else {
return ModuleTranslationUnit::compileGenBinary(inputArgs, staticLink);
}
}
// Appends the raw dummyKernelInfo pointer to programInfo.kernelInfos (a null
// pointer is appended if dummyKernelInfo has not been set).
// NOTE(review): the unique_ptr below keeps ownership of the object; verify that
// programInfo does not also delete its kernelInfos entries (double free).
void setDummyKernelInfo() {
this->programInfo.kernelInfos.push_back(dummyKernelInfo.get());
}
// Kernel info registered by setDummyKernelInfo(); empty by default.
std::unique_ptr<NEO::KernelInfo> dummyKernelInfo = {};
};
// Reinterprets a generic translation-unit pointer as the mock type.
// This is an unchecked static_cast: the caller must guarantee that `tu`
// really points to a MockModuleTranslationUnit.
constexpr inline MockModuleTranslationUnit *toMockPtr(L0::ModuleTranslationUnit *tu) {
    using MockPtr = MockModuleTranslationUnit *;
    return static_cast<MockPtr>(tu);
}
template <>
struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
using BaseClass = ::L0::ModuleImp;
@@ -30,11 +78,16 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
using BaseClass::isFunctionSymbolExportEnabled;
using BaseClass::isGlobalSymbolExportEnabled;
using BaseClass::kernelImmDatas;
using BaseClass::maxGroupSize;
using BaseClass::symbols;
using BaseClass::translationUnit;
using BaseClass::type;
using BaseClass::unresolvedExternalsInfo;
uint32_t &maxGroupSize{BaseClass::defaultMaxGroupSize};
WhiteBox(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
: ::L0::ModuleImp{device, moduleBuildLog, type} {
this->translationUnit.reset(new MockModuleTranslationUnit{device});
}
};
using Module = WhiteBox<::L0::Module>;
@@ -50,7 +103,7 @@ struct Mock<Module> : public Module {
ADDMETHOD_NOBASE(getFunctionPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pKernelName, void **pfnFunction));
ADDMETHOD_NOBASE(getNativeBinary, ze_result_t, ZE_RESULT_SUCCESS, (size_t * pSize, uint8_t *pModuleNativeBinary));
ADDMETHOD_CONST_NOBASE(getKernelImmutableData, const L0::KernelImmutableData *, nullptr, (const char *kernelName));
ADDMETHOD_CONST_NOBASE(getMaxGroupSize, uint32_t, 256, ());
ADDMETHOD_CONST_NOBASE(getMaxGroupSize, uint32_t, 256, (const NEO::KernelDescriptor &));
ADDMETHOD_NOBASE(getKernelNames, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, const char **pNames));
ADDMETHOD_NOBASE(performDynamicLink, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog));
@@ -59,23 +112,6 @@ struct Mock<Module> : public Module {
ADDMETHOD_CONST_NOBASE(isDebugEnabled, bool, false, ());
};
// Minimal translation-unit mock: binary processing is stubbed out and
// compilation is skipped whenever an unpacked device binary is already set.
struct MockModuleTranslationUnit : public L0::ModuleTranslationUnit {
    MockModuleTranslationUnit(L0::Device *device) : L0::ModuleTranslationUnit(device) {}

    // Stub: always reports success without processing anything.
    ze_result_t processUnpackedBinary() override { return ZE_RESULT_SUCCESS; }

    // Falls back to the real compilation only when no unpacked device binary
    // (pointer and non-zero size) is available.
    ze_result_t compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) override {
        const bool hasUnpackedBinary = unpackedDeviceBinarySize && unpackedDeviceBinary;
        if (!hasUnpackedBinary) {
            return ModuleTranslationUnit::compileGenBinary(inputArgs, staticLink);
        }
        return ZE_RESULT_SUCCESS;
    }
};
struct MockModule : public L0::ModuleImp {
using ModuleImp::debugEnabled;
using ModuleImp::debugModuleHandle;
@@ -86,11 +122,13 @@ struct MockModule : public L0::ModuleImp {
using ModuleImp::populateHostGlobalSymbolsMap;
using ModuleImp::symbols;
using ModuleImp::translationUnit;
uint32_t &maxGroupSize = ModuleImp::defaultMaxGroupSize;
MockModule(L0::Device *device,
L0::ModuleBuildLog *moduleBuildLog,
L0::ModuleType type) : ModuleImp(device, moduleBuildLog, type) {
maxGroupSize = 32;
this->translationUnit.reset(new MockModuleTranslationUnit{device});
this->maxGroupSize = 32u;
};
~MockModule() override = default;

View File

@@ -181,6 +181,8 @@ TEST_F(ModuleWithSLDTest, GivenNoDebugDataWhenInitializingModuleThenRelocatedDeb
moduleBuildLog,
ModuleType::User);
module->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
auto mockTranslationUnit = toMockPtr(module->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
uint32_t kernelHeap = 0;
auto kernelInfo = new KernelInfo();
@@ -203,6 +205,7 @@ TEST_F(ModuleWithSLDTest, GivenNoDebugDataWhenInitializingModuleThenRelocatedDeb
result = module->initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_EQ(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData);
}
@@ -222,6 +225,8 @@ TEST_F(ModuleWithSLDTest, GivenDebugDataWithSingleRelocationWhenInitializingModu
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
uint32_t kernelHeap = 0;
auto kernelInfo = new KernelInfo();
@@ -252,6 +257,7 @@ TEST_F(ModuleWithSLDTest, GivenDebugDataWithSingleRelocationWhenInitializingModu
result = moduleMock->initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_EQ(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData);
}
@@ -271,6 +277,8 @@ TEST_F(ModuleWithSLDTest, GivenDebugDataWithMultipleRelocationsWhenInitializingM
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
uint32_t kernelHeap = 0;
auto kernelInfo = new KernelInfo();
@@ -298,6 +306,7 @@ TEST_F(ModuleWithSLDTest, GivenDebugDataWithMultipleRelocationsWhenInitializingM
result = moduleMock->initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_NE(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData);
}
@@ -444,6 +453,8 @@ HWTEST_F(ModuleWithDebuggerL0MultiTileTest, GivenSubDeviceWhenCreatingModuleThen
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(subDevice0, moduleBuildLog, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(subDevice0);
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
uint32_t kernelHeap = 0;
auto kernelInfo = new KernelInfo();
@@ -471,6 +482,7 @@ HWTEST_F(ModuleWithDebuggerL0MultiTileTest, GivenSubDeviceWhenCreatingModuleThen
EXPECT_EQ(1u, debuggerL0Hw->notifyModuleCreateCount);
EXPECT_EQ(subDevice0->getNEODevice(), debuggerL0Hw->notifyModuleLoadAllocationsCapturedDevice);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_EQ(1, memoryOperationsHandler->makeResidentCalledCount);
}
@@ -488,6 +500,8 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenDebugDataWithRelocationsWhenInitializing
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
uint32_t kernelHeap = 0;
auto kernelInfo = new KernelInfo();
@@ -515,6 +529,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenDebugDataWithRelocationsWhenInitializing
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->registerElfAndLinkCount);
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleCreateCount);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_NE(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData.get());
EXPECT_EQ(reinterpret_cast<char *>(kernelInfo->kernelDescriptor.external.relocatedDebugData.get()), getMockDebuggerL0Hw<FamilyType>()->lastReceivedElf);
}
@@ -542,9 +557,11 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenBuiltinModuleWhenInitializingModuleThenM
kernelMock.module = moduleMock.get();
kernelMock.immutableData.kernelInfo = kernelInfo;
kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0;
kernelInfo->kernelDescriptor.external.debugData = std::make_unique<NEO::DebugData>();
moduleMock->kernelImmData = &kernelMock.immutableData;
moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
kernelInfo->kernelDescriptor.external.debugData = std::make_unique<NEO::DebugData>();
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
auto debugData = MockElfEncoder<>::createRelocateableDebugDataElf();
kernelInfo->kernelDescriptor.external.debugData->vIsaSize = static_cast<uint32_t>(debugData.size());
@@ -557,6 +574,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenBuiltinModuleWhenInitializingModuleThenM
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->registerElfAndLinkCount);
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleCreateCount);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_NE(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData.get());
EXPECT_EQ(nullptr, getMockDebuggerL0Hw<FamilyType>()->lastReceivedElf);
}
@@ -575,6 +593,8 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenDebugDataWithoutRelocationsWhenInitializ
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
uint32_t kernelHeap = 0;
auto kernelInfo = new KernelInfo();
@@ -608,6 +628,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenDebugDataWithoutRelocationsWhenInitializ
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->registerElfAndLinkCount);
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleCreateCount);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_EQ(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData.get());
EXPECT_EQ(kernelInfo->kernelDescriptor.external.debugData->vIsa, getMockDebuggerL0Hw<FamilyType>()->lastReceivedElf);
}
@@ -626,6 +647,8 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNoDebugDataWhenInitializingModuleThenDoN
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
uint32_t kernelHeap = 0;
auto kernelInfo = new KernelInfo();
@@ -642,6 +665,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNoDebugDataWhenInitializingModuleThenDoN
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->registerElfCount);
EXPECT_EQ(moduleMock->initialize(&moduleDesc, neoDevice), ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->registerElfCount);
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleCreateCount);
}
@@ -699,9 +723,12 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinNoDebugDataWhenInitializing
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->registerElfCount);
EXPECT_EQ(moduleMock->initialize(&moduleDesc, neoDevice), ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->registerElfCount);
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleCreateCount);
}
@@ -779,6 +806,8 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNonZebinBinaryWhenDestroyModuleThenModul
moduleMock->kernelImmData = &kernelMock.immutableData;
moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
kernelInfo->kernelDescriptor.external.debugData = std::make_unique<NEO::DebugData>();
@@ -795,6 +824,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNonZebinBinaryWhenDestroyModuleThenModul
kernelInfo->kernelDescriptor.external.debugData->genIsaSize = 0;
EXPECT_EQ(moduleMock->initialize(&moduleDesc, neoDevice), ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
moduleMock->destroy();
moduleMock.release();
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleDestroyCount);
@@ -814,6 +844,8 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNoDebugDataWhenDestroyingModuleThenNotif
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
uint32_t kernelHeap = 0;
auto kernelInfo = new KernelInfo();
@@ -829,6 +861,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNoDebugDataWhenDestroyingModuleThenNotif
moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
EXPECT_EQ(moduleMock->initialize(&moduleDesc, neoDevice), ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
moduleMock->destroy();
moduleMock.release();
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleDestroyCount);
@@ -853,7 +886,6 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenModuleDebugHandleZeroWhenInitial
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
auto zebin = ZebinTestData::ValidEmptyProgram<>();

View File

@@ -331,7 +331,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeDisabledWhenSet
EXPECT_EQ(nullptr, mockKernel.perThreadDataForWholeThreadGroup);
}
TEST_F(KernelImpSetGroupSizeTest, givenIncorrectGroupSizeWhenSettingGroupSizeThenInvalidGroupSizeDimensionErrorIsReturned) {
TEST_F(KernelImpSetGroupSizeTest, givenIncorrectGroupSizeDimensionWhenSettingGroupSizeThenInvalidGroupSizeDimensionErrorIsReturned) {
Mock<Kernel> mockKernel;
Mock<Module> mockModule(this->device, nullptr);
for (auto i = 0u; i < 3u; i++) {
@@ -800,6 +800,8 @@ TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotC
std::unique_ptr<L0::ult::MockModule> moduleMock = std::make_unique<L0::ult::MockModule>(device, moduleBuildLog, ModuleType::Builtin);
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
moduleMock->translationUnit->programInfo.linkerInput = std::move(linkerInput);
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
mockTranslationUnit->processUnpackedBinaryCallBase = false;
uint32_t kernelHeap = 0;
auto kernelInfo = new KernelInfo();
@@ -821,6 +823,7 @@ TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotC
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = moduleMock->initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes;
EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes);
@@ -941,7 +944,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->maxGroupSize = 10;
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -983,7 +986,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZ
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->maxGroupSize = 10;
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -1024,7 +1027,7 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIs
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->maxGroupSize = 10;
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -1066,7 +1069,7 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTStackAllocationFail
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->maxGroupSize = 10;
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -1109,7 +1112,7 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArra
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->maxGroupSize = 10;
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -1144,7 +1147,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitial
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->maxGroupSize = 10;
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -1185,7 +1188,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->maxGroupSize = 10;
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

View File

@@ -589,9 +589,11 @@ struct ModuleSpecConstantsFixture : public DeviceFixture {
auto module = new Module(device, nullptr, ModuleType::User);
module->translationUnit.reset(mockTranslationUnit);
mockTranslationUnit->processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = module->initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
for (uint32_t i = 0; i < mockCompiler->moduleNumSpecConstants / 2; i++) {
EXPECT_EQ(static_cast<uint64_t>(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i]]), static_cast<uint64_t>(mockCompiler->moduleSpecConstantsValuesT2[i]));
EXPECT_EQ(static_cast<uint64_t>(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i + 1]]), static_cast<uint64_t>(mockCompiler->moduleSpecConstantsValuesT1[i]));
@@ -646,9 +648,11 @@ struct ModuleSpecConstantsFixture : public DeviceFixture {
auto module = new Module(device, nullptr, ModuleType::User);
module->translationUnit.reset(mockTranslationUnit);
mockTranslationUnit->processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = module->initialize(&combinedModuleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
for (uint32_t i = 0; i < mockCompiler->moduleNumSpecConstants / 2; i++) {
EXPECT_EQ(static_cast<uint64_t>(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i]]), static_cast<uint64_t>(mockCompiler->moduleSpecConstantsValuesT2[i]));
EXPECT_EQ(static_cast<uint64_t>(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i + 1]]), static_cast<uint64_t>(mockCompiler->moduleSpecConstantsValuesT1[i]));
@@ -937,6 +941,7 @@ struct ModuleStaticLinkFixture : public DeviceFixture {
auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get();
rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
mockTranslationUnit = new MockModuleTranslationUnit(device);
mockTranslationUnit->processUnpackedBinaryCallBase = false;
loadModules(testMultiple);
@@ -955,6 +960,7 @@ struct ModuleStaticLinkFixture : public DeviceFixture {
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = module->initialize(&combinedModuleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
module->destroy();
}
void runSprivLinkBuildWithOneModule() {
@@ -963,6 +969,7 @@ struct ModuleStaticLinkFixture : public DeviceFixture {
auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get();
rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
mockTranslationUnit = new MockModuleTranslationUnit(device);
mockTranslationUnit->processUnpackedBinaryCallBase = false;
loadModules(testSingle);
@@ -979,6 +986,7 @@ struct ModuleStaticLinkFixture : public DeviceFixture {
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = module->initialize(&combinedModuleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
module->destroy();
}
std::unique_ptr<ZebinTestData::ZebinWithL0TestCommonModule> zebinData;
@@ -1045,6 +1053,7 @@ HWTEST_F(ModuleLinkingTest, givenFailureDuringLinkingWhenCreatingModuleThenModul
rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
mockTranslationUnit->processUnpackedBinaryCallBase = false;
auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
linkerInput->valid = false;
@@ -1063,6 +1072,7 @@ HWTEST_F(ModuleLinkingTest, givenFailureDuringLinkingWhenCreatingModuleThenModul
ze_result_t result = ZE_RESULT_SUCCESS;
result = module.initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_ERROR_MODULE_LINK_FAILURE);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
}
HWTEST_F(ModuleLinkingTest, givenRemainingUnresolvedSymbolsDuringLinkingWhenCreatingModuleThenModuleIsNotLinkedFully) {
@@ -1071,6 +1081,7 @@ HWTEST_F(ModuleLinkingTest, givenRemainingUnresolvedSymbolsDuringLinkingWhenCrea
rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
mockTranslationUnit->processUnpackedBinaryCallBase = false;
auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
@@ -1093,6 +1104,7 @@ HWTEST_F(ModuleLinkingTest, givenRemainingUnresolvedSymbolsDuringLinkingWhenCrea
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = module.initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_FALSE(module.isFullyLinked);
}
@@ -1102,6 +1114,7 @@ HWTEST_F(ModuleLinkingTest, givenModuleCompiledThenCachingIsTrue) {
rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
mockTranslationUnit->processUnpackedBinaryCallBase = false;
auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
@@ -1126,6 +1139,7 @@ HWTEST_F(ModuleLinkingTest, givenModuleCompiledThenCachingIsTrue) {
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = module.initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_TRUE(mockCompiler->cachingPassed);
}
@@ -2304,10 +2318,14 @@ HWTEST_F(ModuleTranslationUnitTest, WhenCreatingFromNativeBinaryThenSetsUpPacked
target.maxPointerSizeInBytes = programTokens.header->GPUPointerSizeInBytes;
auto arData = encoder.encode();
L0::ModuleTranslationUnit moduleTuValid(this->device);
auto moduleTuValid = MockModuleTranslationUnit{this->device};
moduleTuValid.processUnpackedBinaryCallBase = false;
moduleTuValid.setDummyKernelInfo();
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = moduleTuValid.createFromNativeBinary(reinterpret_cast<const char *>(arData.data()), arData.size());
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(moduleTuValid.processUnpackedBinaryCalled, 1u);
EXPECT_NE(moduleTuValid.packedDeviceBinarySize, arData.size());
}
@@ -2326,6 +2344,113 @@ HWTEST_F(ModuleTranslationUnitTest, WhenCreatingFromZebinThenAppendAllowZebinFla
EXPECT_STREQ(expectedOptions.c_str(), moduleTu.options.c_str());
}
// Verifies that the per-kernel maximum work-group size reported by the module is
// halved for a kernel compiled with SIMD16 and the large GRF count, and that
// setGroupSize() enforces that reduced limit.
//
// Two kernels are described in the zeinfo below:
//   - kernel_with_default_maxWGS: simd_size 8,  grf_count GrfConfig::DefaultGrfNumber
//   - kernel_with_reduced_maxWGS: simd_size 16, grf_count GrfConfig::LargeGrfNumber
//
// The test is only instantiated for platforms accepted by the IsWithinXeGfxFamily matcher.
HWTEST2_F(ModuleTranslationUnitTest, givenLargeGrfAndSimd16WhenProcessingBinaryThenKernelGroupSizeReducedToFitWithinSubslice, IsWithinXeGfxFamily) {
std::string validZeInfo = std::string("version :\'") + versionToString(zeInfoDecoderVersion) + R"===('
kernels:
- name : kernel_with_default_maxWGS
execution_env :
simd_size : 8
grf_count: )===" +
std::to_string(GrfConfig::DefaultGrfNumber) + R"===(
- name : kernel_with_reduced_maxWGS
execution_env :
simd_size : 16
grf_count: )===" +
std::to_string(GrfConfig::LargeGrfNumber) + "\n";
// Dummy 8-byte, zero-filled ISA shared by both kernel text sections.
uint8_t kernelIsa[8]{0U};
// Start from the stock empty zebin, swap in the zeinfo built above and add one
// text section per kernel named in it.
ZebinTestData::ValidEmptyProgram zebin;
zebin.removeSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo);
zebin.appendSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo, ArrayRef<const uint8_t>::fromAny(validZeInfo.data(), validZeInfo.size()));
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "kernel_with_default_maxWGS", {kernelIsa, sizeof(kernelIsa)});
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "kernel_with_reduced_maxWGS", {kernelIsa, sizeof(kernelIsa)});
// Stamp the binary with the product family of the device under test.
zebin.elfHeader->machine = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
MockModule mockModule{this->device, nullptr, ModuleType::User};
// Seed the module-wide limit with the device-reported maximum work-group size.
// NOTE(review): other tests touched by this change assign `defaultMaxGroupSize`;
// confirm `maxGroupSize` is the member this MockModule exposes.
mockModule.maxGroupSize = static_cast<uint32_t>(this->device->getDeviceInfo().maxWorkGroupSize);
auto mockTU = mockModule.translationUnit.get();
auto result = mockTU->createFromNativeBinary(reinterpret_cast<const char *>(zebin.storage.data()), zebin.storage.size());
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
// Index 0 / 1 are taken to be the default / reduced kernel respectively
// (presumably kernelInfos follows the zeinfo order; verify against the decoder).
auto &defaultKernelDescriptor = mockTU->programInfo.kernelInfos[0]->kernelDescriptor;
auto &reducedKernelDescriptor = mockTU->programInfo.kernelInfos[1]->kernelDescriptor;
// The default kernel keeps the module-wide limit; the SIMD16 + large-GRF kernel gets half of it.
EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), mockModule.maxGroupSize);
EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (mockModule.maxGroupSize >> 1));
// 8 * 4 == 32 == 1 << 5, so the product of the three dimensions equals
// maxGroupSize (assuming maxGroupSize is a multiple of 32 — TODO confirm).
uint32_t groupSize[3] = {8, 4, (mockModule.maxGroupSize >> 5)}; // default max WGS
Mock<Kernel> defaultKernel;
defaultKernel.module = &mockModule;
defaultKernel.descriptor.kernelAttributes = defaultKernelDescriptor.kernelAttributes;
// A group of exactly the default maximum is accepted for the default kernel...
EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]));
Mock<Kernel> reducedKernel;
reducedKernel.module = &mockModule;
reducedKernel.descriptor.kernelAttributes = reducedKernelDescriptor.kernelAttributes;
// ...but the very same dimensions are expected to be rejected for the reduced kernel.
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, reducedKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]));
// Shifting Z right by 2 quarters the total item count, which puts it below the
// halved limit asserted above (not exactly on it).
groupSize[2] >>= 2; // align to max WGS reduced due to SIMD16 + LargeGrf
EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]));
}
// Verifies that suggestGroupSize() never proposes more work-items per group than
// the per-kernel limit returned by the module's getMaxGroupSize(), both for a kernel
// with the default limit and for a SIMD16 + large-GRF kernel whose limit is halved.
//
// Two kernels are described in the zeinfo below:
//   - kernel_with_default_maxWGS: simd_size 8,  grf_count GrfConfig::DefaultGrfNumber
//   - kernel_with_reduced_maxWGS: simd_size 16, grf_count GrfConfig::LargeGrfNumber
//
// The test is only instantiated for platforms accepted by the IsWithinXeGfxFamily matcher.
HWTEST2_F(ModuleTranslationUnitTest, givenLargeGrfAndSimd16WhenProcessingBinaryThenSuggestedKernelGroupSizeFitsWithinSubslice, IsWithinXeGfxFamily) {
std::string validZeInfo = std::string("version :\'") + versionToString(zeInfoDecoderVersion) + R"===('
kernels:
- name : kernel_with_default_maxWGS
execution_env :
simd_size : 8
grf_count: )===" +
std::to_string(GrfConfig::DefaultGrfNumber) + R"===(
- name : kernel_with_reduced_maxWGS
execution_env :
simd_size : 16
grf_count: )===" +
std::to_string(GrfConfig::LargeGrfNumber) + "\n";
// Dummy 8-byte, zero-filled ISA shared by both kernel text sections.
uint8_t kernelIsa[8]{0U};
// Start from the stock empty zebin, swap in the zeinfo built above and add one
// text section per kernel named in it.
ZebinTestData::ValidEmptyProgram zebin;
zebin.removeSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo);
zebin.appendSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo, ArrayRef<const uint8_t>::fromAny(validZeInfo.data(), validZeInfo.size()));
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "kernel_with_default_maxWGS", {kernelIsa, sizeof(kernelIsa)});
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "kernel_with_reduced_maxWGS", {kernelIsa, sizeof(kernelIsa)});
// Stamp the binary with the product family of the device under test.
zebin.elfHeader->machine = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
MockModule mockModule{this->device, nullptr, ModuleType::User};
// Seed the module-wide limit with the device-reported maximum work-group size.
// NOTE(review): other tests touched by this change assign `defaultMaxGroupSize`;
// confirm `maxGroupSize` is the member this MockModule exposes.
mockModule.maxGroupSize = static_cast<uint32_t>(device->getDeviceInfo().maxWorkGroupSize);
auto mockTU = mockModule.translationUnit.get();
auto result = mockTU->createFromNativeBinary(reinterpret_cast<const char *>(zebin.storage.data()), zebin.storage.size());
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
// Index 0 / 1 are taken to be the default / reduced kernel respectively
// (presumably kernelInfos follows the zeinfo order; verify against the decoder).
auto &defaultKernelDescriptor = mockTU->programInfo.kernelInfos[0]->kernelDescriptor;
auto &reducedKernelDescriptor = mockTU->programInfo.kernelInfos[1]->kernelDescriptor;
// Precondition for the checks below: default kernel keeps the full limit, reduced kernel gets half.
EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), mockModule.maxGroupSize);
EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (mockModule.maxGroupSize >> 1));
// Output slots for suggestGroupSize(); zeroed before every call so the
// "greater than zero" check proves the call actually wrote a suggestion.
uint32_t groupSize[3] = {0u, 0u, 0u};
Mock<Kernel> defaultKernel;
defaultKernel.module = &mockModule;
defaultKernel.descriptor.kernelAttributes = defaultKernelDescriptor.kernelAttributes;
// Default kernel, large 3D global size: suggestion must be non-empty and within the module-wide limit.
EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(4096u, 4096u, 4096u, &groupSize[0], &groupSize[1], &groupSize[2]));
EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.maxGroupSize);
groupSize[0] = groupSize[1] = groupSize[2] = 0u;
// Default kernel, 1D global size equal to the module-wide limit.
EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(mockModule.maxGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.maxGroupSize);
groupSize[0] = groupSize[1] = groupSize[2] = 0u;
Mock<Kernel> reducedKernel;
reducedKernel.module = &mockModule;
reducedKernel.descriptor.kernelAttributes = reducedKernelDescriptor.kernelAttributes;
// Reduced kernel, 1D global size equal to the module-wide limit: the suggestion
// must respect the kernel-specific (halved) limit rather than the module-wide one.
EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.suggestGroupSize(mockModule.maxGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.getMaxGroupSize(reducedKernelDescriptor));
groupSize[0] = groupSize[1] = groupSize[2] = 0u;
// Reduced kernel, large 3D global size: same kernel-specific bound applies.
EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.suggestGroupSize(4096u, 4096u, 4096u, &groupSize[0], &groupSize[1], &groupSize[2]));
EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.getMaxGroupSize(reducedKernelDescriptor));
}
TEST_F(ModuleTranslationUnitTest, WhenCreatingFromZeBinaryAndGlobalsAreExportedThenTheirAllocationTypeIsUSMDevice) {
std::string zeInfo = std::string("version :\'") + versionToString(zeInfoDecoderVersion) + R"===('
kernels:
@@ -2476,9 +2601,11 @@ HWTEST_F(ModuleTranslationUnitTest, WhenBuildOptionsAreNullThenReuseExistingOpti
DebugManager.flags.DisableStatelessToStatefulOptimization.set(1);
MockModuleTranslationUnit moduleTu(this->device);
moduleTu.processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
}
@@ -2487,10 +2614,12 @@ HWTEST_F(ModuleTranslationUnitTest, givenInternalOptionsThenLSCCachePolicyIsSet)
auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);
MockModuleTranslationUnit moduleTu(this->device);
moduleTu.processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
const auto &compilerProductHelper = rootDeviceEnvironment->getHelper<CompilerProductHelper>();
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
auto expectedPolicy = compilerProductHelper.getCachingPolicyOptions(false);
if (expectedPolicy != nullptr) {
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find(expectedPolicy), std::string::npos);
@@ -2507,9 +2636,11 @@ HWTEST2_F(ModuleTranslationUnitTest, givenDebugFlagSetToWbWhenGetInternalOptions
auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);
MockModuleTranslationUnit moduleTu(this->device);
moduleTu.processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-store-cache-default=7 -cl-load-cache-default=4"), std::string::npos);
}
@@ -2521,9 +2652,11 @@ HWTEST2_F(ModuleTranslationUnitTest, givenDebugFlagSetForceAllResourcesUncachedW
auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);
MockModuleTranslationUnit moduleTu(this->device);
moduleTu.processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-store-cache-default=1 -cl-load-cache-default=1"), std::string::npos);
}
@@ -2532,9 +2665,11 @@ HWTEST2_F(ModuleTranslationUnitTest, givenAtLeastXeHpgCoreWhenGetInternalOptions
auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);
MockModuleTranslationUnit moduleTu(this->device);
moduleTu.processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-store-cache-default=2 -cl-load-cache-default=4"), std::string::npos);
}
@@ -2544,9 +2679,11 @@ HWTEST_F(ModuleTranslationUnitTest, givenForceToStatelessRequiredWhenBuildingMod
rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);
MockModuleTranslationUnit moduleTu(device);
moduleTu.processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
const auto &compilerProductHelper = rootDeviceEnvironment->getHelper<CompilerProductHelper>();
if (compilerProductHelper.isForceToStatelessRequired()) {
@@ -2587,11 +2724,13 @@ HWTEST2_F(ModuleTranslationUnitTest, givenSourceLevelDebuggerAndAllowZebinBuildO
rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);
MockModuleTranslationUnit moduleTu(device);
moduleTu.processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_SUCCESS;
auto buildOption = NEO::CompilerOptions::allowZebin.str();
result = moduleTu.buildFromSpirV("", 0U, buildOption.c_str(), "", nullptr);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
EXPECT_NE(mockCompilerInterface->receivedApiOptions.find(NEO::CompilerOptions::allowZebin.str()), std::string::npos);
EXPECT_EQ(mockCompilerInterface->inputInternalOptions.find(NEO::CompilerOptions::disableZebin.str()), std::string::npos);
}
@@ -2602,11 +2741,13 @@ HWTEST_F(ModuleTranslationUnitTest, givenAllowZebinBuildOptionWhenBuildWithSpirv
rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);
MockModuleTranslationUnit moduleTu(device);
moduleTu.processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
auto buildOption = NEO::CompilerOptions::allowZebin.str();
result = moduleTu.buildFromSpirV("", 0U, buildOption.c_str(), "", nullptr);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
EXPECT_NE(mockCompilerInterface->receivedApiOptions.find(NEO::CompilerOptions::allowZebin.str()), std::string::npos);
EXPECT_EQ(mockCompilerInterface->inputInternalOptions.find(NEO::CompilerOptions::disableZebin.str()), std::string::npos);
}
@@ -2623,10 +2764,12 @@ HWTEST_F(ModuleTranslationUnitTest, givenSourceLevelDebuggerWhenBuildWithSpirvTh
rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);
MockModuleTranslationUnit moduleTu(device);
moduleTu.processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
}
TEST(ModuleBuildLog, WhenGreaterBufferIsPassedToGetStringThenOutputSizeIsOverridden) {

View File

@@ -299,6 +299,7 @@ TEST_F(ModuleTests, givenLargeGrfFlagSetWhenCreatingModuleThenOverrideInternalFl
moduleDesc.inputSize = src.size();
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
mockTranslationUnit->processUnpackedBinaryCallBase = false;
Module module(device, nullptr, ModuleType::User);
module.translationUnit.reset(mockTranslationUnit);
@@ -306,6 +307,7 @@ TEST_F(ModuleTests, givenLargeGrfFlagSetWhenCreatingModuleThenOverrideInternalFl
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = module.initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-intel-256-GRF-per-thread"), std::string::npos);
EXPECT_EQ(pMockCompilerInterface->inputInternalOptions.find("-cl-intel-128-GRF-per-thread"), std::string::npos);
@@ -328,6 +330,7 @@ TEST_F(ModuleTests, givenAutoGrfFlagSetWhenCreatingModuleThenOverrideInternalFla
moduleDesc.inputSize = src.size();
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
mockTranslationUnit->processUnpackedBinaryCallBase = false;
Module module(device, nullptr, ModuleType::User);
module.translationUnit.reset(mockTranslationUnit);
@@ -335,6 +338,7 @@ TEST_F(ModuleTests, givenAutoGrfFlagSetWhenCreatingModuleThenOverrideInternalFla
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = module.initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_NE(pMockCompilerInterface->receivedApiOptions.find("-cl-intel-enable-auto-large-GRF-mode"), std::string::npos);
EXPECT_EQ(pMockCompilerInterface->receivedApiOptions.find("-cl-intel-256-GRF-per-thread"), std::string::npos);
@@ -357,6 +361,7 @@ TEST_F(ModuleTests, givenDefaultGrfFlagSetWhenCreatingModuleThenOverrideInternal
moduleDesc.inputSize = src.size();
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
mockTranslationUnit->processUnpackedBinaryCallBase = false;
Module module(device, nullptr, ModuleType::User);
module.translationUnit.reset(mockTranslationUnit);
@@ -364,6 +369,7 @@ TEST_F(ModuleTests, givenDefaultGrfFlagSetWhenCreatingModuleThenOverrideInternal
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = module.initialize(&moduleDesc, neoDevice);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
EXPECT_EQ(pMockCompilerInterface->inputInternalOptions.find("-cl-intel-256-GRF-per-thread"), std::string::npos);
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-intel-128-GRF-per-thread"), std::string::npos);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -50,10 +50,12 @@ HWTEST2_F(KernelPropertyTest, givenDG2WhenGetInternalOptionsThenWriteBackBuildOp
auto pMockCompilerInterface = new MockCompilerInterface;
auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);
MockModuleTranslationUnit moduleTu(this->device);
MockModuleTranslationUnit mockTranslationUnit(this->device);
mockTranslationUnit.processUnpackedBinaryCallBase = false;
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
result = mockTranslationUnit.buildFromSpirV("", 0U, nullptr, "", nullptr);
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
EXPECT_EQ(mockTranslationUnit.processUnpackedBinaryCalled, 1u);
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-store-cache-default=7 -cl-load-cache-default=4"), std::string::npos);
}