mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-16 04:08:35 +08:00
fix(l0): check for largeGRF when computing maxWorkGroupSize
Sizing context (PVC): When using LargeGRF (a.k.a. GRF256) there are only 4 HW threads per EU (instead of the default 8). Together with SIMD16 that means that there can be at most 64 work-items per EU. With 8 EUs per subslice this gives 512 work-items on a single subslice. For correct intra-WG synchronization all of a work-group's WIs must be executed on the same subslice (to access the same SLM, where the synchronization primitives are stored). Thus, with SIMD16 and LargeGRF the work-group size must not exceed 512 (PVC example). So far `maxWorkGroupSize` is taken solely from a DeviceInfo structure both in `ModuleTranslationUnit::processUnpackedBinary()` and `ModuleImp::initialize()`. This approach does not take kernel parameters (LargeGRF) into account. As a result, a kernel using LargeGRF with SIMD16 can be submitted with the work-group size set to 1024, which leads to a hang. Fix the `.maxWorkGroupSize` computation so that it takes the kernel parameters into consideration. Add new tests (for discrete platforms >= XeHP) and adapt existing ones; fix cosmetics along the way. Similar check for OCL: https://github.com/intel/compute-runtime/blob/master/opencl/source/command_queue/enqueue_kernel.h#L130 Related-To: NEO-7684 Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a3aa7a1326
commit
2778043d67
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -10,6 +10,8 @@
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace L0 {
|
||||
@@ -55,7 +57,8 @@ ModuleImmutableDataFixture::MockModule::MockModule(L0::Device *device,
|
||||
L0::ModuleType type,
|
||||
uint32_t perHwThreadPrivateMemorySize,
|
||||
MockImmutableData *inMockKernelImmData) : ModuleImp(device, moduleBuildLog, type), mockKernelImmData(inMockKernelImmData) {
|
||||
mockKernelImmData->setDevice(device);
|
||||
this->mockKernelImmData->setDevice(device);
|
||||
this->translationUnit.reset(new MockModuleTranslationUnit(this->translationUnit.get()));
|
||||
}
|
||||
|
||||
void ModuleImmutableDataFixture::MockModule::checkIfPrivateMemoryPerDispatchIsNeeded() {
|
||||
@@ -110,8 +113,8 @@ void ModuleImmutableDataFixture::tearDown() {
|
||||
DeviceFixture::tearDown();
|
||||
}
|
||||
|
||||
Module *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc,
|
||||
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
|
||||
L0::Module *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc,
|
||||
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
|
||||
auto module = new ProxyModuleImp(device, moduleBuildLog, type);
|
||||
|
||||
*result = module->initialize(desc, device->getNEODevice());
|
||||
|
||||
@@ -51,9 +51,9 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
||||
|
||||
struct MockModule : public L0::ModuleImp {
|
||||
using ModuleImp::allocatePrivateMemoryPerDispatch;
|
||||
using ModuleImp::defaultMaxGroupSize;
|
||||
using ModuleImp::getKernelImmutableDataVector;
|
||||
using ModuleImp::kernelImmDatas;
|
||||
using ModuleImp::maxGroupSize;
|
||||
using ModuleImp::translationUnit;
|
||||
using ModuleImp::type;
|
||||
|
||||
@@ -124,8 +124,8 @@ struct ModuleFixture : public DeviceFixture {
|
||||
return kernelImmDatas;
|
||||
}
|
||||
|
||||
static Module *create(L0::Device *device, const ze_module_desc_t *desc,
|
||||
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
|
||||
static L0::Module *create(L0::Device *device, const ze_module_desc_t *desc,
|
||||
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
|
||||
};
|
||||
|
||||
void setUp();
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/compiler_interface/external_functions.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
#include "shared/test/common/mocks/mock_cif.h"
|
||||
#include "shared/test/common/mocks/mock_compiler_interface.h"
|
||||
#include "shared/test/common/test_macros/mock_method_macros.h"
|
||||
@@ -17,6 +19,52 @@
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
// Test double for L0::ModuleTranslationUnit.
// It can be built fresh for a device or by taking over the state of an existing
// translation unit, and it short-circuits compileGenBinary() when an unpacked
// device binary is already present.
struct MockModuleTranslationUnit : public L0::ModuleTranslationUnit {
|
||||
using BaseClass = L0::ModuleTranslationUnit;
|
||||
|
||||
// Creates an empty mock translation unit bound to `device`.
MockModuleTranslationUnit(L0::Device *device) : BaseClass{device} {}
|
||||
|
||||
// Builds a mock bound to orig->device and swaps the members listed below with `orig`,
// so this object receives orig's values and `orig` is left holding the freshly
// constructed ones. `orig` is dereferenced unconditionally and must not be null.
// NOTE(review): only the members listed here are exchanged; confirm this list stays
// in sync with the base class when fields are added there.
MockModuleTranslationUnit(L0::ModuleTranslationUnit *orig) : BaseClass{orig->device} {
|
||||
std::swap(this->globalConstBuffer, orig->globalConstBuffer);
|
||||
std::swap(this->globalVarBuffer, orig->globalVarBuffer);
|
||||
std::swap(this->programInfo, orig->programInfo);
|
||||
std::swap(this->options, orig->options);
|
||||
std::swap(this->shouldSuppressRebuildWarning, orig->shouldSuppressRebuildWarning);
|
||||
std::swap(this->buildLog, orig->buildLog);
|
||||
std::swap(this->irBinary, orig->irBinary);
|
||||
std::swap(this->irBinarySize, orig->irBinarySize);
|
||||
std::swap(this->unpackedDeviceBinary, orig->unpackedDeviceBinary);
|
||||
std::swap(this->unpackedDeviceBinarySize, orig->unpackedDeviceBinarySize);
|
||||
std::swap(this->packedDeviceBinary, orig->packedDeviceBinary);
|
||||
std::swap(this->packedDeviceBinarySize, orig->packedDeviceBinarySize);
|
||||
std::swap(this->debugData, orig->debugData);
|
||||
std::swap(this->debugDataSize, orig->debugDataSize);
|
||||
std::swap(this->alignedvIsas, orig->alignedvIsas);
|
||||
std::swap(this->specConstantsValues, orig->specConstantsValues);
|
||||
std::swap(this->isBuiltIn, orig->isBuiltIn);
|
||||
}
|
||||
|
||||
// Mocked processUnpackedBinary().
// NOTE(review): ADDMETHOD is defined elsewhere; presumably it generates the override
// together with the processUnpackedBinaryCallBase / processUnpackedBinaryCalled
// members that the tests toggle and check — confirm against mock_method_macros.h.
ADDMETHOD(processUnpackedBinary, ze_result_t, true, ZE_RESULT_SUCCESS, (), ());
|
||||
|
||||
// Skips compilation and reports success when an unpacked device binary is already
// set (non-null pointer and non-zero size); otherwise defers to the base class.
ze_result_t compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) override {
|
||||
if (unpackedDeviceBinarySize && unpackedDeviceBinary) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
} else {
|
||||
return ModuleTranslationUnit::compileGenBinary(inputArgs, staticLink);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Appends the raw pointer held by dummyKernelInfo to programInfo.kernelInfos.
// dummyKernelInfo is empty by default, so a null pointer is appended unless the
// caller assigns it first.
// NOTE(review): the unique_ptr keeps owning the object after the raw pointer is
// stored; whether programInfo also frees its kernelInfos is not visible here —
// verify there is no double delete.
void setDummyKernelInfo() {
|
||||
this->programInfo.kernelInfos.push_back(dummyKernelInfo.get());
|
||||
}
|
||||
|
||||
// Optional kernel info used by setDummyKernelInfo(); empty until a test assigns it.
std::unique_ptr<NEO::KernelInfo> dummyKernelInfo = {};
|
||||
};
|
||||
|
||||
// Downcasts a base translation-unit pointer to the mock type so tests can reach
// mock-only members. This is an unchecked static_cast: the caller must guarantee
// that `tu` really points to a MockModuleTranslationUnit (or is null).
constexpr inline MockModuleTranslationUnit *toMockPtr(L0::ModuleTranslationUnit *tu) {
|
||||
return static_cast<MockModuleTranslationUnit *>(tu);
|
||||
}
|
||||
|
||||
template <>
|
||||
struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
|
||||
using BaseClass = ::L0::ModuleImp;
|
||||
@@ -30,11 +78,16 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
|
||||
using BaseClass::isFunctionSymbolExportEnabled;
|
||||
using BaseClass::isGlobalSymbolExportEnabled;
|
||||
using BaseClass::kernelImmDatas;
|
||||
using BaseClass::maxGroupSize;
|
||||
using BaseClass::symbols;
|
||||
using BaseClass::translationUnit;
|
||||
using BaseClass::type;
|
||||
using BaseClass::unresolvedExternalsInfo;
|
||||
uint32_t &maxGroupSize{BaseClass::defaultMaxGroupSize};
|
||||
|
||||
WhiteBox(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
|
||||
: ::L0::ModuleImp{device, moduleBuildLog, type} {
|
||||
this->translationUnit.reset(new MockModuleTranslationUnit{device});
|
||||
}
|
||||
};
|
||||
|
||||
using Module = WhiteBox<::L0::Module>;
|
||||
@@ -50,7 +103,7 @@ struct Mock<Module> : public Module {
|
||||
ADDMETHOD_NOBASE(getFunctionPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pKernelName, void **pfnFunction));
|
||||
ADDMETHOD_NOBASE(getNativeBinary, ze_result_t, ZE_RESULT_SUCCESS, (size_t * pSize, uint8_t *pModuleNativeBinary));
|
||||
ADDMETHOD_CONST_NOBASE(getKernelImmutableData, const L0::KernelImmutableData *, nullptr, (const char *kernelName));
|
||||
ADDMETHOD_CONST_NOBASE(getMaxGroupSize, uint32_t, 256, ());
|
||||
ADDMETHOD_CONST_NOBASE(getMaxGroupSize, uint32_t, 256, (const NEO::KernelDescriptor &));
|
||||
ADDMETHOD_NOBASE(getKernelNames, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, const char **pNames));
|
||||
ADDMETHOD_NOBASE(performDynamicLink, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog));
|
||||
@@ -59,23 +112,6 @@ struct Mock<Module> : public Module {
|
||||
ADDMETHOD_CONST_NOBASE(isDebugEnabled, bool, false, ());
|
||||
};
|
||||
|
||||
struct MockModuleTranslationUnit : public L0::ModuleTranslationUnit {
|
||||
MockModuleTranslationUnit(L0::Device *device) : L0::ModuleTranslationUnit(device) {
|
||||
}
|
||||
|
||||
ze_result_t processUnpackedBinary() override {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) override {
|
||||
if (unpackedDeviceBinarySize && unpackedDeviceBinary) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
} else {
|
||||
return ModuleTranslationUnit::compileGenBinary(inputArgs, staticLink);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct MockModule : public L0::ModuleImp {
|
||||
using ModuleImp::debugEnabled;
|
||||
using ModuleImp::debugModuleHandle;
|
||||
@@ -86,11 +122,13 @@ struct MockModule : public L0::ModuleImp {
|
||||
using ModuleImp::populateHostGlobalSymbolsMap;
|
||||
using ModuleImp::symbols;
|
||||
using ModuleImp::translationUnit;
|
||||
uint32_t &maxGroupSize = ModuleImp::defaultMaxGroupSize;
|
||||
|
||||
MockModule(L0::Device *device,
|
||||
L0::ModuleBuildLog *moduleBuildLog,
|
||||
L0::ModuleType type) : ModuleImp(device, moduleBuildLog, type) {
|
||||
maxGroupSize = 32;
|
||||
this->translationUnit.reset(new MockModuleTranslationUnit{device});
|
||||
this->maxGroupSize = 32u;
|
||||
};
|
||||
|
||||
~MockModule() override = default;
|
||||
|
||||
@@ -181,6 +181,8 @@ TEST_F(ModuleWithSLDTest, GivenNoDebugDataWhenInitializingModuleThenRelocatedDeb
|
||||
moduleBuildLog,
|
||||
ModuleType::User);
|
||||
module->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
auto mockTranslationUnit = toMockPtr(module->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
auto kernelInfo = new KernelInfo();
|
||||
@@ -203,6 +205,7 @@ TEST_F(ModuleWithSLDTest, GivenNoDebugDataWhenInitializingModuleThenRelocatedDeb
|
||||
result = module->initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_EQ(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData);
|
||||
}
|
||||
|
||||
@@ -222,6 +225,8 @@ TEST_F(ModuleWithSLDTest, GivenDebugDataWithSingleRelocationWhenInitializingModu
|
||||
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
auto kernelInfo = new KernelInfo();
|
||||
@@ -252,6 +257,7 @@ TEST_F(ModuleWithSLDTest, GivenDebugDataWithSingleRelocationWhenInitializingModu
|
||||
result = moduleMock->initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_EQ(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData);
|
||||
}
|
||||
|
||||
@@ -271,6 +277,8 @@ TEST_F(ModuleWithSLDTest, GivenDebugDataWithMultipleRelocationsWhenInitializingM
|
||||
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
auto kernelInfo = new KernelInfo();
|
||||
@@ -298,6 +306,7 @@ TEST_F(ModuleWithSLDTest, GivenDebugDataWithMultipleRelocationsWhenInitializingM
|
||||
result = moduleMock->initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData);
|
||||
}
|
||||
|
||||
@@ -444,6 +453,8 @@ HWTEST_F(ModuleWithDebuggerL0MultiTileTest, GivenSubDeviceWhenCreatingModuleThen
|
||||
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(subDevice0, moduleBuildLog, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(subDevice0);
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
auto kernelInfo = new KernelInfo();
|
||||
@@ -471,6 +482,7 @@ HWTEST_F(ModuleWithDebuggerL0MultiTileTest, GivenSubDeviceWhenCreatingModuleThen
|
||||
EXPECT_EQ(1u, debuggerL0Hw->notifyModuleCreateCount);
|
||||
EXPECT_EQ(subDevice0->getNEODevice(), debuggerL0Hw->notifyModuleLoadAllocationsCapturedDevice);
|
||||
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_EQ(1, memoryOperationsHandler->makeResidentCalledCount);
|
||||
}
|
||||
|
||||
@@ -488,6 +500,8 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenDebugDataWithRelocationsWhenInitializing
|
||||
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
auto kernelInfo = new KernelInfo();
|
||||
@@ -515,6 +529,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenDebugDataWithRelocationsWhenInitializing
|
||||
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->registerElfAndLinkCount);
|
||||
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleCreateCount);
|
||||
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData.get());
|
||||
EXPECT_EQ(reinterpret_cast<char *>(kernelInfo->kernelDescriptor.external.relocatedDebugData.get()), getMockDebuggerL0Hw<FamilyType>()->lastReceivedElf);
|
||||
}
|
||||
@@ -542,9 +557,11 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenBuiltinModuleWhenInitializingModuleThenM
|
||||
kernelMock.module = moduleMock.get();
|
||||
kernelMock.immutableData.kernelInfo = kernelInfo;
|
||||
kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0;
|
||||
kernelInfo->kernelDescriptor.external.debugData = std::make_unique<NEO::DebugData>();
|
||||
moduleMock->kernelImmData = &kernelMock.immutableData;
|
||||
moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
|
||||
kernelInfo->kernelDescriptor.external.debugData = std::make_unique<NEO::DebugData>();
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
auto debugData = MockElfEncoder<>::createRelocateableDebugDataElf();
|
||||
kernelInfo->kernelDescriptor.external.debugData->vIsaSize = static_cast<uint32_t>(debugData.size());
|
||||
@@ -557,6 +574,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenBuiltinModuleWhenInitializingModuleThenM
|
||||
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->registerElfAndLinkCount);
|
||||
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleCreateCount);
|
||||
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData.get());
|
||||
EXPECT_EQ(nullptr, getMockDebuggerL0Hw<FamilyType>()->lastReceivedElf);
|
||||
}
|
||||
@@ -575,6 +593,8 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenDebugDataWithoutRelocationsWhenInitializ
|
||||
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
auto kernelInfo = new KernelInfo();
|
||||
@@ -608,6 +628,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenDebugDataWithoutRelocationsWhenInitializ
|
||||
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->registerElfAndLinkCount);
|
||||
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleCreateCount);
|
||||
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_EQ(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData.get());
|
||||
EXPECT_EQ(kernelInfo->kernelDescriptor.external.debugData->vIsa, getMockDebuggerL0Hw<FamilyType>()->lastReceivedElf);
|
||||
}
|
||||
@@ -626,6 +647,8 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNoDebugDataWhenInitializingModuleThenDoN
|
||||
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
auto kernelInfo = new KernelInfo();
|
||||
@@ -642,6 +665,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNoDebugDataWhenInitializingModuleThenDoN
|
||||
|
||||
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->registerElfCount);
|
||||
EXPECT_EQ(moduleMock->initialize(&moduleDesc, neoDevice), ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->registerElfCount);
|
||||
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleCreateCount);
|
||||
}
|
||||
@@ -699,9 +723,12 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinNoDebugDataWhenInitializing
|
||||
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->registerElfCount);
|
||||
EXPECT_EQ(moduleMock->initialize(&moduleDesc, neoDevice), ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->registerElfCount);
|
||||
EXPECT_EQ(0u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleCreateCount);
|
||||
}
|
||||
@@ -779,6 +806,8 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNonZebinBinaryWhenDestroyModuleThenModul
|
||||
|
||||
moduleMock->kernelImmData = &kernelMock.immutableData;
|
||||
moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
kernelInfo->kernelDescriptor.external.debugData = std::make_unique<NEO::DebugData>();
|
||||
|
||||
@@ -795,6 +824,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNonZebinBinaryWhenDestroyModuleThenModul
|
||||
kernelInfo->kernelDescriptor.external.debugData->genIsaSize = 0;
|
||||
|
||||
EXPECT_EQ(moduleMock->initialize(&moduleDesc, neoDevice), ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
moduleMock->destroy();
|
||||
moduleMock.release();
|
||||
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleDestroyCount);
|
||||
@@ -814,6 +844,8 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNoDebugDataWhenDestroyingModuleThenNotif
|
||||
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, moduleBuildLog, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
auto kernelInfo = new KernelInfo();
|
||||
@@ -829,6 +861,7 @@ HWTEST_F(ModuleWithDebuggerL0Test, GivenNoDebugDataWhenDestroyingModuleThenNotif
|
||||
moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
|
||||
|
||||
EXPECT_EQ(moduleMock->initialize(&moduleDesc, neoDevice), ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
moduleMock->destroy();
|
||||
moduleMock.release();
|
||||
EXPECT_EQ(1u, getMockDebuggerL0Hw<FamilyType>()->notifyModuleDestroyCount);
|
||||
@@ -853,7 +886,6 @@ HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenModuleDebugHandleZeroWhenInitial
|
||||
auto kernelImmutableData = ::std::make_unique<KernelImmutableData>(device);
|
||||
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false);
|
||||
std::unique_ptr<MockModule> moduleMock = std::make_unique<MockModule>(device, nullptr, ModuleType::User);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData));
|
||||
|
||||
auto zebin = ZebinTestData::ValidEmptyProgram<>();
|
||||
|
||||
@@ -331,7 +331,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeDisabledWhenSet
|
||||
EXPECT_EQ(nullptr, mockKernel.perThreadDataForWholeThreadGroup);
|
||||
}
|
||||
|
||||
TEST_F(KernelImpSetGroupSizeTest, givenIncorrectGroupSizeWhenSettingGroupSizeThenInvalidGroupSizeDimensionErrorIsReturned) {
|
||||
TEST_F(KernelImpSetGroupSizeTest, givenIncorrectGroupSizeDimensionWhenSettingGroupSizeThenInvalidGroupSizeDimensionErrorIsReturned) {
|
||||
Mock<Kernel> mockKernel;
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
for (auto i = 0u; i < 3u; i++) {
|
||||
@@ -800,6 +800,8 @@ TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotC
|
||||
std::unique_ptr<L0::ult::MockModule> moduleMock = std::make_unique<L0::ult::MockModule>(device, moduleBuildLog, ModuleType::Builtin);
|
||||
moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
|
||||
moduleMock->translationUnit->programInfo.linkerInput = std::move(linkerInput);
|
||||
auto mockTranslationUnit = toMockPtr(moduleMock->translationUnit.get());
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
uint32_t kernelHeap = 0;
|
||||
auto kernelInfo = new KernelInfo();
|
||||
@@ -821,6 +823,7 @@ TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotC
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = moduleMock->initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes;
|
||||
|
||||
EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes);
|
||||
@@ -941,7 +944,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->maxGroupSize = 10;
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -983,7 +986,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZ
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->maxGroupSize = 10;
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -1024,7 +1027,7 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIs
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->maxGroupSize = 10;
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -1066,7 +1069,7 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTStackAllocationFail
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->maxGroupSize = 10;
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -1109,7 +1112,7 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArra
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->maxGroupSize = 10;
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -1144,7 +1147,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitial
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->maxGroupSize = 10;
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -1185,7 +1188,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->maxGroupSize = 10;
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
|
||||
@@ -589,9 +589,11 @@ struct ModuleSpecConstantsFixture : public DeviceFixture {
|
||||
|
||||
auto module = new Module(device, nullptr, ModuleType::User);
|
||||
module->translationUnit.reset(mockTranslationUnit);
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = module->initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
for (uint32_t i = 0; i < mockCompiler->moduleNumSpecConstants / 2; i++) {
|
||||
EXPECT_EQ(static_cast<uint64_t>(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i]]), static_cast<uint64_t>(mockCompiler->moduleSpecConstantsValuesT2[i]));
|
||||
EXPECT_EQ(static_cast<uint64_t>(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i + 1]]), static_cast<uint64_t>(mockCompiler->moduleSpecConstantsValuesT1[i]));
|
||||
@@ -646,9 +648,11 @@ struct ModuleSpecConstantsFixture : public DeviceFixture {
|
||||
|
||||
auto module = new Module(device, nullptr, ModuleType::User);
|
||||
module->translationUnit.reset(mockTranslationUnit);
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = module->initialize(&combinedModuleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
for (uint32_t i = 0; i < mockCompiler->moduleNumSpecConstants / 2; i++) {
|
||||
EXPECT_EQ(static_cast<uint64_t>(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i]]), static_cast<uint64_t>(mockCompiler->moduleSpecConstantsValuesT2[i]));
|
||||
EXPECT_EQ(static_cast<uint64_t>(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i + 1]]), static_cast<uint64_t>(mockCompiler->moduleSpecConstantsValuesT1[i]));
|
||||
@@ -937,6 +941,7 @@ struct ModuleStaticLinkFixture : public DeviceFixture {
|
||||
auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get();
|
||||
rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
|
||||
mockTranslationUnit = new MockModuleTranslationUnit(device);
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
loadModules(testMultiple);
|
||||
|
||||
@@ -955,6 +960,7 @@ struct ModuleStaticLinkFixture : public DeviceFixture {
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = module->initialize(&combinedModuleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
module->destroy();
|
||||
}
|
||||
void runSprivLinkBuildWithOneModule() {
|
||||
@@ -963,6 +969,7 @@ struct ModuleStaticLinkFixture : public DeviceFixture {
|
||||
auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get();
|
||||
rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
|
||||
mockTranslationUnit = new MockModuleTranslationUnit(device);
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
loadModules(testSingle);
|
||||
|
||||
@@ -979,6 +986,7 @@ struct ModuleStaticLinkFixture : public DeviceFixture {
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = module->initialize(&combinedModuleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
module->destroy();
|
||||
}
|
||||
std::unique_ptr<ZebinTestData::ZebinWithL0TestCommonModule> zebinData;
|
||||
@@ -1045,6 +1053,7 @@ HWTEST_F(ModuleLinkingTest, givenFailureDuringLinkingWhenCreatingModuleThenModul
|
||||
rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
|
||||
|
||||
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
|
||||
linkerInput->valid = false;
|
||||
@@ -1063,6 +1072,7 @@ HWTEST_F(ModuleLinkingTest, givenFailureDuringLinkingWhenCreatingModuleThenModul
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
result = module.initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_ERROR_MODULE_LINK_FAILURE);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
}
|
||||
|
||||
HWTEST_F(ModuleLinkingTest, givenRemainingUnresolvedSymbolsDuringLinkingWhenCreatingModuleThenModuleIsNotLinkedFully) {
|
||||
@@ -1071,6 +1081,7 @@ HWTEST_F(ModuleLinkingTest, givenRemainingUnresolvedSymbolsDuringLinkingWhenCrea
|
||||
rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
|
||||
|
||||
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
|
||||
|
||||
@@ -1093,6 +1104,7 @@ HWTEST_F(ModuleLinkingTest, givenRemainingUnresolvedSymbolsDuringLinkingWhenCrea
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = module.initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_FALSE(module.isFullyLinked);
|
||||
}
|
||||
|
||||
@@ -1102,6 +1114,7 @@ HWTEST_F(ModuleLinkingTest, givenModuleCompiledThenCachingIsTrue) {
|
||||
rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
|
||||
|
||||
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
|
||||
auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
|
||||
|
||||
@@ -1126,6 +1139,7 @@ HWTEST_F(ModuleLinkingTest, givenModuleCompiledThenCachingIsTrue) {
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = module.initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
|
||||
EXPECT_TRUE(mockCompiler->cachingPassed);
|
||||
}
|
||||
@@ -2304,10 +2318,14 @@ HWTEST_F(ModuleTranslationUnitTest, WhenCreatingFromNativeBinaryThenSetsUpPacked
|
||||
target.maxPointerSizeInBytes = programTokens.header->GPUPointerSizeInBytes;
|
||||
|
||||
auto arData = encoder.encode();
|
||||
L0::ModuleTranslationUnit moduleTuValid(this->device);
|
||||
auto moduleTuValid = MockModuleTranslationUnit{this->device};
|
||||
moduleTuValid.processUnpackedBinaryCallBase = false;
|
||||
moduleTuValid.setDummyKernelInfo();
|
||||
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = moduleTuValid.createFromNativeBinary(reinterpret_cast<const char *>(arData.data()), arData.size());
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(moduleTuValid.processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(moduleTuValid.packedDeviceBinarySize, arData.size());
|
||||
}
|
||||
|
||||
@@ -2326,6 +2344,113 @@ HWTEST_F(ModuleTranslationUnitTest, WhenCreatingFromZebinThenAppendAllowZebinFla
|
||||
EXPECT_STREQ(expectedOptions.c_str(), moduleTu.options.c_str());
|
||||
}
|
||||
|
||||
// Verifies that a kernel compiled as SIMD16 with the large GRF count reports half of the
// module-wide max work-group size, and that setGroupSize() rejects a group size that only
// fits the module-wide limit.
HWTEST2_F(ModuleTranslationUnitTest, givenLargeGrfAndSimd16WhenProcessingBinaryThenKernelGroupSizeReducedToFitWithinSubslice, IsWithinXeGfxFamily) {
    // zeInfo with two kernels: SIMD8 + default GRF count, and SIMD16 + large GRF count.
    // NOTE(review): the nesting indentation inside the literal is significant for YAML -
    // confirm it matches what the zeInfo decoder expects.
    std::string validZeInfo = std::string("version :\'") + versionToString(zeInfoDecoderVersion) + R"===('
kernels:
    - name : kernel_with_default_maxWGS
      execution_env :
        simd_size : 8
        grf_count: )===" +
                              std::to_string(GrfConfig::DefaultGrfNumber) + R"===(
    - name : kernel_with_reduced_maxWGS
      execution_env :
        simd_size : 16
        grf_count: )===" +
                              std::to_string(GrfConfig::LargeGrfNumber) + "\n";

    // Build a zebin whose zeInfo section is replaced by the one above and which carries
    // one (zero-filled) text section per kernel.
    uint8_t kernelIsa[8]{0U};
    ZebinTestData::ValidEmptyProgram zebin;
    zebin.removeSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo);
    zebin.appendSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo, ArrayRef<const uint8_t>::fromAny(validZeInfo.data(), validZeInfo.size()));
    zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "kernel_with_default_maxWGS", {kernelIsa, sizeof(kernelIsa)});
    zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "kernel_with_reduced_maxWGS", {kernelIsa, sizeof(kernelIsa)});
    zebin.elfHeader->machine = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily;

    MockModule mockModule{this->device, nullptr, ModuleType::User};
    mockModule.maxGroupSize = static_cast<uint32_t>(this->device->getDeviceInfo().maxWorkGroupSize);
    auto mockTU = mockModule.translationUnit.get();
    auto result = mockTU->createFromNativeBinary(reinterpret_cast<const char *>(zebin.storage.data()), zebin.storage.size());
    // Fatal assertions: the kernel infos are indexed right below, so the test must stop
    // here if the binary was not processed or did not yield both kernels.
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);
    ASSERT_EQ(2u, mockTU->programInfo.kernelInfos.size());

    auto &defaultKernelDescriptor = mockTU->programInfo.kernelInfos[0]->kernelDescriptor;
    auto &reducedKernelDescriptor = mockTU->programInfo.kernelInfos[1]->kernelDescriptor;
    EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), mockModule.maxGroupSize);
    EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (mockModule.maxGroupSize >> 1));

    // 8 * 4 * (maxGroupSize / 32): a total equal to the default max WGS.
    uint32_t groupSize[3] = {8, 4, (mockModule.maxGroupSize >> 5)}; // default max WGS
    Mock<Kernel> defaultKernel;
    defaultKernel.module = &mockModule;
    defaultKernel.descriptor.kernelAttributes = defaultKernelDescriptor.kernelAttributes;
    EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]));

    // The same total must be refused for the SIMD16 + large GRF kernel.
    Mock<Kernel> reducedKernel;
    reducedKernel.module = &mockModule;
    reducedKernel.descriptor.kernelAttributes = reducedKernelDescriptor.kernelAttributes;
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, reducedKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]));
    // Shrinks the total to a quarter of the default max WGS, which is within the halved limit.
    groupSize[2] >>= 2; // align to max WGS reduced due to SIMD16 + LargeGrf
    EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]));
}
|
||||
|
||||
// Verifies that suggestGroupSize() returns a non-empty group size that does not exceed the
// per-kernel limit: the module-wide max for the SIMD8 + default GRF kernel, and the value
// reported by getMaxGroupSize() for the SIMD16 + large GRF kernel.
HWTEST2_F(ModuleTranslationUnitTest, givenLargeGrfAndSimd16WhenProcessingBinaryThenSuggestedKernelGroupSizeFitsWithinSubslice, IsWithinXeGfxFamily) {
    // zeInfo with two kernels: SIMD8 + default GRF count, and SIMD16 + large GRF count.
    // NOTE(review): the nesting indentation inside the literal is significant for YAML -
    // confirm it matches what the zeInfo decoder expects.
    std::string validZeInfo = std::string("version :\'") + versionToString(zeInfoDecoderVersion) + R"===('
kernels:
    - name : kernel_with_default_maxWGS
      execution_env :
        simd_size : 8
        grf_count: )===" +
                              std::to_string(GrfConfig::DefaultGrfNumber) + R"===(
    - name : kernel_with_reduced_maxWGS
      execution_env :
        simd_size : 16
        grf_count: )===" +
                              std::to_string(GrfConfig::LargeGrfNumber) + "\n";

    // Build a zebin whose zeInfo section is replaced by the one above and which carries
    // one (zero-filled) text section per kernel.
    uint8_t kernelIsa[8]{0U};
    ZebinTestData::ValidEmptyProgram zebin;
    zebin.removeSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo);
    zebin.appendSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo, ArrayRef<const uint8_t>::fromAny(validZeInfo.data(), validZeInfo.size()));
    zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "kernel_with_default_maxWGS", {kernelIsa, sizeof(kernelIsa)});
    zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "kernel_with_reduced_maxWGS", {kernelIsa, sizeof(kernelIsa)});
    zebin.elfHeader->machine = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily;

    MockModule mockModule{this->device, nullptr, ModuleType::User};
    mockModule.maxGroupSize = static_cast<uint32_t>(device->getDeviceInfo().maxWorkGroupSize);
    auto mockTU = mockModule.translationUnit.get();
    auto result = mockTU->createFromNativeBinary(reinterpret_cast<const char *>(zebin.storage.data()), zebin.storage.size());
    // Fatal assertions: the kernel infos are indexed right below, so the test must stop
    // here if the binary was not processed or did not yield both kernels.
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);
    ASSERT_EQ(2u, mockTU->programInfo.kernelInfos.size());

    auto &defaultKernelDescriptor = mockTU->programInfo.kernelInfos[0]->kernelDescriptor;
    auto &reducedKernelDescriptor = mockTU->programInfo.kernelInfos[1]->kernelDescriptor;
    EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), mockModule.maxGroupSize);
    EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (mockModule.maxGroupSize >> 1));

    // Default kernel: suggestions for a large 3D range and for a 1D range of exactly
    // maxGroupSize items must both stay within the module-wide limit.
    uint32_t groupSize[3] = {0u, 0u, 0u};
    Mock<Kernel> defaultKernel;
    defaultKernel.module = &mockModule;
    defaultKernel.descriptor.kernelAttributes = defaultKernelDescriptor.kernelAttributes;
    EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(4096u, 4096u, 4096u, &groupSize[0], &groupSize[1], &groupSize[2]));
    EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
    EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.maxGroupSize);

    // Reset the outputs so a stale value from the previous call cannot satisfy the checks.
    groupSize[0] = groupSize[1] = groupSize[2] = 0u;
    EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(mockModule.maxGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
    EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
    EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.maxGroupSize);

    // Reduced kernel: the same two requests must stay within its own (smaller) limit.
    groupSize[0] = groupSize[1] = groupSize[2] = 0u;
    Mock<Kernel> reducedKernel;
    reducedKernel.module = &mockModule;
    reducedKernel.descriptor.kernelAttributes = reducedKernelDescriptor.kernelAttributes;
    EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.suggestGroupSize(mockModule.maxGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
    EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
    EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.getMaxGroupSize(reducedKernelDescriptor));

    groupSize[0] = groupSize[1] = groupSize[2] = 0u;
    EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.suggestGroupSize(4096u, 4096u, 4096u, &groupSize[0], &groupSize[1], &groupSize[2]));
    EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
    EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.getMaxGroupSize(reducedKernelDescriptor));
}
|
||||
|
||||
TEST_F(ModuleTranslationUnitTest, WhenCreatingFromZeBinaryAndGlobalsAreExportedThenTheirAllocationTypeIsUSMDevice) {
|
||||
std::string zeInfo = std::string("version :\'") + versionToString(zeInfoDecoderVersion) + R"===('
|
||||
kernels:
|
||||
@@ -2476,9 +2601,11 @@ HWTEST_F(ModuleTranslationUnitTest, WhenBuildOptionsAreNullThenReuseExistingOpti
|
||||
DebugManager.flags.DisableStatelessToStatefulOptimization.set(1);
|
||||
|
||||
MockModuleTranslationUnit moduleTu(this->device);
|
||||
moduleTu.processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
|
||||
}
|
||||
|
||||
@@ -2487,10 +2614,12 @@ HWTEST_F(ModuleTranslationUnitTest, givenInternalOptionsThenLSCCachePolicyIsSet)
|
||||
auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
|
||||
rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);
|
||||
MockModuleTranslationUnit moduleTu(this->device);
|
||||
moduleTu.processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
const auto &compilerProductHelper = rootDeviceEnvironment->getHelper<CompilerProductHelper>();
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
|
||||
auto expectedPolicy = compilerProductHelper.getCachingPolicyOptions(false);
|
||||
if (expectedPolicy != nullptr) {
|
||||
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find(expectedPolicy), std::string::npos);
|
||||
@@ -2507,9 +2636,11 @@ HWTEST2_F(ModuleTranslationUnitTest, givenDebugFlagSetToWbWhenGetInternalOptions
|
||||
auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
|
||||
rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);
|
||||
MockModuleTranslationUnit moduleTu(this->device);
|
||||
moduleTu.processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-store-cache-default=7 -cl-load-cache-default=4"), std::string::npos);
|
||||
}
|
||||
|
||||
@@ -2521,9 +2652,11 @@ HWTEST2_F(ModuleTranslationUnitTest, givenDebugFlagSetForceAllResourcesUncachedW
|
||||
auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
|
||||
rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);
|
||||
MockModuleTranslationUnit moduleTu(this->device);
|
||||
moduleTu.processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-store-cache-default=1 -cl-load-cache-default=1"), std::string::npos);
|
||||
}
|
||||
|
||||
@@ -2532,9 +2665,11 @@ HWTEST2_F(ModuleTranslationUnitTest, givenAtLeastXeHpgCoreWhenGetInternalOptions
|
||||
auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
|
||||
rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);
|
||||
MockModuleTranslationUnit moduleTu(this->device);
|
||||
moduleTu.processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-store-cache-default=2 -cl-load-cache-default=4"), std::string::npos);
|
||||
}
|
||||
|
||||
@@ -2544,9 +2679,11 @@ HWTEST_F(ModuleTranslationUnitTest, givenForceToStatelessRequiredWhenBuildingMod
|
||||
rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);
|
||||
|
||||
MockModuleTranslationUnit moduleTu(device);
|
||||
moduleTu.processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
|
||||
|
||||
const auto &compilerProductHelper = rootDeviceEnvironment->getHelper<CompilerProductHelper>();
|
||||
if (compilerProductHelper.isForceToStatelessRequired()) {
|
||||
@@ -2587,11 +2724,13 @@ HWTEST2_F(ModuleTranslationUnitTest, givenSourceLevelDebuggerAndAllowZebinBuildO
|
||||
rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);
|
||||
|
||||
MockModuleTranslationUnit moduleTu(device);
|
||||
moduleTu.processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto buildOption = NEO::CompilerOptions::allowZebin.str();
|
||||
|
||||
result = moduleTu.buildFromSpirV("", 0U, buildOption.c_str(), "", nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(mockCompilerInterface->receivedApiOptions.find(NEO::CompilerOptions::allowZebin.str()), std::string::npos);
|
||||
EXPECT_EQ(mockCompilerInterface->inputInternalOptions.find(NEO::CompilerOptions::disableZebin.str()), std::string::npos);
|
||||
}
|
||||
@@ -2602,11 +2741,13 @@ HWTEST_F(ModuleTranslationUnitTest, givenAllowZebinBuildOptionWhenBuildWithSpirv
|
||||
rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);
|
||||
|
||||
MockModuleTranslationUnit moduleTu(device);
|
||||
moduleTu.processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
auto buildOption = NEO::CompilerOptions::allowZebin.str();
|
||||
|
||||
result = moduleTu.buildFromSpirV("", 0U, buildOption.c_str(), "", nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(mockCompilerInterface->receivedApiOptions.find(NEO::CompilerOptions::allowZebin.str()), std::string::npos);
|
||||
EXPECT_EQ(mockCompilerInterface->inputInternalOptions.find(NEO::CompilerOptions::disableZebin.str()), std::string::npos);
|
||||
}
|
||||
@@ -2623,10 +2764,12 @@ HWTEST_F(ModuleTranslationUnitTest, givenSourceLevelDebuggerWhenBuildWithSpirvTh
|
||||
rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);
|
||||
|
||||
MockModuleTranslationUnit moduleTu(device);
|
||||
moduleTu.processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
|
||||
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(moduleTu.processUnpackedBinaryCalled, 1u);
|
||||
}
|
||||
|
||||
TEST(ModuleBuildLog, WhenGreaterBufferIsPassedToGetStringThenOutputSizeIsOverridden) {
|
||||
|
||||
@@ -299,6 +299,7 @@ TEST_F(ModuleTests, givenLargeGrfFlagSetWhenCreatingModuleThenOverrideInternalFl
|
||||
moduleDesc.inputSize = src.size();
|
||||
|
||||
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
Module module(device, nullptr, ModuleType::User);
|
||||
|
||||
module.translationUnit.reset(mockTranslationUnit);
|
||||
@@ -306,6 +307,7 @@ TEST_F(ModuleTests, givenLargeGrfFlagSetWhenCreatingModuleThenOverrideInternalFl
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = module.initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
|
||||
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-intel-256-GRF-per-thread"), std::string::npos);
|
||||
EXPECT_EQ(pMockCompilerInterface->inputInternalOptions.find("-cl-intel-128-GRF-per-thread"), std::string::npos);
|
||||
@@ -328,6 +330,7 @@ TEST_F(ModuleTests, givenAutoGrfFlagSetWhenCreatingModuleThenOverrideInternalFla
|
||||
moduleDesc.inputSize = src.size();
|
||||
|
||||
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
Module module(device, nullptr, ModuleType::User);
|
||||
|
||||
module.translationUnit.reset(mockTranslationUnit);
|
||||
@@ -335,6 +338,7 @@ TEST_F(ModuleTests, givenAutoGrfFlagSetWhenCreatingModuleThenOverrideInternalFla
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = module.initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
|
||||
EXPECT_NE(pMockCompilerInterface->receivedApiOptions.find("-cl-intel-enable-auto-large-GRF-mode"), std::string::npos);
|
||||
EXPECT_EQ(pMockCompilerInterface->receivedApiOptions.find("-cl-intel-256-GRF-per-thread"), std::string::npos);
|
||||
@@ -357,6 +361,7 @@ TEST_F(ModuleTests, givenDefaultGrfFlagSetWhenCreatingModuleThenOverrideInternal
|
||||
moduleDesc.inputSize = src.size();
|
||||
|
||||
auto mockTranslationUnit = new MockModuleTranslationUnit(device);
|
||||
mockTranslationUnit->processUnpackedBinaryCallBase = false;
|
||||
Module module(device, nullptr, ModuleType::User);
|
||||
|
||||
module.translationUnit.reset(mockTranslationUnit);
|
||||
@@ -364,6 +369,7 @@ TEST_F(ModuleTests, givenDefaultGrfFlagSetWhenCreatingModuleThenOverrideInternal
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = module.initialize(&moduleDesc, neoDevice);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit->processUnpackedBinaryCalled, 1u);
|
||||
|
||||
EXPECT_EQ(pMockCompilerInterface->inputInternalOptions.find("-cl-intel-256-GRF-per-thread"), std::string::npos);
|
||||
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-intel-128-GRF-per-thread"), std::string::npos);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -50,10 +50,12 @@ HWTEST2_F(KernelPropertyTest, givenDG2WhenGetInternalOptionsThenWriteBackBuildOp
|
||||
auto pMockCompilerInterface = new MockCompilerInterface;
|
||||
auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
|
||||
rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);
|
||||
MockModuleTranslationUnit moduleTu(this->device);
|
||||
MockModuleTranslationUnit mockTranslationUnit(this->device);
|
||||
mockTranslationUnit.processUnpackedBinaryCallBase = false;
|
||||
ze_result_t result = ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
|
||||
result = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
result = mockTranslationUnit.buildFromSpirV("", 0U, nullptr, "", nullptr);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
EXPECT_EQ(mockTranslationUnit.processUnpackedBinaryCalled, 1u);
|
||||
EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-store-cache-default=7 -cl-load-cache-default=4"), std::string::npos);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user