refactor(l0): follow-up cleanup after adding LargeGRF fix

Resubmission of 871a3bd11d, which was reverted by 9882e992ac due to an
Elmo regression (most likely unrelated to this change).

Fixup for 2778043d67

Related-To: NEO-7684, HSD-18027378546

Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
Maciej Bielski
2023-01-31 13:56:03 +00:00
committed by Compute-Runtime-Automation
parent e3ce887662
commit 49424eb859
7 changed files with 13 additions and 51 deletions

View File

@@ -624,8 +624,6 @@ ze_result_t ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neo
registerElfInDebuggerL0();
this->defaultMaxGroupSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().maxWorkGroupSize);
checkIfPrivateMemoryPerDispatchIsNeeded();
linkageSuccessful = this->linkBinary();
@@ -717,7 +715,7 @@ const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *kernelN
}
uint32_t ModuleImp::getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const {
return this->device->getGfxCoreHelper().calculateMaxWorkGroupSize(kernelDescriptor, this->defaultMaxGroupSize);
return this->device->getGfxCoreHelper().calculateMaxWorkGroupSize(kernelDescriptor, static_cast<uint32_t>(this->device->getDeviceInfo().maxWorkGroupSize));
}
void ModuleImp::createBuildOptions(const char *pBuildFlags, std::string &apiOptions, std::string &internalBuildOptions) {

View File

@@ -163,7 +163,6 @@ struct ModuleImp : public Module {
std::unique_ptr<ModuleTranslationUnit> translationUnit;
ModuleBuildLog *moduleBuildLog = nullptr;
NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr;
uint32_t defaultMaxGroupSize = 0U;
std::vector<std::unique_ptr<KernelImmutableData>> kernelImmDatas;
NEO::Linker::RelocatedSymbolsMap symbols;

View File

@@ -58,7 +58,7 @@ ModuleImmutableDataFixture::MockModule::MockModule(L0::Device *device,
uint32_t perHwThreadPrivateMemorySize,
MockImmutableData *inMockKernelImmData) : ModuleImp(device, moduleBuildLog, type), mockKernelImmData(inMockKernelImmData) {
this->mockKernelImmData->setDevice(device);
this->translationUnit.reset(new MockModuleTranslationUnit(this->translationUnit.get()));
this->translationUnit.reset(new MockModuleTranslationUnit(this->device));
}
void ModuleImmutableDataFixture::MockModule::checkIfPrivateMemoryPerDispatchIsNeeded() {

View File

@@ -51,7 +51,6 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
struct MockModule : public L0::ModuleImp {
using ModuleImp::allocatePrivateMemoryPerDispatch;
using ModuleImp::defaultMaxGroupSize;
using ModuleImp::getKernelImmutableDataVector;
using ModuleImp::kernelImmDatas;
using ModuleImp::translationUnit;

View File

@@ -24,26 +24,6 @@ struct MockModuleTranslationUnit : public L0::ModuleTranslationUnit {
MockModuleTranslationUnit(L0::Device *device) : BaseClass{device} {}
MockModuleTranslationUnit(L0::ModuleTranslationUnit *orig) : BaseClass{orig->device} {
std::swap(this->globalConstBuffer, orig->globalConstBuffer);
std::swap(this->globalVarBuffer, orig->globalVarBuffer);
std::swap(this->programInfo, orig->programInfo);
std::swap(this->options, orig->options);
std::swap(this->shouldSuppressRebuildWarning, orig->shouldSuppressRebuildWarning);
std::swap(this->buildLog, orig->buildLog);
std::swap(this->irBinary, orig->irBinary);
std::swap(this->irBinarySize, orig->irBinarySize);
std::swap(this->unpackedDeviceBinary, orig->unpackedDeviceBinary);
std::swap(this->unpackedDeviceBinarySize, orig->unpackedDeviceBinarySize);
std::swap(this->packedDeviceBinary, orig->packedDeviceBinary);
std::swap(this->packedDeviceBinarySize, orig->packedDeviceBinarySize);
std::swap(this->debugData, orig->debugData);
std::swap(this->debugDataSize, orig->debugDataSize);
std::swap(this->alignedvIsas, orig->alignedvIsas);
std::swap(this->specConstantsValues, orig->specConstantsValues);
std::swap(this->isBuiltIn, orig->isBuiltIn);
}
ADDMETHOD(processUnpackedBinary, ze_result_t, true, ZE_RESULT_SUCCESS, (), ());
ze_result_t compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) override {
@@ -82,7 +62,6 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
using BaseClass::translationUnit;
using BaseClass::type;
using BaseClass::unresolvedExternalsInfo;
uint32_t &maxGroupSize{BaseClass::defaultMaxGroupSize};
WhiteBox(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
: ::L0::ModuleImp{device, moduleBuildLog, type} {
@@ -122,13 +101,11 @@ struct MockModule : public L0::ModuleImp {
using ModuleImp::populateHostGlobalSymbolsMap;
using ModuleImp::symbols;
using ModuleImp::translationUnit;
uint32_t &maxGroupSize = ModuleImp::defaultMaxGroupSize;
MockModule(L0::Device *device,
L0::ModuleBuildLog *moduleBuildLog,
L0::ModuleType type) : ModuleImp(device, moduleBuildLog, type) {
this->translationUnit.reset(new MockModuleTranslationUnit{device});
this->maxGroupSize = 32u;
};
~MockModule() override = default;

View File

@@ -945,7 +945,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -987,7 +986,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZ
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -1028,7 +1026,6 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIs
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -1070,7 +1067,6 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTStackAllocationFail
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -1113,7 +1109,6 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArra
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -1148,7 +1143,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitial
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
@@ -1189,7 +1183,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
ModuleType::User,
32u,
mockKernelImmutableData.get());
module->defaultMaxGroupSize = 10;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

View File

@@ -2362,17 +2362,17 @@ kernels:
zebin.elfHeader->machine = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
MockModule mockModule{this->device, nullptr, ModuleType::User};
mockModule.maxGroupSize = static_cast<uint32_t>(this->device->getDeviceInfo().maxWorkGroupSize);
auto maxWorkGroupSize = static_cast<uint32_t>(this->neoDevice->deviceInfo.maxWorkGroupSize);
auto mockTU = mockModule.translationUnit.get();
auto result = mockTU->createFromNativeBinary(reinterpret_cast<const char *>(zebin.storage.data()), zebin.storage.size());
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
auto &defaultKernelDescriptor = mockTU->programInfo.kernelInfos[0]->kernelDescriptor;
auto &reducedKernelDescriptor = mockTU->programInfo.kernelInfos[1]->kernelDescriptor;
EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), mockModule.maxGroupSize);
EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (mockModule.maxGroupSize >> 1));
EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), maxWorkGroupSize);
EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (maxWorkGroupSize >> 1));
uint32_t groupSize[3] = {8, 4, (mockModule.maxGroupSize >> 5)}; // default max WGS
uint32_t groupSize[3] = {8, 4, (maxWorkGroupSize >> 5)}; // default max WGS
Mock<Kernel> defaultKernel;
defaultKernel.module = &mockModule;
defaultKernel.descriptor.kernelAttributes = defaultKernelDescriptor.kernelAttributes;
@@ -2409,15 +2409,15 @@ kernels:
zebin.elfHeader->machine = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
MockModule mockModule{this->device, nullptr, ModuleType::User};
mockModule.maxGroupSize = static_cast<uint32_t>(device->getDeviceInfo().maxWorkGroupSize);
auto maxWorkGroupSize = static_cast<uint32_t>(this->neoDevice->deviceInfo.maxWorkGroupSize);
auto mockTU = mockModule.translationUnit.get();
auto result = mockTU->createFromNativeBinary(reinterpret_cast<const char *>(zebin.storage.data()), zebin.storage.size());
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
auto &defaultKernelDescriptor = mockTU->programInfo.kernelInfos[0]->kernelDescriptor;
auto &reducedKernelDescriptor = mockTU->programInfo.kernelInfos[1]->kernelDescriptor;
EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), mockModule.maxGroupSize);
EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (mockModule.maxGroupSize >> 1));
EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), maxWorkGroupSize);
EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (maxWorkGroupSize >> 1));
uint32_t groupSize[3] = {0u, 0u, 0u};
Mock<Kernel> defaultKernel;
@@ -2425,18 +2425,18 @@ kernels:
defaultKernel.descriptor.kernelAttributes = defaultKernelDescriptor.kernelAttributes;
EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(4096u, 4096u, 4096u, &groupSize[0], &groupSize[1], &groupSize[2]));
EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.maxGroupSize);
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], maxWorkGroupSize);
groupSize[0] = groupSize[1] = groupSize[2] = 0u;
EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(mockModule.maxGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(maxWorkGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.maxGroupSize);
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], maxWorkGroupSize);
groupSize[0] = groupSize[1] = groupSize[2] = 0u;
Mock<Kernel> reducedKernel;
reducedKernel.module = &mockModule;
reducedKernel.descriptor.kernelAttributes = reducedKernelDescriptor.kernelAttributes;
EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.suggestGroupSize(mockModule.maxGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.suggestGroupSize(maxWorkGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.getMaxGroupSize(reducedKernelDescriptor));
@@ -3412,8 +3412,6 @@ TEST_F(ModuleTests, givenConstDataStringSectionWhenLinkingModuleThenSegmentIsPat
TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingBuiltinModuleThenSegmentIsNotPatchedAndImplicitArgsAreNotRequired) {
auto pModule = std::make_unique<WhiteBox<Module>>(device, nullptr, ModuleType::Builtin);
pModule->maxGroupSize = 32;
char data[64]{};
auto kernelInfo = new KernelInfo();
kernelInfo->heapInfo.KernelHeapSize = 64;
@@ -3454,7 +3452,6 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen
DebugManager.flags.PrintDebugMessages.set(true);
auto pModule = std::make_unique<WhiteBox<Module>>(device, nullptr, ModuleType::Builtin);
pModule->maxGroupSize = 32;
char data[64]{};
std::unique_ptr<KernelInfo> kernelInfo = std::make_unique<KernelInfo>();
@@ -3501,7 +3498,6 @@ TEST_F(ModuleTests, givenFullyLinkedModuleWhenCreatingKernelThenDebugMsgOnPrivat
DebugManager.flags.PrintDebugMessages.set(true);
auto pModule = std::make_unique<WhiteBox<Module>>(device, nullptr, ModuleType::Builtin);
pModule->maxGroupSize = 32;
char data[64]{};
std::unique_ptr<KernelInfo> kernelInfo = std::make_unique<KernelInfo>();