From 49424eb859972ff0eb2249bbc60e1f8f80725445 Mon Sep 17 00:00:00 2001 From: Maciej Bielski Date: Tue, 31 Jan 2023 13:56:03 +0000 Subject: [PATCH] refactor(l0): follow-up cleanup after adding LargeGRF fix Resubmission of 871a3bd11d45c229fdcb75f91d3c9326fb345e39 Reverted by 9882e992aca83184f21869c2822a62eaa99f1a38 due to Elmo regression (most likely not related to the change anyway). Fixup for 2778043d67ee09d94cf9fe748ed06ee4ec86cd0d Related-To: NEO-7684, HSD-18027378546 Signed-off-by: Maciej Bielski --- level_zero/core/source/module/module_imp.cpp | 4 +-- level_zero/core/source/module/module_imp.h | 1 - .../unit_tests/fixtures/module_fixture.cpp | 2 +- .../test/unit_tests/fixtures/module_fixture.h | 1 - .../core/test/unit_tests/mocks/mock_module.h | 23 ---------------- .../unit_tests/sources/kernel/test_kernel.cpp | 7 ----- .../unit_tests/sources/module/test_module.cpp | 26 ++++++++----------- 7 files changed, 13 insertions(+), 51 deletions(-) diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index c55f7c5a23..0fd9268cc0 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -624,8 +624,6 @@ ze_result_t ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neo registerElfInDebuggerL0(); - this->defaultMaxGroupSize = static_cast(neoDevice->getDeviceInfo().maxWorkGroupSize); - checkIfPrivateMemoryPerDispatchIsNeeded(); linkageSuccessful = this->linkBinary(); @@ -717,7 +715,7 @@ const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *kernelN } uint32_t ModuleImp::getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const { - return this->device->getGfxCoreHelper().calculateMaxWorkGroupSize(kernelDescriptor, this->defaultMaxGroupSize); + return this->device->getGfxCoreHelper().calculateMaxWorkGroupSize(kernelDescriptor, static_cast(this->device->getDeviceInfo().maxWorkGroupSize)); } void ModuleImp::createBuildOptions(const char *pBuildFlags, std::string &apiOptions, std::string &internalBuildOptions) { diff --git a/level_zero/core/source/module/module_imp.h b/level_zero/core/source/module/module_imp.h index f2746bfe90..155315c9a0 100644 --- a/level_zero/core/source/module/module_imp.h +++ b/level_zero/core/source/module/module_imp.h @@ -163,7 +163,6 @@ struct ModuleImp : public Module { std::unique_ptr translationUnit; ModuleBuildLog *moduleBuildLog = nullptr; NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr; - uint32_t defaultMaxGroupSize = 0U; std::vector> kernelImmDatas; NEO::Linker::RelocatedSymbolsMap symbols; diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp index 8832238931..78c57d9298 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp @@ -58,7 +58,7 @@ ModuleImmutableDataFixture::MockModule::MockModule(L0::Device *device, uint32_t perHwThreadPrivateMemorySize, MockImmutableData *inMockKernelImmData) : ModuleImp(device, moduleBuildLog, type), mockKernelImmData(inMockKernelImmData) { this->mockKernelImmData->setDevice(device); - this->translationUnit.reset(new MockModuleTranslationUnit(this->translationUnit.get())); + this->translationUnit.reset(new MockModuleTranslationUnit(this->device)); } void ModuleImmutableDataFixture::MockModule::checkIfPrivateMemoryPerDispatchIsNeeded() { diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index 589ec39650..d8e4f4d454 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -51,7 +51,6 @@ struct ModuleImmutableDataFixture : public DeviceFixture { struct MockModule : public L0::ModuleImp { using ModuleImp::allocatePrivateMemoryPerDispatch; - using ModuleImp::defaultMaxGroupSize; using ModuleImp::getKernelImmutableDataVector; using ModuleImp::kernelImmDatas; using ModuleImp::translationUnit; diff --git a/level_zero/core/test/unit_tests/mocks/mock_module.h b/level_zero/core/test/unit_tests/mocks/mock_module.h index 6d6dc1adf8..ce576abf14 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_module.h +++ b/level_zero/core/test/unit_tests/mocks/mock_module.h @@ -24,26 +24,6 @@ struct MockModuleTranslationUnit : public L0::ModuleTranslationUnit { MockModuleTranslationUnit(L0::Device *device) : BaseClass{device} {} - MockModuleTranslationUnit(L0::ModuleTranslationUnit *orig) : BaseClass{orig->device} { - std::swap(this->globalConstBuffer, orig->globalConstBuffer); - std::swap(this->globalVarBuffer, orig->globalVarBuffer); - std::swap(this->programInfo, orig->programInfo); - std::swap(this->options, orig->options); - std::swap(this->shouldSuppressRebuildWarning, orig->shouldSuppressRebuildWarning); - std::swap(this->buildLog, orig->buildLog); - std::swap(this->irBinary, orig->irBinary); - std::swap(this->irBinarySize, orig->irBinarySize); - std::swap(this->unpackedDeviceBinary, orig->unpackedDeviceBinary); - std::swap(this->unpackedDeviceBinarySize, orig->unpackedDeviceBinarySize); - std::swap(this->packedDeviceBinary, orig->packedDeviceBinary); - std::swap(this->packedDeviceBinarySize, orig->packedDeviceBinarySize); - std::swap(this->debugData, orig->debugData); - std::swap(this->debugDataSize, orig->debugDataSize); - std::swap(this->alignedvIsas, orig->alignedvIsas); - std::swap(this->specConstantsValues, orig->specConstantsValues); - std::swap(this->isBuiltIn, orig->isBuiltIn); - } - ADDMETHOD(processUnpackedBinary, ze_result_t, true, ZE_RESULT_SUCCESS, (), ()); ze_result_t compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) override { @@ -82,7 +62,6 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp { using BaseClass::translationUnit; using BaseClass::type; using BaseClass::unresolvedExternalsInfo; - uint32_t &maxGroupSize{BaseClass::defaultMaxGroupSize}; WhiteBox(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type) : ::L0::ModuleImp{device, moduleBuildLog, type} { @@ -122,13 +101,11 @@ struct MockModule : public L0::ModuleImp { using ModuleImp::populateHostGlobalSymbolsMap; using ModuleImp::symbols; using ModuleImp::translationUnit; - uint32_t &maxGroupSize = ModuleImp::defaultMaxGroupSize; MockModule(L0::Device *device, L0::ModuleBuildLog *moduleBuildLog, L0::ModuleType type) : ModuleImp(device, moduleBuildLog, type) { this->translationUnit.reset(new MockModuleTranslationUnit{device}); - this->maxGroupSize = 32u; }; ~MockModule() override = default; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 1ad2f41ac2..50845614ff 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -945,7 +945,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized ModuleType::User, 32u, mockKernelImmutableData.get()); - module->defaultMaxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); @@ -987,7 +986,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZ ModuleType::User, 32u, mockKernelImmutableData.get()); - module->defaultMaxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); @@ -1028,7 +1026,6 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIs ModuleType::User, 32u, mockKernelImmutableData.get()); - module->defaultMaxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); @@ -1070,7 +1067,6 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTStackAllocationFail ModuleType::User, 32u, mockKernelImmutableData.get()); - module->defaultMaxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); @@ -1113,7 +1109,6 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArra ModuleType::User, 32u, mockKernelImmutableData.get()); - module->defaultMaxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); @@ -1148,7 +1143,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitial ModuleType::User, 32u, mockKernelImmutableData.get()); - module->defaultMaxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); @@ -1189,7 +1183,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche ModuleType::User, 32u, mockKernelImmutableData.get()); - module->defaultMaxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index 3fe83cef19..a0f3593cf9 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -2362,17 +2362,17 @@ kernels: zebin.elfHeader->machine = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily; MockModule mockModule{this->device, nullptr, ModuleType::User}; - mockModule.maxGroupSize = static_cast(this->device->getDeviceInfo().maxWorkGroupSize); + auto maxWorkGroupSize = static_cast(this->neoDevice->deviceInfo.maxWorkGroupSize); auto mockTU = mockModule.translationUnit.get(); auto result = mockTU->createFromNativeBinary(reinterpret_cast(zebin.storage.data()), zebin.storage.size()); EXPECT_EQ(result, ZE_RESULT_SUCCESS); auto &defaultKernelDescriptor = mockTU->programInfo.kernelInfos[0]->kernelDescriptor; auto &reducedKernelDescriptor = mockTU->programInfo.kernelInfos[1]->kernelDescriptor; - EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), mockModule.maxGroupSize); - EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (mockModule.maxGroupSize >> 1)); + EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), maxWorkGroupSize); + EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (maxWorkGroupSize >> 1)); - uint32_t groupSize[3] = {8, 4, (mockModule.maxGroupSize >> 5)}; // default max WGS + uint32_t groupSize[3] = {8, 4, (maxWorkGroupSize >> 5)}; // default max WGS Mock defaultKernel; defaultKernel.module = &mockModule; defaultKernel.descriptor.kernelAttributes = defaultKernelDescriptor.kernelAttributes; @@ -2409,15 +2409,15 @@ kernels: zebin.elfHeader->machine = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily; MockModule mockModule{this->device, nullptr, ModuleType::User}; - mockModule.maxGroupSize = static_cast(device->getDeviceInfo().maxWorkGroupSize); + auto maxWorkGroupSize = static_cast(this->neoDevice->deviceInfo.maxWorkGroupSize); auto mockTU = mockModule.translationUnit.get(); auto result = mockTU->createFromNativeBinary(reinterpret_cast(zebin.storage.data()), zebin.storage.size()); EXPECT_EQ(result, ZE_RESULT_SUCCESS); auto &defaultKernelDescriptor = mockTU->programInfo.kernelInfos[0]->kernelDescriptor; auto &reducedKernelDescriptor = mockTU->programInfo.kernelInfos[1]->kernelDescriptor; - EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), mockModule.maxGroupSize); - EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (mockModule.maxGroupSize >> 1)); + EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), maxWorkGroupSize); + EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (maxWorkGroupSize >> 1)); uint32_t groupSize[3] = {0u, 0u, 0u}; Mock defaultKernel; @@ -2425,18 +2425,18 @@ kernels: defaultKernel.descriptor.kernelAttributes = defaultKernelDescriptor.kernelAttributes; EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(4096u, 4096u, 4096u, &groupSize[0], &groupSize[1], &groupSize[2])); EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u); - EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.maxGroupSize); + EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], maxWorkGroupSize); groupSize[0] = groupSize[1] = groupSize[2] = 0u; - EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(mockModule.maxGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2])); + EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(maxWorkGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2])); EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u); - EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.maxGroupSize); + EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], maxWorkGroupSize); groupSize[0] = groupSize[1] = groupSize[2] = 0u; Mock reducedKernel; reducedKernel.module = &mockModule; reducedKernel.descriptor.kernelAttributes = reducedKernelDescriptor.kernelAttributes; - EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.suggestGroupSize(mockModule.maxGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2])); + EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.suggestGroupSize(maxWorkGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2])); EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u); EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.getMaxGroupSize(reducedKernelDescriptor)); @@ -3412,8 +3412,6 @@ TEST_F(ModuleTests, givenConstDataStringSectionWhenLinkingModuleThenSegmentIsPat TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingBuiltinModuleThenSegmentIsNotPatchedAndImplicitArgsAreNotRequired) { auto pModule = std::make_unique>(device, nullptr, ModuleType::Builtin); - pModule->maxGroupSize = 32; - char data[64]{}; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 64; @@ -3454,7 +3452,6 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen DebugManager.flags.PrintDebugMessages.set(true); auto pModule = std::make_unique>(device, nullptr, ModuleType::Builtin); - pModule->maxGroupSize = 32; char data[64]{}; std::unique_ptr kernelInfo = std::make_unique(); @@ -3501,7 +3498,6 @@ TEST_F(ModuleTests, givenFullyLinkedModuleWhenCreatingKernelThenDebugMsgOnPrivat DebugManager.flags.PrintDebugMessages.set(true); auto pModule = std::make_unique>(device, nullptr, ModuleType::Builtin); - pModule->maxGroupSize = 32; char data[64]{}; std::unique_ptr kernelInfo = std::make_unique();