mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-28 16:48:45 +08:00
refactor(l0): follow-up cleanup after adding LargeGRF fix
Resubmission of 871a3bd11d
Reverted by 9882e992ac due to Elmo regression (most likely not related to the change anyway).
Fixup for 2778043d67
Related-To: NEO-7684, HSD-18027378546
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e3ce887662
commit
49424eb859
@@ -624,8 +624,6 @@ ze_result_t ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neo
|
||||
|
||||
registerElfInDebuggerL0();
|
||||
|
||||
this->defaultMaxGroupSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().maxWorkGroupSize);
|
||||
|
||||
checkIfPrivateMemoryPerDispatchIsNeeded();
|
||||
|
||||
linkageSuccessful = this->linkBinary();
|
||||
@@ -717,7 +715,7 @@ const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *kernelN
|
||||
}
|
||||
|
||||
uint32_t ModuleImp::getMaxGroupSize(const NEO::KernelDescriptor &kernelDescriptor) const {
|
||||
return this->device->getGfxCoreHelper().calculateMaxWorkGroupSize(kernelDescriptor, this->defaultMaxGroupSize);
|
||||
return this->device->getGfxCoreHelper().calculateMaxWorkGroupSize(kernelDescriptor, static_cast<uint32_t>(this->device->getDeviceInfo().maxWorkGroupSize));
|
||||
}
|
||||
|
||||
void ModuleImp::createBuildOptions(const char *pBuildFlags, std::string &apiOptions, std::string &internalBuildOptions) {
|
||||
|
||||
@@ -163,7 +163,6 @@ struct ModuleImp : public Module {
|
||||
std::unique_ptr<ModuleTranslationUnit> translationUnit;
|
||||
ModuleBuildLog *moduleBuildLog = nullptr;
|
||||
NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr;
|
||||
uint32_t defaultMaxGroupSize = 0U;
|
||||
std::vector<std::unique_ptr<KernelImmutableData>> kernelImmDatas;
|
||||
NEO::Linker::RelocatedSymbolsMap symbols;
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ ModuleImmutableDataFixture::MockModule::MockModule(L0::Device *device,
|
||||
uint32_t perHwThreadPrivateMemorySize,
|
||||
MockImmutableData *inMockKernelImmData) : ModuleImp(device, moduleBuildLog, type), mockKernelImmData(inMockKernelImmData) {
|
||||
this->mockKernelImmData->setDevice(device);
|
||||
this->translationUnit.reset(new MockModuleTranslationUnit(this->translationUnit.get()));
|
||||
this->translationUnit.reset(new MockModuleTranslationUnit(this->device));
|
||||
}
|
||||
|
||||
void ModuleImmutableDataFixture::MockModule::checkIfPrivateMemoryPerDispatchIsNeeded() {
|
||||
|
||||
@@ -51,7 +51,6 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
||||
|
||||
struct MockModule : public L0::ModuleImp {
|
||||
using ModuleImp::allocatePrivateMemoryPerDispatch;
|
||||
using ModuleImp::defaultMaxGroupSize;
|
||||
using ModuleImp::getKernelImmutableDataVector;
|
||||
using ModuleImp::kernelImmDatas;
|
||||
using ModuleImp::translationUnit;
|
||||
|
||||
@@ -24,26 +24,6 @@ struct MockModuleTranslationUnit : public L0::ModuleTranslationUnit {
|
||||
|
||||
MockModuleTranslationUnit(L0::Device *device) : BaseClass{device} {}
|
||||
|
||||
MockModuleTranslationUnit(L0::ModuleTranslationUnit *orig) : BaseClass{orig->device} {
|
||||
std::swap(this->globalConstBuffer, orig->globalConstBuffer);
|
||||
std::swap(this->globalVarBuffer, orig->globalVarBuffer);
|
||||
std::swap(this->programInfo, orig->programInfo);
|
||||
std::swap(this->options, orig->options);
|
||||
std::swap(this->shouldSuppressRebuildWarning, orig->shouldSuppressRebuildWarning);
|
||||
std::swap(this->buildLog, orig->buildLog);
|
||||
std::swap(this->irBinary, orig->irBinary);
|
||||
std::swap(this->irBinarySize, orig->irBinarySize);
|
||||
std::swap(this->unpackedDeviceBinary, orig->unpackedDeviceBinary);
|
||||
std::swap(this->unpackedDeviceBinarySize, orig->unpackedDeviceBinarySize);
|
||||
std::swap(this->packedDeviceBinary, orig->packedDeviceBinary);
|
||||
std::swap(this->packedDeviceBinarySize, orig->packedDeviceBinarySize);
|
||||
std::swap(this->debugData, orig->debugData);
|
||||
std::swap(this->debugDataSize, orig->debugDataSize);
|
||||
std::swap(this->alignedvIsas, orig->alignedvIsas);
|
||||
std::swap(this->specConstantsValues, orig->specConstantsValues);
|
||||
std::swap(this->isBuiltIn, orig->isBuiltIn);
|
||||
}
|
||||
|
||||
ADDMETHOD(processUnpackedBinary, ze_result_t, true, ZE_RESULT_SUCCESS, (), ());
|
||||
|
||||
ze_result_t compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) override {
|
||||
@@ -82,7 +62,6 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
|
||||
using BaseClass::translationUnit;
|
||||
using BaseClass::type;
|
||||
using BaseClass::unresolvedExternalsInfo;
|
||||
uint32_t &maxGroupSize{BaseClass::defaultMaxGroupSize};
|
||||
|
||||
WhiteBox(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type)
|
||||
: ::L0::ModuleImp{device, moduleBuildLog, type} {
|
||||
@@ -122,13 +101,11 @@ struct MockModule : public L0::ModuleImp {
|
||||
using ModuleImp::populateHostGlobalSymbolsMap;
|
||||
using ModuleImp::symbols;
|
||||
using ModuleImp::translationUnit;
|
||||
uint32_t &maxGroupSize = ModuleImp::defaultMaxGroupSize;
|
||||
|
||||
MockModule(L0::Device *device,
|
||||
L0::ModuleBuildLog *moduleBuildLog,
|
||||
L0::ModuleType type) : ModuleImp(device, moduleBuildLog, type) {
|
||||
this->translationUnit.reset(new MockModuleTranslationUnit{device});
|
||||
this->maxGroupSize = 32u;
|
||||
};
|
||||
|
||||
~MockModule() override = default;
|
||||
|
||||
@@ -945,7 +945,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -987,7 +986,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZ
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -1028,7 +1026,6 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIs
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -1070,7 +1067,6 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTStackAllocationFail
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -1113,7 +1109,6 @@ HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndRTDispatchGlobalsArra
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -1148,7 +1143,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitial
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
@@ -1189,7 +1183,6 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatche
|
||||
ModuleType::User,
|
||||
32u,
|
||||
mockKernelImmutableData.get());
|
||||
module->defaultMaxGroupSize = 10;
|
||||
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
|
||||
@@ -2362,17 +2362,17 @@ kernels:
|
||||
zebin.elfHeader->machine = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
|
||||
|
||||
MockModule mockModule{this->device, nullptr, ModuleType::User};
|
||||
mockModule.maxGroupSize = static_cast<uint32_t>(this->device->getDeviceInfo().maxWorkGroupSize);
|
||||
auto maxWorkGroupSize = static_cast<uint32_t>(this->neoDevice->deviceInfo.maxWorkGroupSize);
|
||||
auto mockTU = mockModule.translationUnit.get();
|
||||
auto result = mockTU->createFromNativeBinary(reinterpret_cast<const char *>(zebin.storage.data()), zebin.storage.size());
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
auto &defaultKernelDescriptor = mockTU->programInfo.kernelInfos[0]->kernelDescriptor;
|
||||
auto &reducedKernelDescriptor = mockTU->programInfo.kernelInfos[1]->kernelDescriptor;
|
||||
EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), mockModule.maxGroupSize);
|
||||
EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (mockModule.maxGroupSize >> 1));
|
||||
EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), maxWorkGroupSize);
|
||||
EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (maxWorkGroupSize >> 1));
|
||||
|
||||
uint32_t groupSize[3] = {8, 4, (mockModule.maxGroupSize >> 5)}; // default max WGS
|
||||
uint32_t groupSize[3] = {8, 4, (maxWorkGroupSize >> 5)}; // default max WGS
|
||||
Mock<Kernel> defaultKernel;
|
||||
defaultKernel.module = &mockModule;
|
||||
defaultKernel.descriptor.kernelAttributes = defaultKernelDescriptor.kernelAttributes;
|
||||
@@ -2409,15 +2409,15 @@ kernels:
|
||||
zebin.elfHeader->machine = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
|
||||
|
||||
MockModule mockModule{this->device, nullptr, ModuleType::User};
|
||||
mockModule.maxGroupSize = static_cast<uint32_t>(device->getDeviceInfo().maxWorkGroupSize);
|
||||
auto maxWorkGroupSize = static_cast<uint32_t>(this->neoDevice->deviceInfo.maxWorkGroupSize);
|
||||
auto mockTU = mockModule.translationUnit.get();
|
||||
auto result = mockTU->createFromNativeBinary(reinterpret_cast<const char *>(zebin.storage.data()), zebin.storage.size());
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
auto &defaultKernelDescriptor = mockTU->programInfo.kernelInfos[0]->kernelDescriptor;
|
||||
auto &reducedKernelDescriptor = mockTU->programInfo.kernelInfos[1]->kernelDescriptor;
|
||||
EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), mockModule.maxGroupSize);
|
||||
EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (mockModule.maxGroupSize >> 1));
|
||||
EXPECT_EQ(mockModule.getMaxGroupSize(defaultKernelDescriptor), maxWorkGroupSize);
|
||||
EXPECT_EQ(mockModule.getMaxGroupSize(reducedKernelDescriptor), (maxWorkGroupSize >> 1));
|
||||
|
||||
uint32_t groupSize[3] = {0u, 0u, 0u};
|
||||
Mock<Kernel> defaultKernel;
|
||||
@@ -2425,18 +2425,18 @@ kernels:
|
||||
defaultKernel.descriptor.kernelAttributes = defaultKernelDescriptor.kernelAttributes;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(4096u, 4096u, 4096u, &groupSize[0], &groupSize[1], &groupSize[2]));
|
||||
EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
|
||||
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.maxGroupSize);
|
||||
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], maxWorkGroupSize);
|
||||
|
||||
groupSize[0] = groupSize[1] = groupSize[2] = 0u;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(mockModule.maxGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, defaultKernel.suggestGroupSize(maxWorkGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
|
||||
EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
|
||||
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.maxGroupSize);
|
||||
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], maxWorkGroupSize);
|
||||
|
||||
groupSize[0] = groupSize[1] = groupSize[2] = 0u;
|
||||
Mock<Kernel> reducedKernel;
|
||||
reducedKernel.module = &mockModule;
|
||||
reducedKernel.descriptor.kernelAttributes = reducedKernelDescriptor.kernelAttributes;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.suggestGroupSize(mockModule.maxGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, reducedKernel.suggestGroupSize(maxWorkGroupSize, 1u, 1u, &groupSize[0], &groupSize[1], &groupSize[2]));
|
||||
EXPECT_GT(groupSize[0] * groupSize[1] * groupSize[2], 0u);
|
||||
EXPECT_LE(groupSize[0] * groupSize[1] * groupSize[2], mockModule.getMaxGroupSize(reducedKernelDescriptor));
|
||||
|
||||
@@ -3412,8 +3412,6 @@ TEST_F(ModuleTests, givenConstDataStringSectionWhenLinkingModuleThenSegmentIsPat
|
||||
TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingBuiltinModuleThenSegmentIsNotPatchedAndImplicitArgsAreNotRequired) {
|
||||
auto pModule = std::make_unique<WhiteBox<Module>>(device, nullptr, ModuleType::Builtin);
|
||||
|
||||
pModule->maxGroupSize = 32;
|
||||
|
||||
char data[64]{};
|
||||
auto kernelInfo = new KernelInfo();
|
||||
kernelInfo->heapInfo.KernelHeapSize = 64;
|
||||
@@ -3454,7 +3452,6 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen
|
||||
DebugManager.flags.PrintDebugMessages.set(true);
|
||||
|
||||
auto pModule = std::make_unique<WhiteBox<Module>>(device, nullptr, ModuleType::Builtin);
|
||||
pModule->maxGroupSize = 32;
|
||||
|
||||
char data[64]{};
|
||||
std::unique_ptr<KernelInfo> kernelInfo = std::make_unique<KernelInfo>();
|
||||
@@ -3501,7 +3498,6 @@ TEST_F(ModuleTests, givenFullyLinkedModuleWhenCreatingKernelThenDebugMsgOnPrivat
|
||||
DebugManager.flags.PrintDebugMessages.set(true);
|
||||
|
||||
auto pModule = std::make_unique<WhiteBox<Module>>(device, nullptr, ModuleType::Builtin);
|
||||
pModule->maxGroupSize = 32;
|
||||
|
||||
char data[64]{};
|
||||
std::unique_ptr<KernelInfo> kernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
Reference in New Issue
Block a user