diff --git a/opencl/source/program/process_device_binary.cpp b/opencl/source/program/process_device_binary.cpp index 35891b1810..fbf5cb8721 100644 --- a/opencl/source/program/process_device_binary.cpp +++ b/opencl/source/program/process_device_binary.cpp @@ -227,19 +227,15 @@ cl_int Program::processGenBinary(const ClDevice &clDevice) { } if (!decodedSingleDeviceBinary.isSet) { - - auto blob = ArrayRef(reinterpret_cast(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize); - SingleDeviceBinary singleDeviceBinary = {}; decodedSingleDeviceBinary.programInfo = {}; - singleDeviceBinary.deviceBinary = blob; - singleDeviceBinary.targetDevice = NEO::getTargetDevice(clDevice.getRootDeviceEnvironment()); - singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection = this->indirectDetectionVersion; - singleDeviceBinary.generatorFeatureVersions.indirectAccessBuffer = this->indirectAccessBufferMajorVersion; - singleDeviceBinary.generator = this->isGeneratedByIgc ? 
GeneratorType::igc : GeneratorType::unknown; + auto blob = ArrayRef(reinterpret_cast(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize); + SingleDeviceBinary binary = {}; + binary.deviceBinary = blob; + binary.targetDevice = NEO::getTargetDevice(clDevice.getRootDeviceEnvironment()); auto &gfxCoreHelper = clDevice.getGfxCoreHelper(); - std::tie(decodedSingleDeviceBinary.decodeError, std::ignore) = NEO::decodeSingleDeviceBinary(decodedSingleDeviceBinary.programInfo, singleDeviceBinary, decodedSingleDeviceBinary.decodeErrors, decodedSingleDeviceBinary.decodeWarnings, gfxCoreHelper); + std::tie(decodedSingleDeviceBinary.decodeError, std::ignore) = NEO::decodeSingleDeviceBinary(decodedSingleDeviceBinary.programInfo, binary, decodedSingleDeviceBinary.decodeErrors, decodedSingleDeviceBinary.decodeWarnings, gfxCoreHelper); } else { decodedSingleDeviceBinary.isSet = false; } diff --git a/opencl/test/unit_test/api/cl_build_program_tests.inl b/opencl/test/unit_test/api/cl_build_program_tests.inl index a9e22a4e93..a9ec781178 100644 --- a/opencl/test/unit_test/api/cl_build_program_tests.inl +++ 
b/opencl/test/unit_test/api/cl_build_program_tests.inl @@ -597,104 +597,6 @@ TEST_F(ClBuildProgramMultiDeviceTests, givenMultiDeviceProgramWithProgramBuiltFo EXPECT_EQ(CL_SUCCESS, retVal); } -TEST_F(ClBuildProgramMultiDeviceTests, GivenProgramCreatedFromSourceWhenBuildingThenCorrectlyFilledSingleDeviceBinaryIsUsed) { - MockUnrestrictiveContextMultiGPU context; - cl_program pProgram = nullptr; - - std::string zeinfo = std::string("version :\'") + versionToString(Zebin::ZeInfo::zeInfoDecoderVersion) + R"===(' -kernels: - - name : some_kernel - execution_env : - simd_size : 32 - require_iab: true - - name : some_other_kernel - execution_env : - simd_size : 32 -)==="; - - uint8_t kernelIsa[8]{0U}; - ZebinTestData::ValidEmptyProgram zebin; - zebin.removeSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo); - zebin.appendSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo, ArrayRef::fromAny(zeinfo.data(), zeinfo.size())); - zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_kernel", {kernelIsa, sizeof(kernelIsa)}); - zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_other_kernel", {kernelIsa, sizeof(kernelIsa)}); - - const uint8_t data[] = {'H', 'e', 'l', 'l', 'o', '!'}; - zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::dataConstString, data); - - { - const uint32_t indirectAccessBufferMajorVersion = 4u; - - Zebin::Elf::ElfNoteSection elfNoteSection = {}; - elfNoteSection.type = Zebin::Elf::IntelGTSectionType::indirectAccessBufferMajorVersion; - elfNoteSection.descSize = sizeof(uint32_t); - elfNoteSection.nameSize = 8u; - - auto sectionDataSize = sizeof(Zebin::Elf::ElfNoteSection) + elfNoteSection.nameSize + alignUp(elfNoteSection.descSize, 4); - auto noteIntelGTSectionData = std::make_unique(sectionDataSize); - auto 
appendSingleIntelGTSectionData = [](const NEO::Elf::ElfNoteSection &elfNoteSection, uint8_t *const intelGTSectionData, const uint8_t *descData, const char *ownerName, size_t spaceAvailable) { - size_t offset = 0; - ASSERT_GE(spaceAvailable, sizeof(Zebin::Elf::ElfNoteSection) + elfNoteSection.nameSize + elfNoteSection.descSize); - memcpy_s(ptrOffset(intelGTSectionData, offset), sizeof(NEO::Elf::ElfNoteSection), &elfNoteSection, sizeof(NEO::Elf::ElfNoteSection)); - offset += sizeof(NEO::Elf::ElfNoteSection); - memcpy_s(reinterpret_cast(ptrOffset(intelGTSectionData, offset)), elfNoteSection.nameSize, ownerName, elfNoteSection.nameSize); - offset += elfNoteSection.nameSize; - memcpy_s(ptrOffset(intelGTSectionData, offset), elfNoteSection.descSize, descData, elfNoteSection.descSize); - offset += elfNoteSection.descSize; - }; - - appendSingleIntelGTSectionData(elfNoteSection, noteIntelGTSectionData.get(), reinterpret_cast(&indirectAccessBufferMajorVersion), - Zebin::Elf::intelGTNoteOwnerName.str().c_str(), sectionDataSize); - zebin.appendSection(Zebin::Elf::SHT_NOTE, Zebin::Elf::SectionNames::noteIntelGT, ArrayRef::fromAny(noteIntelGTSectionData.get(), sectionDataSize)); - } - - MockCompilerDebugVars debugVars; - debugVars.binaryToReturn = const_cast(zebin.storage.data()); - debugVars.binaryToReturnSize = zebin.storage.size(); - gEnvironment->igcPushDebugVars(debugVars); - gEnvironment->fclPushDebugVars(debugVars); - - cl_int retVal = CL_INVALID_PROGRAM; - pProgram = clCreateProgramWithSource( - &context, - 1, - sources, - &sourceKernelSize, - &retVal); - - EXPECT_NE(nullptr, pProgram); - ASSERT_EQ(CL_SUCCESS, retVal); - - cl_device_id firstDevice = context.pRootDevice0; - cl_device_id secondDevice = context.pRootDevice1; - cl_device_id devices[] = {firstDevice, secondDevice}; - - retVal = clBuildProgram( - pProgram, - 2, - devices, - nullptr, - nullptr, - nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); - - cl_kernel pKernel = clCreateKernel(pProgram, "some_kernel", &retVal); 
- EXPECT_EQ(CL_SUCCESS, retVal); - - MultiDeviceKernel *kernel = castToObject(pKernel); - Program *program = castToObject(pProgram); - EXPECT_EQ(4u, program->getIndirectAccessBufferVersion()); - EXPECT_FALSE(kernel->getKernelInfos()[1]->kernelDescriptor.kernelMetadata.isGeneratedByIgc); - - retVal = clReleaseKernel(pKernel); - EXPECT_EQ(CL_SUCCESS, retVal); - retVal = clReleaseProgram(pProgram); - EXPECT_EQ(CL_SUCCESS, retVal); - - gEnvironment->igcPopDebugVars(); - gEnvironment->fclPopDebugVars(); -} - TEST_F(ClBuildProgramMultiDeviceTests, givenMultiDeviceProgramWithProgramBuiltForSingleDeviceWithCreatedKernelWhenBuildingProgramForSecondDeviceThenInvalidOperationReturned) { MockUnrestrictiveContextMultiGPU context; cl_program pProgram = nullptr; diff --git a/shared/source/compiler_interface/linker.cpp b/shared/source/compiler_interface/linker.cpp index 3938aa6872..e07305dad9 100644 --- a/shared/source/compiler_interface/linker.cpp +++ b/shared/source/compiler_interface/linker.cpp @@ -661,21 +661,15 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs |= addImplcictArgs; if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) { uint64_t implicitArgsSize = 0; - uint8_t version = kernelDescriptor.kernelMetadata.indirectAccessBuffer; - if (version == 0) { - version = pDevice->getGfxCoreHelper().getImplicitArgsVersion(); - } - - if (version == 0) { + if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 0) { implicitArgsSize = ImplicitArgsV0::getAlignedSize(); - } else if (version == 1) { + } else if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 1) { implicitArgsSize = ImplicitArgsV1::getAlignedSize(); - } else if (version == 2) { + } else if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 2) { implicitArgsSize = ImplicitArgsV2::getAlignedSize(); } else { UNRECOVERABLE_IF(true); } - // Choose relocation size based on relocation type auto 
patchSize = pImplicitArgsReloc.second == RelocationInfo::Type::address ? 8 : 4; patchWithRequiredSize(pImplicitArgsReloc.first, patchSize, implicitArgsSize); diff --git a/shared/source/device_binary_format/device_binary_format_zebin.cpp b/shared/source/device_binary_format/device_binary_format_zebin.cpp index d44441cef7..59cfc1652f 100644 --- a/shared/source/device_binary_format/device_binary_format_zebin.cpp +++ b/shared/source/device_binary_format/device_binary_format_zebin.cpp @@ -61,7 +61,6 @@ SingleDeviceBinary unpackSingleZebin(const ArrayRef archive, cons if (elf.elfFileHeader->machine == Elf::ElfMachine::EM_INTELGT) { validForTarget &= Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret); } else { - Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret); const auto &flags = reinterpret_cast(elf.elfFileHeader->flags); validForTarget &= flags.machineEntryUsesGfxCoreInsteadOfProductFamily ? (requestedTargetDevice.coreFamily == static_cast(elf.elfFileHeader->machine)) @@ -131,7 +130,6 @@ DecodeError decodeSingleZebin(ProgramInfo &dst, const SingleDeviceBinary &src, s for (auto &kernelInfo : dst.kernelInfos) { kernelInfo->kernelDescriptor.kernelMetadata.isGeneratedByIgc = isGeneratedByIgc; - kernelInfo->kernelDescriptor.kernelMetadata.indirectAccessBuffer = src.generatorFeatureVersions.indirectAccessBuffer; if (KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) { kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState(); diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index 73676770ea..68f9861e60 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -267,7 +267,6 @@ struct KernelDescriptor : NEO::NonCopyableAndNonMovableClass { uint16_t compiledSubGroupsNumber = 0U; uint8_t requiredSubGroupSize = 0U; uint8_t requiredThreadGroupDispatchSize = 0U; - uint8_t 
indirectAccessBuffer = 0u; bool isGeneratedByIgc = true; } kernelMetadata; diff --git a/shared/test/unit_test/compiler_interface/linker_tests.cpp b/shared/test/unit_test/compiler_interface/linker_tests.cpp index 2ac9fbd9df..4bf8302aab 100644 --- a/shared/test/unit_test/compiler_interface/linker_tests.cpp +++ b/shared/test/unit_test/compiler_interface/linker_tests.cpp @@ -2313,82 +2313,6 @@ HWTEST_F(LinkerTests, givenImplicitArgRelocationAndImplicitArgsV1WhenLinkingThen EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); } -HWTEST_F(LinkerTests, givenImplicitArgRelocationAndKernelDescriptorWithImplicitArgsV1WhenLinkingThenPatchRelocationWithSizeOfImplicitArgsV1) { - DebugManagerStateRestore restore; - struct MockGfxCoreHelper : NEO::GfxCoreHelperHw { - uint32_t getImplicitArgsVersion() const override { - return 0; - } - }; - - NEO::LinkerInput linkerInput; - - vISA::GenRelocEntry reloc = {}; - std::string relocationName = implicitArgsRelocationSymbolName; - memcpy_s(reloc.r_symbol, 1024, relocationName.c_str(), relocationName.size()); - reloc.r_offset = 8; - reloc.r_type = vISA::GenRelocType::R_SYM_ADDR_32; - - vISA::GenRelocEntry reloc64 = {}; - memcpy_s(reloc64.r_symbol, 1024, relocationName.c_str(), relocationName.size()); - reloc64.r_offset = 16; - reloc64.r_type = vISA::GenRelocType::R_SYM_ADDR; - - vISA::GenRelocEntry relocs[] = {reloc, reloc64}; - constexpr uint32_t numRelocations = 2; - bool decodeRelocSuccess = linkerInput.decodeRelocationTable(&relocs, numRelocations, 0); - EXPECT_TRUE(decodeRelocSuccess); - - NEO::Linker linker(linkerInput); - NEO::Linker::SegmentInfo globalVarSegment, globalConstSegment, exportedFuncSegment; - globalVarSegment.gpuAddress = 8; - globalVarSegment.segmentSize = 64; - globalConstSegment.gpuAddress = 128; - globalConstSegment.segmentSize = 256; - exportedFuncSegment.gpuAddress = 4096; - exportedFuncSegment.segmentSize = 1024; - NEO::Linker::UnresolvedExternals unresolvedExternals; - 
NEO::Linker::KernelDescriptorsT kernelDescriptors; - NEO::Linker::ExternalFunctionsT externalFunctions; - KernelDescriptor kernelDescriptor; - kernelDescriptors.push_back(&kernelDescriptor); - kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true; - kernelDescriptor.kernelAttributes.flags.useStackCalls = true; - kernelDescriptor.kernelMetadata.indirectAccessBuffer = 1; - - HardwareInfo hwInfo = *defaultHwInfo; - MockExecutionEnvironment executionEnvironment(&hwInfo, false, 1); - executionEnvironment.incRefInternal(); - - UltDeviceFactory deviceFactory{1, 0, executionEnvironment}; - auto rootDeviceIndex = deviceFactory.rootDevices[0]->getRootDeviceIndex(); - RAIIGfxCoreHelperFactory raii(*deviceFactory.rootDevices[0]->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]); - - std::vector instructionSegment; - uint32_t initData = 0x77777777; - instructionSegment.resize(32, static_cast(initData)); - NEO::Linker::PatchableSegment seg0; - seg0.hostPointer = instructionSegment.data(); - seg0.segmentSize = instructionSegment.size(); - NEO::Linker::PatchableSegments patchableInstructionSegments{seg0}; - - auto linkResult = linker.link(globalVarSegment, globalConstSegment, exportedFuncSegment, {}, - nullptr, nullptr, patchableInstructionSegments, unresolvedExternals, - deviceFactory.rootDevices[0], nullptr, 0, nullptr, 0, kernelDescriptors, externalFunctions); - EXPECT_EQ(NEO::LinkingStatus::linkedFully, linkResult); - - auto addressToPatch = reinterpret_cast(instructionSegment.data() + reloc.r_offset); - - EXPECT_EQ(ImplicitArgsV1::getAlignedSize(), *addressToPatch); - EXPECT_EQ(initData, *(addressToPatch - 1)); - EXPECT_EQ(initData, *(addressToPatch + 1)); - - auto addressToPatch64 = (instructionSegment.data() + reloc64.r_offset); - uint64_t patchedValue64 = 0; - memcpy_s(&patchedValue64, sizeof(patchedValue64), addressToPatch64, sizeof(patchedValue64)); - EXPECT_EQ(ImplicitArgsV1::getAlignedSize(), patchedValue64); -} - HWTEST_F(LinkerTests, 
givenImplicitArgRelocationAndImplicitArgsWithUnknownVersionWhenLinkingThenUnrecoverableIfCalled) { DebugManagerStateRestore restore; struct MockGfxCoreHelper : NEO::GfxCoreHelperHw {