Revert "fix: correctly set unpacked device binary prior decoding"

This reverts commit 60d5484e6b.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2025-09-17 04:39:41 +02:00
committed by Compute-Runtime-Automation
parent a38b3895c5
commit 10c37fbb78
7 changed files with 9 additions and 196 deletions

View File

@@ -227,19 +227,15 @@ cl_int Program::processGenBinary(const ClDevice &clDevice) {
} }
if (!decodedSingleDeviceBinary.isSet) { if (!decodedSingleDeviceBinary.isSet) {
auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize);
SingleDeviceBinary singleDeviceBinary = {};
decodedSingleDeviceBinary.programInfo = {}; decodedSingleDeviceBinary.programInfo = {};
singleDeviceBinary.deviceBinary = blob; auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize);
singleDeviceBinary.targetDevice = NEO::getTargetDevice(clDevice.getRootDeviceEnvironment()); SingleDeviceBinary binary = {};
singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection = this->indirectDetectionVersion; binary.deviceBinary = blob;
singleDeviceBinary.generatorFeatureVersions.indirectAccessBuffer = this->indirectAccessBufferMajorVersion; binary.targetDevice = NEO::getTargetDevice(clDevice.getRootDeviceEnvironment());
singleDeviceBinary.generator = this->isGeneratedByIgc ? GeneratorType::igc : GeneratorType::unknown;
auto &gfxCoreHelper = clDevice.getGfxCoreHelper(); auto &gfxCoreHelper = clDevice.getGfxCoreHelper();
std::tie(decodedSingleDeviceBinary.decodeError, std::ignore) = NEO::decodeSingleDeviceBinary(decodedSingleDeviceBinary.programInfo, singleDeviceBinary, decodedSingleDeviceBinary.decodeErrors, decodedSingleDeviceBinary.decodeWarnings, gfxCoreHelper); std::tie(decodedSingleDeviceBinary.decodeError, std::ignore) = NEO::decodeSingleDeviceBinary(decodedSingleDeviceBinary.programInfo, binary, decodedSingleDeviceBinary.decodeErrors, decodedSingleDeviceBinary.decodeWarnings, gfxCoreHelper);
} else { } else {
decodedSingleDeviceBinary.isSet = false; decodedSingleDeviceBinary.isSet = false;
} }

View File

@@ -217,7 +217,7 @@ cl_int Program::createProgramFromBinary(
this->isGeneratedByIgc = singleDeviceBinary.generator == GeneratorType::igc; this->isGeneratedByIgc = singleDeviceBinary.generator == GeneratorType::igc;
this->indirectDetectionVersion = singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection; this->indirectDetectionVersion = singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection;
this->indirectAccessBufferMajorVersion = singleDeviceBinary.generatorFeatureVersions.indirectAccessBuffer; this->indirectAccessBufferMajorVersion = singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection;
bool rebuild = AddressingModeHelper::containsBindlessKernel(decodedSingleDeviceBinary.programInfo.kernelInfos); bool rebuild = AddressingModeHelper::containsBindlessKernel(decodedSingleDeviceBinary.programInfo.kernelInfos);
rebuild |= !clDevice.getDevice().getExecutionEnvironment()->isOneApiPvcWaEnv(); rebuild |= !clDevice.getDevice().getExecutionEnvironment()->isOneApiPvcWaEnv();

View File

@@ -597,104 +597,6 @@ TEST_F(ClBuildProgramMultiDeviceTests, givenMultiDeviceProgramWithProgramBuiltFo
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal);
} }
// Creates a program from source, builds it for two root devices while a hand-crafted zebin is
// registered as the compiler output, and then checks the program's indirect access buffer version
// and the isGeneratedByIgc flag of a created kernel.
TEST_F(ClBuildProgramMultiDeviceTests, GivenProgramCreatedFromSourceWhenBuildingThenCorrectlyFilledSingleDeviceBinaryIsUsed) {
MockUnrestrictiveContextMultiGPU context;
cl_program pProgram = nullptr;
// zeInfo text with two kernels; only some_kernel sets require_iab.
std::string zeinfo = std::string("version :\'") + versionToString(Zebin::ZeInfo::zeInfoDecoderVersion) + R"===('
kernels:
- name : some_kernel
execution_env :
simd_size : 32
require_iab: true
- name : some_other_kernel
execution_env :
simd_size : 32
)===";
uint8_t kernelIsa[8]{0U};
ZebinTestData::ValidEmptyProgram<is32bit ? NEO::Elf::EI_CLASS_32 : NEO::Elf::EI_CLASS_64> zebin;
// Replace the zeInfo section of the empty program with the text above, then add one text
// section per kernel (both backed by the same zeroed ISA buffer) and a const-string data section.
zebin.removeSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo);
zebin.appendSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo, ArrayRef<const uint8_t>::fromAny(zeinfo.data(), zeinfo.size()));
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_kernel", {kernelIsa, sizeof(kernelIsa)});
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_other_kernel", {kernelIsa, sizeof(kernelIsa)});
const uint8_t data[] = {'H', 'e', 'l', 'l', 'o', '!'};
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::dataConstString, data);
{
// Append an Intel GT note whose descriptor carries indirectAccessBufferMajorVersion = 4;
// the same value is expected from getIndirectAccessBufferVersion() at the end of the test.
const uint32_t indirectAccessBufferMajorVersion = 4u;
Zebin::Elf::ElfNoteSection elfNoteSection = {};
elfNoteSection.type = Zebin::Elf::IntelGTSectionType::indirectAccessBufferMajorVersion;
elfNoteSection.descSize = sizeof(uint32_t);
// NOTE(review): presumably the byte length of the owner name as stored in the note - confirm
// against Zebin::Elf::intelGTNoteOwnerName.
elfNoteSection.nameSize = 8u;
// Buffer size: note header + owner name + descriptor size rounded up to 4 bytes.
auto sectionDataSize = sizeof(Zebin::Elf::ElfNoteSection) + elfNoteSection.nameSize + alignUp(elfNoteSection.descSize, 4);
auto noteIntelGTSectionData = std::make_unique<uint8_t[]>(sectionDataSize);
// Writes header, owner name and descriptor back-to-back into intelGTSectionData.
// The space check uses the unaligned descSize, while the buffer above is sized with the aligned one.
auto appendSingleIntelGTSectionData = [](const NEO::Elf::ElfNoteSection &elfNoteSection, uint8_t *const intelGTSectionData, const uint8_t *descData, const char *ownerName, size_t spaceAvailable) {
size_t offset = 0;
ASSERT_GE(spaceAvailable, sizeof(Zebin::Elf::ElfNoteSection) + elfNoteSection.nameSize + elfNoteSection.descSize);
memcpy_s(ptrOffset(intelGTSectionData, offset), sizeof(NEO::Elf::ElfNoteSection), &elfNoteSection, sizeof(NEO::Elf::ElfNoteSection));
offset += sizeof(NEO::Elf::ElfNoteSection);
memcpy_s(reinterpret_cast<char *>(ptrOffset(intelGTSectionData, offset)), elfNoteSection.nameSize, ownerName, elfNoteSection.nameSize);
offset += elfNoteSection.nameSize;
memcpy_s(ptrOffset(intelGTSectionData, offset), elfNoteSection.descSize, descData, elfNoteSection.descSize);
// offset is not read again after this increment.
offset += elfNoteSection.descSize;
};
appendSingleIntelGTSectionData(elfNoteSection, noteIntelGTSectionData.get(), reinterpret_cast<const uint8_t *>(&indirectAccessBufferMajorVersion),
Zebin::Elf::intelGTNoteOwnerName.str().c_str(), sectionDataSize);
zebin.appendSection(Zebin::Elf::SHT_NOTE, Zebin::Elf::SectionNames::noteIntelGT, ArrayRef<uint8_t>::fromAny(noteIntelGTSectionData.get(), sectionDataSize));
}
// Register the zebin bytes as binaryToReturn on both the IGC and FCL debug-var stacks.
// NOTE(review): presumably the mock compilers hand this binary back during clBuildProgram - confirm.
MockCompilerDebugVars debugVars;
debugVars.binaryToReturn = const_cast<unsigned char *>(zebin.storage.data());
debugVars.binaryToReturnSize = zebin.storage.size();
gEnvironment->igcPushDebugVars(debugVars);
gEnvironment->fclPushDebugVars(debugVars);
cl_int retVal = CL_INVALID_PROGRAM;
pProgram = clCreateProgramWithSource(
&context,
1,
sources,
&sourceKernelSize,
&retVal);
EXPECT_NE(nullptr, pProgram);
// NOTE(review): a fatal failure here returns before the igc/fcl debug vars are popped below.
ASSERT_EQ(CL_SUCCESS, retVal);
// Build for both root devices of the context in a single call.
cl_device_id firstDevice = context.pRootDevice0;
cl_device_id secondDevice = context.pRootDevice1;
cl_device_id devices[] = {firstDevice, secondDevice};
retVal = clBuildProgram(
pProgram,
2,
devices,
nullptr,
nullptr,
nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
cl_kernel pKernel = clCreateKernel(pProgram, "some_kernel", &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
MultiDeviceKernel *kernel = castToObject<MultiDeviceKernel>(pKernel);
Program *program = castToObject<Program>(pProgram);
// Version must match the descriptor value written into the Intel GT note above.
EXPECT_EQ(4u, program->getIndirectAccessBufferVersion());
// NOTE(review): index 1 presumably selects the kernel info of the second root device - confirm.
EXPECT_FALSE(kernel->getKernelInfos()[1]->kernelDescriptor.kernelMetadata.isGeneratedByIgc);
retVal = clReleaseKernel(pKernel);
EXPECT_EQ(CL_SUCCESS, retVal);
retVal = clReleaseProgram(pProgram);
EXPECT_EQ(CL_SUCCESS, retVal);
// Undo the two pushes made before program creation.
gEnvironment->igcPopDebugVars();
gEnvironment->fclPopDebugVars();
}
TEST_F(ClBuildProgramMultiDeviceTests, givenMultiDeviceProgramWithProgramBuiltForSingleDeviceWithCreatedKernelWhenBuildingProgramForSecondDeviceThenInvalidOperationReturned) { TEST_F(ClBuildProgramMultiDeviceTests, givenMultiDeviceProgramWithProgramBuiltForSingleDeviceWithCreatedKernelWhenBuildingProgramForSecondDeviceThenInvalidOperationReturned) {
MockUnrestrictiveContextMultiGPU context; MockUnrestrictiveContextMultiGPU context;
cl_program pProgram = nullptr; cl_program pProgram = nullptr;

View File

@@ -661,21 +661,15 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De
kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs |= addImplcictArgs; kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs |= addImplcictArgs;
if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) { if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) {
uint64_t implicitArgsSize = 0; uint64_t implicitArgsSize = 0;
uint8_t version = kernelDescriptor.kernelMetadata.indirectAccessBuffer; if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 0) {
if (version == 0) {
version = pDevice->getGfxCoreHelper().getImplicitArgsVersion();
}
if (version == 0) {
implicitArgsSize = ImplicitArgsV0::getAlignedSize(); implicitArgsSize = ImplicitArgsV0::getAlignedSize();
} else if (version == 1) { } else if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 1) {
implicitArgsSize = ImplicitArgsV1::getAlignedSize(); implicitArgsSize = ImplicitArgsV1::getAlignedSize();
} else if (version == 2) { } else if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 2) {
implicitArgsSize = ImplicitArgsV2::getAlignedSize(); implicitArgsSize = ImplicitArgsV2::getAlignedSize();
} else { } else {
UNRECOVERABLE_IF(true); UNRECOVERABLE_IF(true);
} }
// Choose relocation size based on relocation type // Choose relocation size based on relocation type
auto patchSize = pImplicitArgsReloc.second == RelocationInfo::Type::address ? 8 : 4; auto patchSize = pImplicitArgsReloc.second == RelocationInfo::Type::address ? 8 : 4;
patchWithRequiredSize(pImplicitArgsReloc.first, patchSize, implicitArgsSize); patchWithRequiredSize(pImplicitArgsReloc.first, patchSize, implicitArgsSize);

View File

@@ -61,7 +61,6 @@ SingleDeviceBinary unpackSingleZebin(const ArrayRef<const uint8_t> archive, cons
if (elf.elfFileHeader->machine == Elf::ElfMachine::EM_INTELGT) { if (elf.elfFileHeader->machine == Elf::ElfMachine::EM_INTELGT) {
validForTarget &= Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret); validForTarget &= Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret);
} else { } else {
Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret);
const auto &flags = reinterpret_cast<const NEO::Zebin::Elf::ZebinTargetFlags &>(elf.elfFileHeader->flags); const auto &flags = reinterpret_cast<const NEO::Zebin::Elf::ZebinTargetFlags &>(elf.elfFileHeader->flags);
validForTarget &= flags.machineEntryUsesGfxCoreInsteadOfProductFamily validForTarget &= flags.machineEntryUsesGfxCoreInsteadOfProductFamily
? (requestedTargetDevice.coreFamily == static_cast<GFXCORE_FAMILY>(elf.elfFileHeader->machine)) ? (requestedTargetDevice.coreFamily == static_cast<GFXCORE_FAMILY>(elf.elfFileHeader->machine))
@@ -131,7 +130,6 @@ DecodeError decodeSingleZebin(ProgramInfo &dst, const SingleDeviceBinary &src, s
for (auto &kernelInfo : dst.kernelInfos) { for (auto &kernelInfo : dst.kernelInfos) {
kernelInfo->kernelDescriptor.kernelMetadata.isGeneratedByIgc = isGeneratedByIgc; kernelInfo->kernelDescriptor.kernelMetadata.isGeneratedByIgc = isGeneratedByIgc;
kernelInfo->kernelDescriptor.kernelMetadata.indirectAccessBuffer = src.generatorFeatureVersions.indirectAccessBuffer;
if (KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) { if (KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) {
kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState(); kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();

View File

@@ -267,7 +267,6 @@ struct KernelDescriptor : NEO::NonCopyableAndNonMovableClass {
uint16_t compiledSubGroupsNumber = 0U; uint16_t compiledSubGroupsNumber = 0U;
uint8_t requiredSubGroupSize = 0U; uint8_t requiredSubGroupSize = 0U;
uint8_t requiredThreadGroupDispatchSize = 0U; uint8_t requiredThreadGroupDispatchSize = 0U;
uint8_t indirectAccessBuffer = 0u;
bool isGeneratedByIgc = true; bool isGeneratedByIgc = true;
} kernelMetadata; } kernelMetadata;

View File

@@ -2313,82 +2313,6 @@ HWTEST_F(LinkerTests, givenImplicitArgRelocationAndImplicitArgsV1WhenLinkingThen
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
} }
// Links a 32-bit and a 64-bit implicit-args relocation for a kernel descriptor whose
// kernelMetadata.indirectAccessBuffer is 1 while the GfxCoreHelper override reports implicit args
// version 0, and expects both relocations to be patched with ImplicitArgsV1::getAlignedSize().
HWTEST_F(LinkerTests, givenImplicitArgRelocationAndKernelDescriptorWithImplicitArgsV1WhenLinkingThenPatchRelocationWithSizeOfImplicitArgsV1) {
DebugManagerStateRestore restore;
// Helper override that always reports implicit args version 0, so a V1-sized patch can only
// come from the kernel descriptor's own indirectAccessBuffer value.
struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
uint32_t getImplicitArgsVersion() const override {
return 0;
}
};
NEO::LinkerInput linkerInput;
// Two relocations against the implicit-args symbol: a 32-bit one at offset 8 and a
// 64-bit one at offset 16 of the instruction segment.
vISA::GenRelocEntry reloc = {};
std::string relocationName = implicitArgsRelocationSymbolName;
memcpy_s(reloc.r_symbol, 1024, relocationName.c_str(), relocationName.size());
reloc.r_offset = 8;
reloc.r_type = vISA::GenRelocType::R_SYM_ADDR_32;
vISA::GenRelocEntry reloc64 = {};
memcpy_s(reloc64.r_symbol, 1024, relocationName.c_str(), relocationName.size());
reloc64.r_offset = 16;
reloc64.r_type = vISA::GenRelocType::R_SYM_ADDR;
vISA::GenRelocEntry relocs[] = {reloc, reloc64};
constexpr uint32_t numRelocations = 2;
bool decodeRelocSuccess = linkerInput.decodeRelocationTable(&relocs, numRelocations, 0);
EXPECT_TRUE(decodeRelocSuccess);
NEO::Linker linker(linkerInput);
NEO::Linker::SegmentInfo globalVarSegment, globalConstSegment, exportedFuncSegment;
globalVarSegment.gpuAddress = 8;
globalVarSegment.segmentSize = 64;
globalConstSegment.gpuAddress = 128;
globalConstSegment.segmentSize = 256;
exportedFuncSegment.gpuAddress = 4096;
exportedFuncSegment.segmentSize = 1024;
NEO::Linker::UnresolvedExternals unresolvedExternals;
NEO::Linker::KernelDescriptorsT kernelDescriptors;
NEO::Linker::ExternalFunctionsT externalFunctions;
// Single kernel descriptor that requires implicit args, uses stack calls and carries
// indirectAccessBuffer version 1.
KernelDescriptor kernelDescriptor;
kernelDescriptors.push_back(&kernelDescriptor);
kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
kernelDescriptor.kernelAttributes.flags.useStackCalls = true;
kernelDescriptor.kernelMetadata.indirectAccessBuffer = 1;
HardwareInfo hwInfo = *defaultHwInfo;
MockExecutionEnvironment executionEnvironment(&hwInfo, false, 1);
executionEnvironment.incRefInternal();
UltDeviceFactory deviceFactory{1, 0, executionEnvironment};
auto rootDeviceIndex = deviceFactory.rootDevices[0]->getRootDeviceIndex();
// NOTE(review): presumably installs MockGfxCoreHelper on this root device environment for the
// lifetime of `raii` - confirm against RAIIGfxCoreHelperFactory.
RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*deviceFactory.rootDevices[0]->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]);
std::vector<char> instructionSegment;
uint32_t initData = 0x77777777;
// 32-byte segment pre-filled with static_cast<char>(initData); the neighbour checks below rely
// on untouched 32-bit words reading back as initData.
instructionSegment.resize(32, static_cast<char>(initData));
NEO::Linker::PatchableSegment seg0;
seg0.hostPointer = instructionSegment.data();
seg0.segmentSize = instructionSegment.size();
NEO::Linker::PatchableSegments patchableInstructionSegments{seg0};
auto linkResult = linker.link(globalVarSegment, globalConstSegment, exportedFuncSegment, {},
nullptr, nullptr, patchableInstructionSegments, unresolvedExternals,
deviceFactory.rootDevices[0], nullptr, 0, nullptr, 0, kernelDescriptors, externalFunctions);
EXPECT_EQ(NEO::LinkingStatus::linkedFully, linkResult);
// 32-bit relocation: the word at r_offset holds the V1 size, and the words directly before
// and after it are left at their initial fill value.
auto addressToPatch = reinterpret_cast<const uint32_t *>(instructionSegment.data() + reloc.r_offset);
EXPECT_EQ(ImplicitArgsV1::getAlignedSize(), *addressToPatch);
EXPECT_EQ(initData, *(addressToPatch - 1));
EXPECT_EQ(initData, *(addressToPatch + 1));
// 64-bit relocation: the 8 bytes at reloc64.r_offset are copied into a local before comparing.
auto addressToPatch64 = (instructionSegment.data() + reloc64.r_offset);
uint64_t patchedValue64 = 0;
memcpy_s(&patchedValue64, sizeof(patchedValue64), addressToPatch64, sizeof(patchedValue64));
EXPECT_EQ(ImplicitArgsV1::getAlignedSize(), patchedValue64);
}
HWTEST_F(LinkerTests, givenImplicitArgRelocationAndImplicitArgsWithUnknownVersionWhenLinkingThenUnrecoverableIfCalled) { HWTEST_F(LinkerTests, givenImplicitArgRelocationAndImplicitArgsWithUnknownVersionWhenLinkingThenUnrecoverableIfCalled) {
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> { struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {