mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 12:23:05 +08:00
Revert "fix: correctly set unpacked device binary prior decoding"
This reverts commit 60d5484e6b.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a38b3895c5
commit
10c37fbb78
@@ -227,19 +227,15 @@ cl_int Program::processGenBinary(const ClDevice &clDevice) {
|
||||
}
|
||||
|
||||
if (!decodedSingleDeviceBinary.isSet) {
|
||||
|
||||
auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize);
|
||||
SingleDeviceBinary singleDeviceBinary = {};
|
||||
decodedSingleDeviceBinary.programInfo = {};
|
||||
|
||||
singleDeviceBinary.deviceBinary = blob;
|
||||
singleDeviceBinary.targetDevice = NEO::getTargetDevice(clDevice.getRootDeviceEnvironment());
|
||||
singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection = this->indirectDetectionVersion;
|
||||
singleDeviceBinary.generatorFeatureVersions.indirectAccessBuffer = this->indirectAccessBufferMajorVersion;
|
||||
singleDeviceBinary.generator = this->isGeneratedByIgc ? GeneratorType::igc : GeneratorType::unknown;
|
||||
auto blob = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize);
|
||||
SingleDeviceBinary binary = {};
|
||||
binary.deviceBinary = blob;
|
||||
binary.targetDevice = NEO::getTargetDevice(clDevice.getRootDeviceEnvironment());
|
||||
|
||||
auto &gfxCoreHelper = clDevice.getGfxCoreHelper();
|
||||
std::tie(decodedSingleDeviceBinary.decodeError, std::ignore) = NEO::decodeSingleDeviceBinary(decodedSingleDeviceBinary.programInfo, singleDeviceBinary, decodedSingleDeviceBinary.decodeErrors, decodedSingleDeviceBinary.decodeWarnings, gfxCoreHelper);
|
||||
std::tie(decodedSingleDeviceBinary.decodeError, std::ignore) = NEO::decodeSingleDeviceBinary(decodedSingleDeviceBinary.programInfo, binary, decodedSingleDeviceBinary.decodeErrors, decodedSingleDeviceBinary.decodeWarnings, gfxCoreHelper);
|
||||
} else {
|
||||
decodedSingleDeviceBinary.isSet = false;
|
||||
}
|
||||
|
||||
@@ -217,7 +217,7 @@ cl_int Program::createProgramFromBinary(
|
||||
|
||||
this->isGeneratedByIgc = singleDeviceBinary.generator == GeneratorType::igc;
|
||||
this->indirectDetectionVersion = singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection;
|
||||
this->indirectAccessBufferMajorVersion = singleDeviceBinary.generatorFeatureVersions.indirectAccessBuffer;
|
||||
this->indirectAccessBufferMajorVersion = singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection;
|
||||
|
||||
bool rebuild = AddressingModeHelper::containsBindlessKernel(decodedSingleDeviceBinary.programInfo.kernelInfos);
|
||||
rebuild |= !clDevice.getDevice().getExecutionEnvironment()->isOneApiPvcWaEnv();
|
||||
|
||||
@@ -597,104 +597,6 @@ TEST_F(ClBuildProgramMultiDeviceTests, givenMultiDeviceProgramWithProgramBuiltFo
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
TEST_F(ClBuildProgramMultiDeviceTests, GivenProgramCreatedFromSourceWhenBuildingThenCorrectlyFilledSingleDeviceBinaryIsUsed) {
|
||||
MockUnrestrictiveContextMultiGPU context;
|
||||
cl_program pProgram = nullptr;
|
||||
|
||||
std::string zeinfo = std::string("version :\'") + versionToString(Zebin::ZeInfo::zeInfoDecoderVersion) + R"===('
|
||||
kernels:
|
||||
- name : some_kernel
|
||||
execution_env :
|
||||
simd_size : 32
|
||||
require_iab: true
|
||||
- name : some_other_kernel
|
||||
execution_env :
|
||||
simd_size : 32
|
||||
)===";
|
||||
|
||||
uint8_t kernelIsa[8]{0U};
|
||||
ZebinTestData::ValidEmptyProgram<is32bit ? NEO::Elf::EI_CLASS_32 : NEO::Elf::EI_CLASS_64> zebin;
|
||||
zebin.removeSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo);
|
||||
zebin.appendSection(NEO::Zebin::Elf::SectionHeaderTypeZebin::SHT_ZEBIN_ZEINFO, NEO::Zebin::Elf::SectionNames::zeInfo, ArrayRef<const uint8_t>::fromAny(zeinfo.data(), zeinfo.size()));
|
||||
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_kernel", {kernelIsa, sizeof(kernelIsa)});
|
||||
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::textPrefix.str() + "some_other_kernel", {kernelIsa, sizeof(kernelIsa)});
|
||||
|
||||
const uint8_t data[] = {'H', 'e', 'l', 'l', 'o', '!'};
|
||||
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Zebin::Elf::SectionNames::dataConstString, data);
|
||||
|
||||
{
|
||||
const uint32_t indirectAccessBufferMajorVersion = 4u;
|
||||
|
||||
Zebin::Elf::ElfNoteSection elfNoteSection = {};
|
||||
elfNoteSection.type = Zebin::Elf::IntelGTSectionType::indirectAccessBufferMajorVersion;
|
||||
elfNoteSection.descSize = sizeof(uint32_t);
|
||||
elfNoteSection.nameSize = 8u;
|
||||
|
||||
auto sectionDataSize = sizeof(Zebin::Elf::ElfNoteSection) + elfNoteSection.nameSize + alignUp(elfNoteSection.descSize, 4);
|
||||
auto noteIntelGTSectionData = std::make_unique<uint8_t[]>(sectionDataSize);
|
||||
auto appendSingleIntelGTSectionData = [](const NEO::Elf::ElfNoteSection &elfNoteSection, uint8_t *const intelGTSectionData, const uint8_t *descData, const char *ownerName, size_t spaceAvailable) {
|
||||
size_t offset = 0;
|
||||
ASSERT_GE(spaceAvailable, sizeof(Zebin::Elf::ElfNoteSection) + elfNoteSection.nameSize + elfNoteSection.descSize);
|
||||
memcpy_s(ptrOffset(intelGTSectionData, offset), sizeof(NEO::Elf::ElfNoteSection), &elfNoteSection, sizeof(NEO::Elf::ElfNoteSection));
|
||||
offset += sizeof(NEO::Elf::ElfNoteSection);
|
||||
memcpy_s(reinterpret_cast<char *>(ptrOffset(intelGTSectionData, offset)), elfNoteSection.nameSize, ownerName, elfNoteSection.nameSize);
|
||||
offset += elfNoteSection.nameSize;
|
||||
memcpy_s(ptrOffset(intelGTSectionData, offset), elfNoteSection.descSize, descData, elfNoteSection.descSize);
|
||||
offset += elfNoteSection.descSize;
|
||||
};
|
||||
|
||||
appendSingleIntelGTSectionData(elfNoteSection, noteIntelGTSectionData.get(), reinterpret_cast<const uint8_t *>(&indirectAccessBufferMajorVersion),
|
||||
Zebin::Elf::intelGTNoteOwnerName.str().c_str(), sectionDataSize);
|
||||
zebin.appendSection(Zebin::Elf::SHT_NOTE, Zebin::Elf::SectionNames::noteIntelGT, ArrayRef<uint8_t>::fromAny(noteIntelGTSectionData.get(), sectionDataSize));
|
||||
}
|
||||
|
||||
MockCompilerDebugVars debugVars;
|
||||
debugVars.binaryToReturn = const_cast<unsigned char *>(zebin.storage.data());
|
||||
debugVars.binaryToReturnSize = zebin.storage.size();
|
||||
gEnvironment->igcPushDebugVars(debugVars);
|
||||
gEnvironment->fclPushDebugVars(debugVars);
|
||||
|
||||
cl_int retVal = CL_INVALID_PROGRAM;
|
||||
pProgram = clCreateProgramWithSource(
|
||||
&context,
|
||||
1,
|
||||
sources,
|
||||
&sourceKernelSize,
|
||||
&retVal);
|
||||
|
||||
EXPECT_NE(nullptr, pProgram);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
cl_device_id firstDevice = context.pRootDevice0;
|
||||
cl_device_id secondDevice = context.pRootDevice1;
|
||||
cl_device_id devices[] = {firstDevice, secondDevice};
|
||||
|
||||
retVal = clBuildProgram(
|
||||
pProgram,
|
||||
2,
|
||||
devices,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
cl_kernel pKernel = clCreateKernel(pProgram, "some_kernel", &retVal);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
MultiDeviceKernel *kernel = castToObject<MultiDeviceKernel>(pKernel);
|
||||
Program *program = castToObject<Program>(pProgram);
|
||||
EXPECT_EQ(4u, program->getIndirectAccessBufferVersion());
|
||||
EXPECT_FALSE(kernel->getKernelInfos()[1]->kernelDescriptor.kernelMetadata.isGeneratedByIgc);
|
||||
|
||||
retVal = clReleaseKernel(pKernel);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
retVal = clReleaseProgram(pProgram);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
gEnvironment->igcPopDebugVars();
|
||||
gEnvironment->fclPopDebugVars();
|
||||
}
|
||||
|
||||
TEST_F(ClBuildProgramMultiDeviceTests, givenMultiDeviceProgramWithProgramBuiltForSingleDeviceWithCreatedKernelWhenBuildingProgramForSecondDeviceThenInvalidOperationReturned) {
|
||||
MockUnrestrictiveContextMultiGPU context;
|
||||
cl_program pProgram = nullptr;
|
||||
|
||||
@@ -661,21 +661,15 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De
|
||||
kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs |= addImplcictArgs;
|
||||
if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) {
|
||||
uint64_t implicitArgsSize = 0;
|
||||
uint8_t version = kernelDescriptor.kernelMetadata.indirectAccessBuffer;
|
||||
if (version == 0) {
|
||||
version = pDevice->getGfxCoreHelper().getImplicitArgsVersion();
|
||||
}
|
||||
|
||||
if (version == 0) {
|
||||
if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 0) {
|
||||
implicitArgsSize = ImplicitArgsV0::getAlignedSize();
|
||||
} else if (version == 1) {
|
||||
} else if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 1) {
|
||||
implicitArgsSize = ImplicitArgsV1::getAlignedSize();
|
||||
} else if (version == 2) {
|
||||
} else if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 2) {
|
||||
implicitArgsSize = ImplicitArgsV2::getAlignedSize();
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
// Choose relocation size based on relocation type
|
||||
auto patchSize = pImplicitArgsReloc.second == RelocationInfo::Type::address ? 8 : 4;
|
||||
patchWithRequiredSize(pImplicitArgsReloc.first, patchSize, implicitArgsSize);
|
||||
|
||||
@@ -61,7 +61,6 @@ SingleDeviceBinary unpackSingleZebin(const ArrayRef<const uint8_t> archive, cons
|
||||
if (elf.elfFileHeader->machine == Elf::ElfMachine::EM_INTELGT) {
|
||||
validForTarget &= Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret);
|
||||
} else {
|
||||
Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret);
|
||||
const auto &flags = reinterpret_cast<const NEO::Zebin::Elf::ZebinTargetFlags &>(elf.elfFileHeader->flags);
|
||||
validForTarget &= flags.machineEntryUsesGfxCoreInsteadOfProductFamily
|
||||
? (requestedTargetDevice.coreFamily == static_cast<GFXCORE_FAMILY>(elf.elfFileHeader->machine))
|
||||
@@ -131,7 +130,6 @@ DecodeError decodeSingleZebin(ProgramInfo &dst, const SingleDeviceBinary &src, s
|
||||
|
||||
for (auto &kernelInfo : dst.kernelInfos) {
|
||||
kernelInfo->kernelDescriptor.kernelMetadata.isGeneratedByIgc = isGeneratedByIgc;
|
||||
kernelInfo->kernelDescriptor.kernelMetadata.indirectAccessBuffer = src.generatorFeatureVersions.indirectAccessBuffer;
|
||||
|
||||
if (KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) {
|
||||
kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();
|
||||
|
||||
@@ -267,7 +267,6 @@ struct KernelDescriptor : NEO::NonCopyableAndNonMovableClass {
|
||||
uint16_t compiledSubGroupsNumber = 0U;
|
||||
uint8_t requiredSubGroupSize = 0U;
|
||||
uint8_t requiredThreadGroupDispatchSize = 0U;
|
||||
uint8_t indirectAccessBuffer = 0u;
|
||||
bool isGeneratedByIgc = true;
|
||||
} kernelMetadata;
|
||||
|
||||
|
||||
@@ -2313,82 +2313,6 @@ HWTEST_F(LinkerTests, givenImplicitArgRelocationAndImplicitArgsV1WhenLinkingThen
|
||||
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
|
||||
}
|
||||
|
||||
HWTEST_F(LinkerTests, givenImplicitArgRelocationAndKernelDescriptorWithImplicitArgsV1WhenLinkingThenPatchRelocationWithSizeOfImplicitArgsV1) {
|
||||
DebugManagerStateRestore restore;
|
||||
struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
|
||||
uint32_t getImplicitArgsVersion() const override {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
NEO::LinkerInput linkerInput;
|
||||
|
||||
vISA::GenRelocEntry reloc = {};
|
||||
std::string relocationName = implicitArgsRelocationSymbolName;
|
||||
memcpy_s(reloc.r_symbol, 1024, relocationName.c_str(), relocationName.size());
|
||||
reloc.r_offset = 8;
|
||||
reloc.r_type = vISA::GenRelocType::R_SYM_ADDR_32;
|
||||
|
||||
vISA::GenRelocEntry reloc64 = {};
|
||||
memcpy_s(reloc64.r_symbol, 1024, relocationName.c_str(), relocationName.size());
|
||||
reloc64.r_offset = 16;
|
||||
reloc64.r_type = vISA::GenRelocType::R_SYM_ADDR;
|
||||
|
||||
vISA::GenRelocEntry relocs[] = {reloc, reloc64};
|
||||
constexpr uint32_t numRelocations = 2;
|
||||
bool decodeRelocSuccess = linkerInput.decodeRelocationTable(&relocs, numRelocations, 0);
|
||||
EXPECT_TRUE(decodeRelocSuccess);
|
||||
|
||||
NEO::Linker linker(linkerInput);
|
||||
NEO::Linker::SegmentInfo globalVarSegment, globalConstSegment, exportedFuncSegment;
|
||||
globalVarSegment.gpuAddress = 8;
|
||||
globalVarSegment.segmentSize = 64;
|
||||
globalConstSegment.gpuAddress = 128;
|
||||
globalConstSegment.segmentSize = 256;
|
||||
exportedFuncSegment.gpuAddress = 4096;
|
||||
exportedFuncSegment.segmentSize = 1024;
|
||||
NEO::Linker::UnresolvedExternals unresolvedExternals;
|
||||
NEO::Linker::KernelDescriptorsT kernelDescriptors;
|
||||
NEO::Linker::ExternalFunctionsT externalFunctions;
|
||||
KernelDescriptor kernelDescriptor;
|
||||
kernelDescriptors.push_back(&kernelDescriptor);
|
||||
kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
|
||||
kernelDescriptor.kernelAttributes.flags.useStackCalls = true;
|
||||
kernelDescriptor.kernelMetadata.indirectAccessBuffer = 1;
|
||||
|
||||
HardwareInfo hwInfo = *defaultHwInfo;
|
||||
MockExecutionEnvironment executionEnvironment(&hwInfo, false, 1);
|
||||
executionEnvironment.incRefInternal();
|
||||
|
||||
UltDeviceFactory deviceFactory{1, 0, executionEnvironment};
|
||||
auto rootDeviceIndex = deviceFactory.rootDevices[0]->getRootDeviceIndex();
|
||||
RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*deviceFactory.rootDevices[0]->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]);
|
||||
|
||||
std::vector<char> instructionSegment;
|
||||
uint32_t initData = 0x77777777;
|
||||
instructionSegment.resize(32, static_cast<char>(initData));
|
||||
NEO::Linker::PatchableSegment seg0;
|
||||
seg0.hostPointer = instructionSegment.data();
|
||||
seg0.segmentSize = instructionSegment.size();
|
||||
NEO::Linker::PatchableSegments patchableInstructionSegments{seg0};
|
||||
|
||||
auto linkResult = linker.link(globalVarSegment, globalConstSegment, exportedFuncSegment, {},
|
||||
nullptr, nullptr, patchableInstructionSegments, unresolvedExternals,
|
||||
deviceFactory.rootDevices[0], nullptr, 0, nullptr, 0, kernelDescriptors, externalFunctions);
|
||||
EXPECT_EQ(NEO::LinkingStatus::linkedFully, linkResult);
|
||||
|
||||
auto addressToPatch = reinterpret_cast<const uint32_t *>(instructionSegment.data() + reloc.r_offset);
|
||||
|
||||
EXPECT_EQ(ImplicitArgsV1::getAlignedSize(), *addressToPatch);
|
||||
EXPECT_EQ(initData, *(addressToPatch - 1));
|
||||
EXPECT_EQ(initData, *(addressToPatch + 1));
|
||||
|
||||
auto addressToPatch64 = (instructionSegment.data() + reloc64.r_offset);
|
||||
uint64_t patchedValue64 = 0;
|
||||
memcpy_s(&patchedValue64, sizeof(patchedValue64), addressToPatch64, sizeof(patchedValue64));
|
||||
EXPECT_EQ(ImplicitArgsV1::getAlignedSize(), patchedValue64);
|
||||
}
|
||||
|
||||
HWTEST_F(LinkerTests, givenImplicitArgRelocationAndImplicitArgsWithUnknownVersionWhenLinkingThenUnrecoverableIfCalled) {
|
||||
DebugManagerStateRestore restore;
|
||||
struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
|
||||
|
||||
Reference in New Issue
Block a user