mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
fix: implicit arg buffer
- refactor validating target device and decoding Intel GT Notes - decoded versions are stored in singleDeviceBinary when decoding zebin - adds parsing Intel GT notes from elf when unpacking binary - sets indirectAccessBufferMajorVersion with correct value - fix ImplicitArgsV1 - add simdWidth - use correct simd size in patchImplicitArgs() Related-To: NEO-16167, NEO-15211, IGC-12358 Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
3a5b197f3a
commit
12263b2e7c
@@ -681,11 +681,16 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De
|
||||
kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs |= addImplcictArgs;
|
||||
if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) {
|
||||
uint64_t implicitArgsSize = 0;
|
||||
if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 0) {
|
||||
uint8_t version = kernelDescriptor.kernelMetadata.indirectAccessBuffer;
|
||||
if (version == 0) {
|
||||
version = pDevice->getGfxCoreHelper().getImplicitArgsVersion();
|
||||
}
|
||||
|
||||
if (version == 0) {
|
||||
implicitArgsSize = ImplicitArgsV0::getAlignedSize();
|
||||
} else if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 1) {
|
||||
} else if (version == 1) {
|
||||
implicitArgsSize = ImplicitArgsV1::getAlignedSize();
|
||||
} else if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 2) {
|
||||
} else if (version == 2) {
|
||||
implicitArgsSize = ImplicitArgsV2::getAlignedSize();
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
|
||||
@@ -59,8 +59,9 @@ SingleDeviceBinary unpackSingleZebin(const ArrayRef<const uint8_t> archive, cons
|
||||
|
||||
bool validForTarget = true;
|
||||
if (elf.elfFileHeader->machine == Elf::ElfMachine::EM_INTELGT) {
|
||||
validForTarget &= Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret);
|
||||
validForTarget &= Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret.generatorFeatureVersions, ret.generator);
|
||||
} else {
|
||||
Zebin::validateTargetDevice(elf, requestedTargetDevice, outErrReason, outWarning, ret.generatorFeatureVersions, ret.generator);
|
||||
const auto &flags = reinterpret_cast<const NEO::Zebin::Elf::ZebinTargetFlags &>(elf.elfFileHeader->flags);
|
||||
validForTarget &= flags.machineEntryUsesGfxCoreInsteadOfProductFamily
|
||||
? (requestedTargetDevice.coreFamily == static_cast<GFXCORE_FAMILY>(elf.elfFileHeader->machine))
|
||||
@@ -114,10 +115,16 @@ DecodeError decodeSingleZebin(ProgramInfo &dst, const SingleDeviceBinary &src, s
|
||||
return DecodeError::invalidBinary;
|
||||
}
|
||||
|
||||
GeneratorFeatureVersions generatorFeatures = {};
|
||||
GeneratorType generator = {};
|
||||
auto ret = Zebin::validateTargetDevice(elf, src.targetDevice, outErrReason, outWarning, generatorFeatures, generator);
|
||||
if (!ret && elf.elfFileHeader->machine == Elf::ElfMachine::EM_INTELGT) {
|
||||
return DecodeError::invalidBinary;
|
||||
}
|
||||
dst.grfSize = src.targetDevice.grfSize;
|
||||
dst.minScratchSpaceSize = src.targetDevice.minScratchSpaceSize;
|
||||
dst.indirectDetectionVersion = src.generatorFeatureVersions.indirectMemoryAccessDetection;
|
||||
dst.indirectAccessBufferMajorVersion = src.generatorFeatureVersions.indirectAccessBuffer;
|
||||
dst.indirectDetectionVersion = generatorFeatures.indirectMemoryAccessDetection;
|
||||
dst.indirectAccessBufferMajorVersion = generatorFeatures.indirectAccessBuffer;
|
||||
dst.samplerStateSize = src.targetDevice.samplerStateSize;
|
||||
dst.samplerBorderColorStateSize = src.targetDevice.samplerBorderColorStateSize;
|
||||
|
||||
@@ -126,10 +133,11 @@ DecodeError decodeSingleZebin(ProgramInfo &dst, const SingleDeviceBinary &src, s
|
||||
return decodeError;
|
||||
}
|
||||
|
||||
const bool isGeneratedByIgc = src.generator == GeneratorType::igc;
|
||||
const bool isGeneratedByIgc = generator == GeneratorType::igc;
|
||||
|
||||
for (auto &kernelInfo : dst.kernelInfos) {
|
||||
kernelInfo->kernelDescriptor.kernelMetadata.isGeneratedByIgc = isGeneratedByIgc;
|
||||
kernelInfo->kernelDescriptor.kernelMetadata.indirectAccessBuffer = generatorFeatures.indirectAccessBuffer;
|
||||
|
||||
if (KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) {
|
||||
kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();
|
||||
|
||||
@@ -75,6 +75,12 @@ struct TargetDevice {
|
||||
};
|
||||
TargetDevice getTargetDevice(const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
// Feature versions reported by the generator of a device binary.
// In this change the struct is hoisted out of SingleDeviceBinary to namespace
// scope so it can be passed on its own (validateTargetDevice now takes a
// GeneratorFeatureVersions& instead of a whole SingleDeviceBinary&).
// Both fields default to 0 and are only overwritten when the matching
// Intel GT note is present in the ELF.
struct GeneratorFeatureVersions {
|
||||
// Integer type used for every version field below.
using VersionT = uint32_t;
|
||||
// Filled from the IntelGTSectionType::indirectAccessDetectionVersion note.
VersionT indirectMemoryAccessDetection = 0u;
|
||||
// Filled from the IntelGTSectionType::indirectAccessBufferMajorVersion note.
VersionT indirectAccessBuffer = 0u;
|
||||
};
|
||||
|
||||
struct SingleDeviceBinary {
|
||||
DeviceBinaryFormat format = DeviceBinaryFormat::unknown;
|
||||
ArrayRef<const uint8_t> deviceBinary;
|
||||
@@ -84,11 +90,7 @@ struct SingleDeviceBinary {
|
||||
ConstStringRef buildOptions;
|
||||
TargetDevice targetDevice;
|
||||
GeneratorType generator = GeneratorType::igc;
|
||||
struct GeneratorFeatureVersions {
|
||||
using VersionT = uint32_t;
|
||||
VersionT indirectMemoryAccessDetection = 0u;
|
||||
VersionT indirectAccessBuffer = 0u;
|
||||
} generatorFeatureVersions;
|
||||
GeneratorFeatureVersions generatorFeatureVersions;
|
||||
};
|
||||
|
||||
template <DeviceBinaryFormat format>
|
||||
|
||||
@@ -92,10 +92,10 @@ bool validateTargetDevice(const TargetDevice &targetDevice, Elf::ElfIdentifierCl
|
||||
return true;
|
||||
}
|
||||
|
||||
template bool validateTargetDevice<Elf::EI_CLASS_32>(const Elf::Elf<Elf::EI_CLASS_32> &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, SingleDeviceBinary &singleDeviceBinary);
|
||||
template bool validateTargetDevice<Elf::EI_CLASS_64>(const Elf::Elf<Elf::EI_CLASS_64> &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, SingleDeviceBinary &singleDeviceBinary);
|
||||
template bool validateTargetDevice<Elf::EI_CLASS_32>(const Elf::Elf<Elf::EI_CLASS_32> &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, GeneratorFeatureVersions &generatorFeatures, GeneratorType &generator);
|
||||
template bool validateTargetDevice<Elf::EI_CLASS_64>(const Elf::Elf<Elf::EI_CLASS_64> &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, GeneratorFeatureVersions &generatorFeatures, GeneratorType &generator);
|
||||
template <Elf::ElfIdentifierClass numBits>
|
||||
bool validateTargetDevice(const Elf::Elf<numBits> &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, SingleDeviceBinary &singleDeviceBinary) {
|
||||
bool validateTargetDevice(const Elf::Elf<numBits> &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, GeneratorFeatureVersions &generatorFeatures, GeneratorType &generator) {
|
||||
GFXCORE_FAMILY gfxCore = IGFX_UNKNOWN_CORE;
|
||||
PRODUCT_FAMILY productFamily = IGFX_UNKNOWN;
|
||||
AOT::PRODUCT_CONFIG productConfig = AOT::UNKNOWN_ISA;
|
||||
@@ -123,7 +123,7 @@ bool validateTargetDevice(const Elf::Elf<numBits> &elf, const TargetDevice &targ
|
||||
DEBUG_BREAK_IF(sizeof(uint32_t) != intelGTNote.data.size());
|
||||
auto targetMetadataPacked = reinterpret_cast<const uint32_t *>(intelGTNote.data.begin());
|
||||
targetMetadata.packed = static_cast<uint32_t>(*targetMetadataPacked);
|
||||
singleDeviceBinary.generator = static_cast<GeneratorType>(targetMetadata.generatorId);
|
||||
generator = static_cast<GeneratorType>(targetMetadata.generatorId);
|
||||
break;
|
||||
}
|
||||
case Elf::IntelGTSectionType::zebinVersion: {
|
||||
@@ -155,13 +155,13 @@ bool validateTargetDevice(const Elf::Elf<numBits> &elf, const TargetDevice &targ
|
||||
case Elf::IntelGTSectionType::indirectAccessDetectionVersion: {
|
||||
DEBUG_BREAK_IF(sizeof(uint32_t) != intelGTNote.data.size());
|
||||
auto indirectDetectionVersion = reinterpret_cast<const uint32_t *>(intelGTNote.data.begin());
|
||||
singleDeviceBinary.generatorFeatureVersions.indirectMemoryAccessDetection = static_cast<uint32_t>(*indirectDetectionVersion);
|
||||
generatorFeatures.indirectMemoryAccessDetection = static_cast<uint32_t>(*indirectDetectionVersion);
|
||||
break;
|
||||
}
|
||||
case Elf::IntelGTSectionType::indirectAccessBufferMajorVersion: {
|
||||
DEBUG_BREAK_IF(sizeof(uint32_t) != intelGTNote.data.size());
|
||||
auto indirectDetectionVersion = reinterpret_cast<const uint32_t *>(intelGTNote.data.begin());
|
||||
singleDeviceBinary.generatorFeatureVersions.indirectAccessBuffer = static_cast<uint32_t>(*indirectDetectionVersion);
|
||||
generatorFeatures.indirectAccessBuffer = static_cast<uint32_t>(*indirectDetectionVersion);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
||||
@@ -52,7 +52,7 @@ bool isZebin(ArrayRef<const uint8_t> binary);
|
||||
bool validateTargetDevice(const TargetDevice &targetDevice, Elf::ElfIdentifierClass numBits, PRODUCT_FAMILY productFamily, GFXCORE_FAMILY gfxCore, AOT::PRODUCT_CONFIG productConfig, Zebin::Elf::ZebinTargetFlags targetMetadata);
|
||||
|
||||
template <Elf::ElfIdentifierClass numBits>
|
||||
bool validateTargetDevice(const Elf::Elf<numBits> &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, SingleDeviceBinary &singleDeviceBinary);
|
||||
bool validateTargetDevice(const Elf::Elf<numBits> &elf, const TargetDevice &targetDevice, std::string &outErrReason, std::string &outWarning, GeneratorFeatureVersions &generatorFeatures, GeneratorType &generator);
|
||||
|
||||
template <Elf::ElfIdentifierClass numBits>
|
||||
DecodeError decodeIntelGTNoteSection(ArrayRef<const uint8_t> intelGTNotesSection, std::vector<Elf::IntelGTNote> &intelGTNotes, std::string &outErrReason, std::string &outWarning);
|
||||
|
||||
@@ -56,7 +56,7 @@ static_assert(ImplicitArgsV0::getSize() == (28 * sizeof(uint32_t)));
|
||||
struct alignas(32) ImplicitArgsV1 {
|
||||
ImplicitArgsHeader header;
|
||||
uint8_t numWorkDim;
|
||||
uint8_t padding0;
|
||||
uint8_t simdWidth;
|
||||
uint32_t localSizeX;
|
||||
uint32_t localSizeY;
|
||||
uint32_t localSizeZ;
|
||||
@@ -71,7 +71,7 @@ struct alignas(32) ImplicitArgsV1 {
|
||||
uint32_t groupCountX;
|
||||
uint32_t groupCountY;
|
||||
uint32_t groupCountZ;
|
||||
uint32_t padding1;
|
||||
uint32_t padding0;
|
||||
uint64_t rtGlobalBufferPtr;
|
||||
uint64_t assertBufferPtr;
|
||||
uint64_t scratchPtr;
|
||||
@@ -183,12 +183,16 @@ struct alignas(32) ImplicitArgs {
|
||||
// Stores the SIMD width in the implicit-args layout selected by the header's
// structVersion: version 0 writes v0.simdWidth, version 1 writes v1.simdWidth.
// Any other structVersion leaves the structure untouched (silent no-op).
// NOTE(review): ImplicitArgsV1::simdWidth is declared as uint8_t in this same
// change, so the uint32_t argument is implicitly narrowed on the v1 path.
// NOTE(review): the two branches read the version through different members
// (v0.header vs v1.header); presumably these alias the same header — confirm
// against the full ImplicitArgs definition.
void setSimdWidth(uint32_t simd) {
|
||||
if (v0.header.structVersion == 0) {
|
||||
v0.simdWidth = simd;
|
||||
} else if (v1.header.structVersion == 1) {
|
||||
v1.simdWidth = simd;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the SIMD width stored in the implicit-args layout selected by the
// header's structVersion: v0.simdWidth for version 0, v1.simdWidth for
// version 1. For any other structVersion no field is read and std::nullopt
// is returned, so callers must handle the empty case.
std::optional<uint32_t> getSimdWidth() const {
|
||||
if (v0.header.structVersion == 0) {
|
||||
return v0.simdWidth;
|
||||
} else if (v1.header.structVersion == 1) {
|
||||
return v1.simdWidth;
|
||||
}
|
||||
// Layout version without a simdWidth accessor here: report "no value".
return std::nullopt;
|
||||
}
|
||||
@@ -328,6 +332,12 @@ struct alignas(32) ImplicitArgs {
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the scratch buffer address into v1.scratchPtr, but only when the
// header's structVersion is 1. For every other structVersion the call is a
// silent no-op and the argument is discarded.
void setScratchBufferPtr(uint64_t scratchBuffer) {
|
||||
if (v1.header.structVersion == 1) {
|
||||
v1.scratchPtr = scratchBuffer;
|
||||
}
|
||||
}
|
||||
|
||||
void setEnqueuedLocalSize(uint32_t x, uint32_t y, uint32_t z) {
|
||||
if (v1.header.structVersion == 1) {
|
||||
v1.enqueuedLocalSizeX = x;
|
||||
|
||||
@@ -59,7 +59,7 @@ uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const
|
||||
auto patchImplicitArgsBufferInCrossThread = NEO::isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
|
||||
uint32_t localIdsSize = 0;
|
||||
if (false == patchImplicitArgsBufferInCrossThread) {
|
||||
auto simdSize = 32u;
|
||||
auto simdSize = kernelDescriptor.kernelAttributes.simdSize;
|
||||
auto grfSize = NEO::ImplicitArgsHelper::getGrfSize(simdSize);
|
||||
auto grfCount = kernelDescriptor.kernelAttributes.numGrfRequired;
|
||||
|
||||
@@ -91,7 +91,7 @@ void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, cons
|
||||
|
||||
uint32_t lws[3] = {0, 0, 0};
|
||||
implicitArgs.getLocalSize(lws[0], lws[1], lws[2]);
|
||||
auto simdSize = implicitArgs.getSimdWidth().value_or(32);
|
||||
auto simdSize = kernelDescriptor.kernelAttributes.simdSize;
|
||||
auto grfSize = getGrfSize(simdSize);
|
||||
auto grfCount = kernelDescriptor.kernelAttributes.numGrfRequired;
|
||||
auto dimensionOrder = getDimensionOrderForLocalIds(kernelDescriptor.kernelAttributes.workgroupDimensionsOrder, hwGenerationOfLocalIdsParams);
|
||||
|
||||
@@ -268,6 +268,7 @@ struct KernelDescriptor : NEO::NonCopyableAndNonMovableClass {
|
||||
uint16_t compiledSubGroupsNumber = 0U;
|
||||
uint8_t requiredSubGroupSize = 0U;
|
||||
uint8_t requiredThreadGroupDispatchSize = 0U;
|
||||
uint8_t indirectAccessBuffer = 0u;
|
||||
bool isGeneratedByIgc = true;
|
||||
} kernelMetadata;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user