mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 20:39:56 +08:00
fix: prevent underflow in per thread data offset calculation
Related-To: NEO-14719
Signed-off-by: Naklicki, Mateusz <mateusz.naklicki@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6368f43df8
commit
b462f990b6
@@ -450,8 +450,7 @@ void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &inst
|
||||
|
||||
auto relocAddress = ptrOffset(segment.hostPointer, static_cast<uintptr_t>(relocation.offset));
|
||||
if (relocation.type == LinkerInput::RelocationInfo::Type::perThreadPayloadOffset) {
|
||||
uint32_t crossThreadDataSize = kernelDescriptors.at(segId)->kernelAttributes.crossThreadDataSize - kernelDescriptors.at(segId)->kernelAttributes.inlineDataPayloadSize;
|
||||
*reinterpret_cast<uint32_t *>(relocAddress) = crossThreadDataSize;
|
||||
*reinterpret_cast<uint32_t *>(relocAddress) = kernelDescriptors.at(segId)->getPerThreadDataOffset();
|
||||
} else if (relocation.symbolName == implicitArgsRelocationSymbolName) {
|
||||
pImplicitArgsRelocationAddresses[static_cast<uint32_t>(segId)].push_back(std::pair<void *, RelocationInfo::Type>(relocAddress, relocation.type));
|
||||
} else if (relocation.symbolName.empty()) {
|
||||
@@ -701,8 +700,7 @@ void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolved
|
||||
|
||||
auto kernelDescriptor = std::find_if(kernelDescriptors.begin(), kernelDescriptors.end(), [&kernelName](const KernelDescriptor *obj) { return obj->kernelMetadata.kernelName == kernelName; });
|
||||
if (kernelDescriptor != std::end(kernelDescriptors)) {
|
||||
uint64_t crossThreadDataSize = (*kernelDescriptor)->kernelAttributes.crossThreadDataSize - (*kernelDescriptor)->kernelAttributes.inlineDataPayloadSize;
|
||||
symbol.gpuAddress = crossThreadDataSize;
|
||||
symbol.gpuAddress = (*kernelDescriptor)->getPerThreadDataOffset();
|
||||
auto relocAddress = ptrOffset(instructionsSegments[outUnresolvedExternals[vecIndex].instructionsSegmentId].hostPointer,
|
||||
static_cast<uintptr_t>(outUnresolvedExternals[vecIndex].unresolvedRelocation.offset));
|
||||
|
||||
|
||||
@@ -60,6 +60,9 @@ struct KernelDescriptor : NEO::NonCopyableAndNonMovableClass {
|
||||
// Read-only accessor: hands back a const reference to the bindlessArgsMap member.
const BindlessToSurfaceStateMap &getBindlessOffsetToSurfaceState() const {
|
||||
return bindlessArgsMap;
|
||||
}
|
||||
uint16_t getPerThreadDataOffset() const {
|
||||
return kernelAttributes.crossThreadDataSize - std::min(kernelAttributes.crossThreadDataSize, kernelAttributes.inlineDataPayloadSize);
|
||||
}
|
||||
|
||||
struct KernelAttributes {
|
||||
uint32_t slmInlineSize = 0U;
|
||||
|
||||
@@ -783,10 +783,8 @@ HWTEST_F(LinkerTests, givenInvalidLinkerInputThenLinkerFails) {
|
||||
HWTEST_F(LinkerTests, givenUnresolvedExternalSymbolsWhenResolveBuiltinsIsCalledThenSubDeviceIDSymbolsAreRemoved) {
|
||||
struct LinkerMock : public NEO::Linker {
|
||||
public:
|
||||
using NEO::Linker::Linker;
|
||||
using NEO::Linker::resolveBuiltins;
|
||||
|
||||
LinkerMock(const LinkerInput &data) : NEO::Linker(data) {
|
||||
}
|
||||
};
|
||||
|
||||
NEO::LinkerInput linkerInput;
|
||||
@@ -867,10 +865,8 @@ HWTEST_F(LinkerTests, givenUnresolvedExternalsWhenLinkThenSubDeviceIDSymbolsAreC
|
||||
HWTEST_F(LinkerTests, givenUnresolvedExternalSymbolsWhenResolveBuiltinsIsCalledThenPerThreadOffSymbolsAreResolvedAndRemoved) {
|
||||
struct LinkerMock : public NEO::Linker {
|
||||
public:
|
||||
using NEO::Linker::Linker;
|
||||
using NEO::Linker::resolveBuiltins;
|
||||
|
||||
LinkerMock(const LinkerInput &data) : NEO::Linker(data) {
|
||||
}
|
||||
};
|
||||
|
||||
const uint64_t kernel1RelocOffset = 40;
|
||||
@@ -922,10 +918,8 @@ HWTEST_F(LinkerTests, givenUnresolvedExternalSymbolsWhenResolveBuiltinsIsCalledT
|
||||
HWTEST_F(LinkerTests, givenPerThreadOffSymbolInUnresolvedExternalSymbolsAndMissingKernelDescriptorForPerThreadOffSymbolWhenResolveBuiltinsThenPerThreadOffSymbolIsNotResolved) {
|
||||
struct LinkerMock : public NEO::Linker {
|
||||
public:
|
||||
using NEO::Linker::Linker;
|
||||
using NEO::Linker::resolveBuiltins;
|
||||
|
||||
LinkerMock(const LinkerInput &data) : NEO::Linker(data) {
|
||||
}
|
||||
};
|
||||
|
||||
const uint64_t kernelRelocOffset = 40;
|
||||
@@ -961,6 +955,40 @@ HWTEST_F(LinkerTests, givenPerThreadOffSymbolInUnresolvedExternalSymbolsAndMissi
|
||||
EXPECT_TRUE(isPerThreadOffUnresolved);
|
||||
}
|
||||
|
||||
// Verifies that resolving the per-thread-offset builtin symbol patches a zero
// offset (instead of a wrapped-around value) when crossThreadDataSize is
// smaller than inlineDataPayloadSize, and that the resolved entry is removed
// from the unresolved externals list.
HWTEST_F(LinkerTests, givenUnresolvedExternalSymbolsAndCrossThreadDataSmallerThanInlineDataWhenResolveBuiltinsIsCalledThenPerThreadOffSymbolIsResolvedAndRemoved) {
    // Exposes the otherwise inaccessible resolveBuiltins for direct invocation.
    struct LinkerMock : public NEO::Linker {
      public:
        using NEO::Linker::Linker;
        using NEO::Linker::resolveBuiltins;
    };

    const uint64_t kernelRelocOffset = 40u;

    NEO::LinkerInput linkerInput{};
    LinkerMock linker(linkerInput);
    NEO::Linker::UnresolvedExternals unresolvedExternals{};
    unresolvedExternals.push_back({{NEO::Linker::perThreadOff, kernelRelocOffset, NEO::Linker::RelocationInfo::Type::address16, NEO::SegmentType::instructions, ".text.kernel_func1"}, 0u, false});

    // Pre-fill the segment with a non-zero pattern: the expected patched value
    // is zero, so a zero-initialized buffer would pass the final check even if
    // nothing had been written at the relocation address.
    std::vector<char> instructionSegment{};
    instructionSegment.resize(kernelRelocOffset + 16, '\x5A');
    NEO::Linker::PatchableSegments instructionsSegments{};
    instructionsSegments.push_back({instructionSegment.data(), 0u});

    // Inline data payload (64) exceeds cross-thread data (40): the naive
    // subtraction would underflow.
    KernelDescriptor kernelDescriptor1{};
    kernelDescriptor1.kernelMetadata.kernelName = "kernel_func1";
    kernelDescriptor1.kernelAttributes.crossThreadDataSize = 40u;
    kernelDescriptor1.kernelAttributes.inlineDataPayloadSize = 64u;

    NEO::Linker::KernelDescriptorsT kernelDescriptors{};
    kernelDescriptors.push_back(&kernelDescriptor1);
    linker.resolveBuiltins(pDevice, unresolvedExternals, instructionsSegments, kernelDescriptors);

    EXPECT_EQ(0U, unresolvedExternals.size());

    uint16_t gpuAddress = 0u;
    EXPECT_EQ(*reinterpret_cast<uint16_t *>(&instructionSegment[kernelRelocOffset]), gpuAddress);
}
|
||||
|
||||
HWTEST_F(LinkerTests, givenUnresolvedExternalWhenPatchingInstructionsThenLinkPartially) {
|
||||
NEO::LinkerInput linkerInput;
|
||||
vISA::GenRelocEntry entry = {};
|
||||
@@ -2737,3 +2765,32 @@ TEST_F(LinkerTests, givenPerThreadPayloadOffsetRelocationWhenPatchingInstruction
|
||||
uint32_t expectedPatchedValue = kd.kernelAttributes.crossThreadDataSize - kd.kernelAttributes.inlineDataPayloadSize;
|
||||
EXPECT_EQ(expectedPatchedValue, static_cast<uint32_t>(*perThreadPayloadOffsetPatchedValue));
|
||||
}
|
||||
|
||||
// Verifies that a perThreadPayloadOffset relocation is patched with zero
// (instead of a wrapped-around value) when crossThreadDataSize is smaller than
// inlineDataPayloadSize.
TEST_F(LinkerTests, givenPerThreadPayloadOffsetRelocationAndCrossThreadDataSmallerThanInlineDataWhenPatchingInstructionSegmentsThenPatchItWithOffsetZero) {
    WhiteBox<NEO::LinkerInput> linkerInput{};
    linkerInput.traits.requiresPatchingOfInstructionSegments = true;
    NEO::LinkerInput::RelocationInfo rel{};
    rel.offset = 0x4;
    rel.type = NEO::LinkerInput::RelocationInfo::Type::perThreadPayloadOffset;
    rel.relocationSegment = NEO::SegmentType::instructions;
    linkerInput.textRelocations.push_back({rel});

    // Inline data payload (64) exceeds cross-thread data (40): the naive
    // subtraction would underflow.
    NEO::Linker::KernelDescriptorsT kernelDescriptors{};
    KernelDescriptor kd{};
    kd.kernelAttributes.crossThreadDataSize = 40u;
    kd.kernelAttributes.inlineDataPayloadSize = 64u;
    kernelDescriptors.push_back(&kd);

    WhiteBox<NEO::Linker> linker(linkerInput);

    // Start from an all-ones pattern: the expected patched value is zero, so a
    // zero-initialized segment would pass the final check even if the
    // relocation had never been written.
    uint64_t segmentData{0xFFFFFFFFFFFFFFFFull};
    NEO::Linker::PatchableSegment segmentToPatch{};
    segmentToPatch.hostPointer = reinterpret_cast<void *>(&segmentData);
    segmentToPatch.segmentSize = sizeof(segmentData);

    NEO::Linker::UnresolvedExternals unresolvedExternals{};
    linker.patchInstructionsSegments({segmentToPatch}, unresolvedExternals, kernelDescriptors);
    auto perThreadPayloadOffsetPatchedValue = reinterpret_cast<uint32_t *>(ptrOffset(segmentToPatch.hostPointer, static_cast<size_t>(rel.offset)));
    uint32_t expectedPatchedValue = 0u;
    EXPECT_EQ(expectedPatchedValue, static_cast<uint32_t>(*perThreadPayloadOffsetPatchedValue));
}
|
||||
|
||||
@@ -273,3 +273,20 @@ TEST(KernelDescriptor, GivenDescriptorWithoutStatefulArgsWhenInitBindlessOffsets
|
||||
desc.initBindlessOffsetToSurfaceState();
|
||||
EXPECT_EQ(0u, desc.bindlessArgsMap.size());
|
||||
}
|
||||
|
||||
// Checks getPerThreadDataOffset() for three size relations between
// crossThreadDataSize and inlineDataPayloadSize: equal, smaller, and larger.
TEST(KernelDescriptor, GivenDescriptorWhenGettingPerThreadDataOffsetThenItReturnsCorrectValue) {
    NEO::KernelDescriptor desc{};

    // Applies both sizes to the descriptor and reports the resulting offset.
    auto offsetFor = [&desc](uint16_t crossThreadDataSize, uint16_t inlineDataPayloadSize) {
        desc.kernelAttributes.crossThreadDataSize = crossThreadDataSize;
        desc.kernelAttributes.inlineDataPayloadSize = inlineDataPayloadSize;
        return desc.getPerThreadDataOffset();
    };

    EXPECT_EQ(0u, offsetFor(64u, 64u));

    // crossThreadData is fully consumed by inlineDataPayload
    EXPECT_EQ(0u, offsetFor(40u, 64u));

    EXPECT_EQ(64u, offsetFor(128u, 64u));
}
|
||||
|
||||
Reference in New Issue
Block a user