mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-06 02:18:05 +08:00
fix: prevent underflow in per thread data offset calculation
Related-To: NEO-14719
Signed-off-by: Naklicki, Mateusz <mateusz.naklicki@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6368f43df8
commit
b462f990b6
@@ -450,8 +450,7 @@ void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &inst
|
|||||||
|
|
||||||
auto relocAddress = ptrOffset(segment.hostPointer, static_cast<uintptr_t>(relocation.offset));
|
auto relocAddress = ptrOffset(segment.hostPointer, static_cast<uintptr_t>(relocation.offset));
|
||||||
if (relocation.type == LinkerInput::RelocationInfo::Type::perThreadPayloadOffset) {
|
if (relocation.type == LinkerInput::RelocationInfo::Type::perThreadPayloadOffset) {
|
||||||
uint32_t crossThreadDataSize = kernelDescriptors.at(segId)->kernelAttributes.crossThreadDataSize - kernelDescriptors.at(segId)->kernelAttributes.inlineDataPayloadSize;
|
*reinterpret_cast<uint32_t *>(relocAddress) = kernelDescriptors.at(segId)->getPerThreadDataOffset();
|
||||||
*reinterpret_cast<uint32_t *>(relocAddress) = crossThreadDataSize;
|
|
||||||
} else if (relocation.symbolName == implicitArgsRelocationSymbolName) {
|
} else if (relocation.symbolName == implicitArgsRelocationSymbolName) {
|
||||||
pImplicitArgsRelocationAddresses[static_cast<uint32_t>(segId)].push_back(std::pair<void *, RelocationInfo::Type>(relocAddress, relocation.type));
|
pImplicitArgsRelocationAddresses[static_cast<uint32_t>(segId)].push_back(std::pair<void *, RelocationInfo::Type>(relocAddress, relocation.type));
|
||||||
} else if (relocation.symbolName.empty()) {
|
} else if (relocation.symbolName.empty()) {
|
||||||
@@ -701,8 +700,7 @@ void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolved
|
|||||||
|
|
||||||
auto kernelDescriptor = std::find_if(kernelDescriptors.begin(), kernelDescriptors.end(), [&kernelName](const KernelDescriptor *obj) { return obj->kernelMetadata.kernelName == kernelName; });
|
auto kernelDescriptor = std::find_if(kernelDescriptors.begin(), kernelDescriptors.end(), [&kernelName](const KernelDescriptor *obj) { return obj->kernelMetadata.kernelName == kernelName; });
|
||||||
if (kernelDescriptor != std::end(kernelDescriptors)) {
|
if (kernelDescriptor != std::end(kernelDescriptors)) {
|
||||||
uint64_t crossThreadDataSize = (*kernelDescriptor)->kernelAttributes.crossThreadDataSize - (*kernelDescriptor)->kernelAttributes.inlineDataPayloadSize;
|
symbol.gpuAddress = (*kernelDescriptor)->getPerThreadDataOffset();
|
||||||
symbol.gpuAddress = crossThreadDataSize;
|
|
||||||
auto relocAddress = ptrOffset(instructionsSegments[outUnresolvedExternals[vecIndex].instructionsSegmentId].hostPointer,
|
auto relocAddress = ptrOffset(instructionsSegments[outUnresolvedExternals[vecIndex].instructionsSegmentId].hostPointer,
|
||||||
static_cast<uintptr_t>(outUnresolvedExternals[vecIndex].unresolvedRelocation.offset));
|
static_cast<uintptr_t>(outUnresolvedExternals[vecIndex].unresolvedRelocation.offset));
|
||||||
|
|
||||||
|
|||||||
@@ -60,6 +60,9 @@ struct KernelDescriptor : NEO::NonCopyableAndNonMovableClass {
|
|||||||
const BindlessToSurfaceStateMap &getBindlessOffsetToSurfaceState() const {
|
const BindlessToSurfaceStateMap &getBindlessOffsetToSurfaceState() const {
|
||||||
return bindlessArgsMap;
|
return bindlessArgsMap;
|
||||||
}
|
}
|
||||||
|
uint16_t getPerThreadDataOffset() const {
|
||||||
|
return kernelAttributes.crossThreadDataSize - std::min(kernelAttributes.crossThreadDataSize, kernelAttributes.inlineDataPayloadSize);
|
||||||
|
}
|
||||||
|
|
||||||
struct KernelAttributes {
|
struct KernelAttributes {
|
||||||
uint32_t slmInlineSize = 0U;
|
uint32_t slmInlineSize = 0U;
|
||||||
|
|||||||
@@ -783,10 +783,8 @@ HWTEST_F(LinkerTests, givenInvalidLinkerInputThenLinkerFails) {
|
|||||||
HWTEST_F(LinkerTests, givenUnresolvedExternalSymbolsWhenResolveBuiltinsIsCalledThenSubDeviceIDSymbolsAreRemoved) {
|
HWTEST_F(LinkerTests, givenUnresolvedExternalSymbolsWhenResolveBuiltinsIsCalledThenSubDeviceIDSymbolsAreRemoved) {
|
||||||
struct LinkerMock : public NEO::Linker {
|
struct LinkerMock : public NEO::Linker {
|
||||||
public:
|
public:
|
||||||
|
using NEO::Linker::Linker;
|
||||||
using NEO::Linker::resolveBuiltins;
|
using NEO::Linker::resolveBuiltins;
|
||||||
|
|
||||||
LinkerMock(const LinkerInput &data) : NEO::Linker(data) {
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
NEO::LinkerInput linkerInput;
|
NEO::LinkerInput linkerInput;
|
||||||
@@ -867,10 +865,8 @@ HWTEST_F(LinkerTests, givenUnresolvedExternalsWhenLinkThenSubDeviceIDSymbolsAreC
|
|||||||
HWTEST_F(LinkerTests, givenUnresolvedExternalSymbolsWhenResolveBuiltinsIsCalledThenPerThreadOffSymbolsAreResolvedAndRemoved) {
|
HWTEST_F(LinkerTests, givenUnresolvedExternalSymbolsWhenResolveBuiltinsIsCalledThenPerThreadOffSymbolsAreResolvedAndRemoved) {
|
||||||
struct LinkerMock : public NEO::Linker {
|
struct LinkerMock : public NEO::Linker {
|
||||||
public:
|
public:
|
||||||
|
using NEO::Linker::Linker;
|
||||||
using NEO::Linker::resolveBuiltins;
|
using NEO::Linker::resolveBuiltins;
|
||||||
|
|
||||||
LinkerMock(const LinkerInput &data) : NEO::Linker(data) {
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const uint64_t kernel1RelocOffset = 40;
|
const uint64_t kernel1RelocOffset = 40;
|
||||||
@@ -922,10 +918,8 @@ HWTEST_F(LinkerTests, givenUnresolvedExternalSymbolsWhenResolveBuiltinsIsCalledT
|
|||||||
HWTEST_F(LinkerTests, givenPerThreadOffSymbolInUnresolvedExternalSymbolsAndMissingKernelDescriptorForPerThreadOffSymbolWhenResolveBuiltinsThenPerThreadOffSymbolIsNotResolved) {
|
HWTEST_F(LinkerTests, givenPerThreadOffSymbolInUnresolvedExternalSymbolsAndMissingKernelDescriptorForPerThreadOffSymbolWhenResolveBuiltinsThenPerThreadOffSymbolIsNotResolved) {
|
||||||
struct LinkerMock : public NEO::Linker {
|
struct LinkerMock : public NEO::Linker {
|
||||||
public:
|
public:
|
||||||
|
using NEO::Linker::Linker;
|
||||||
using NEO::Linker::resolveBuiltins;
|
using NEO::Linker::resolveBuiltins;
|
||||||
|
|
||||||
LinkerMock(const LinkerInput &data) : NEO::Linker(data) {
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const uint64_t kernelRelocOffset = 40;
|
const uint64_t kernelRelocOffset = 40;
|
||||||
@@ -961,6 +955,40 @@ HWTEST_F(LinkerTests, givenPerThreadOffSymbolInUnresolvedExternalSymbolsAndMissi
|
|||||||
EXPECT_TRUE(isPerThreadOffUnresolved);
|
EXPECT_TRUE(isPerThreadOffUnresolved);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(LinkerTests, givenUnresolvedExternalSymbolsAndCrossThreadDataSmallerThanInlineDataWhenResolveBuiltinsIsCalledThenPerThreadOffSymbolIsResolvedAndRemoved) {
    // Expose the protected resolveBuiltins entry point for direct testing.
    struct LinkerMock : public NEO::Linker {
      public:
        using NEO::Linker::Linker;
        using NEO::Linker::resolveBuiltins;
    };

    const uint64_t relocOffsetInKernel = 40u;

    // Single unresolved perThreadOff relocation pointing into kernel_func1's text segment.
    NEO::LinkerInput linkerInput{};
    LinkerMock linker(linkerInput);
    NEO::Linker::UnresolvedExternals unresolvedExternals{};
    unresolvedExternals.push_back({{NEO::Linker::perThreadOff, relocOffsetInKernel, NEO::Linker::RelocationInfo::Type::address16, NEO::SegmentType::instructions, ".text.kernel_func1"}, 0u, false});

    // Zero-initialized instruction segment large enough to hold the patched value.
    std::vector<char> instructionSegment(relocOffsetInKernel + 16, 0);
    NEO::Linker::PatchableSegments instructionsSegments{};
    instructionsSegments.push_back({instructionSegment.data(), 0u});

    // Descriptor whose cross-thread data is smaller than the inline payload —
    // the per-thread data offset must resolve to zero rather than underflow.
    KernelDescriptor descriptor{};
    descriptor.kernelMetadata.kernelName = "kernel_func1";
    descriptor.kernelAttributes.crossThreadDataSize = 40u;
    descriptor.kernelAttributes.inlineDataPayloadSize = 64u;

    NEO::Linker::KernelDescriptorsT kernelDescriptors{};
    kernelDescriptors.push_back(&descriptor);

    linker.resolveBuiltins(pDevice, unresolvedExternals, instructionsSegments, kernelDescriptors);

    // The perThreadOff symbol was resolved, so nothing remains unresolved.
    EXPECT_EQ(0U, unresolvedExternals.size());

    // The relocation site must have been patched with a clamped offset of zero.
    const uint16_t expectedPatchedOffset = 0u;
    EXPECT_EQ(*reinterpret_cast<uint16_t *>(&instructionSegment[relocOffsetInKernel]), expectedPatchedOffset);
}
|
||||||
|
|
||||||
HWTEST_F(LinkerTests, givenUnresolvedExternalWhenPatchingInstructionsThenLinkPartially) {
|
HWTEST_F(LinkerTests, givenUnresolvedExternalWhenPatchingInstructionsThenLinkPartially) {
|
||||||
NEO::LinkerInput linkerInput;
|
NEO::LinkerInput linkerInput;
|
||||||
vISA::GenRelocEntry entry = {};
|
vISA::GenRelocEntry entry = {};
|
||||||
@@ -2737,3 +2765,32 @@ TEST_F(LinkerTests, givenPerThreadPayloadOffsetRelocationWhenPatchingInstruction
|
|||||||
uint32_t expectedPatchedValue = kd.kernelAttributes.crossThreadDataSize - kd.kernelAttributes.inlineDataPayloadSize;
|
uint32_t expectedPatchedValue = kd.kernelAttributes.crossThreadDataSize - kd.kernelAttributes.inlineDataPayloadSize;
|
||||||
EXPECT_EQ(expectedPatchedValue, static_cast<uint32_t>(*perThreadPayloadOffsetPatchedValue));
|
EXPECT_EQ(expectedPatchedValue, static_cast<uint32_t>(*perThreadPayloadOffsetPatchedValue));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(LinkerTests, givenPerThreadPayloadOffsetRelocationAndCrossThreadDataSmallerThanInlineDataWhenPatchingInstructionSegmentsThenPatchItWithOffsetZero) {
    // Linker input carrying a single perThreadPayloadOffset relocation in the text segment.
    WhiteBox<NEO::LinkerInput> linkerInput{};
    linkerInput.traits.requiresPatchingOfInstructionSegments = true;

    NEO::LinkerInput::RelocationInfo relocationInfo{};
    relocationInfo.offset = 0x4;
    relocationInfo.type = NEO::LinkerInput::RelocationInfo::Type::perThreadPayloadOffset;
    relocationInfo.relocationSegment = NEO::SegmentType::instructions;
    linkerInput.textRelocations.push_back({relocationInfo});

    // crossThreadDataSize < inlineDataPayloadSize: a naive subtraction would underflow,
    // so the patched value is expected to be clamped to zero.
    NEO::Linker::KernelDescriptorsT kernelDescriptors{};
    KernelDescriptor descriptor{};
    descriptor.kernelAttributes.crossThreadDataSize = 40u;
    descriptor.kernelAttributes.inlineDataPayloadSize = 64u;
    kernelDescriptors.push_back(&descriptor);

    WhiteBox<NEO::Linker> linker(linkerInput);

    // Patch target: a zero-initialized 8-byte buffer standing in for the instruction segment.
    uint64_t segmentStorage{0};
    NEO::Linker::PatchableSegment segmentToPatch{};
    segmentToPatch.hostPointer = reinterpret_cast<void *>(&segmentStorage);
    segmentToPatch.segmentSize = sizeof(segmentStorage);

    NEO::Linker::UnresolvedExternals unresolvedExternals{};
    linker.patchInstructionsSegments({segmentToPatch}, unresolvedExternals, kernelDescriptors);

    // The clamped per-thread payload offset (0) must have been written at the relocation offset.
    auto patchedValue = reinterpret_cast<uint32_t *>(ptrOffset(segmentToPatch.hostPointer, static_cast<size_t>(relocationInfo.offset)));
    const uint32_t expectedPatchedValue = 0u;
    EXPECT_EQ(expectedPatchedValue, static_cast<uint32_t>(*patchedValue));
}
|
||||||
|
|||||||
@@ -273,3 +273,20 @@ TEST(KernelDescriptor, GivenDescriptorWithoutStatefulArgsWhenInitBindlessOffsets
|
|||||||
desc.initBindlessOffsetToSurfaceState();
|
desc.initBindlessOffsetToSurfaceState();
|
||||||
EXPECT_EQ(0u, desc.bindlessArgsMap.size());
|
EXPECT_EQ(0u, desc.bindlessArgsMap.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(KernelDescriptor, GivenDescriptorWhenGettingPerThreadDataOffsetThenItReturnsCorrectValue) {
    NEO::KernelDescriptor descriptor{};
    auto &attributes = descriptor.kernelAttributes;

    // Inline payload exactly covers the cross-thread data -> offset is zero.
    attributes.crossThreadDataSize = 64u;
    attributes.inlineDataPayloadSize = 64u;
    EXPECT_EQ(0u, descriptor.getPerThreadDataOffset());

    // Inline payload larger than the cross-thread data: the subtraction is
    // clamped, so the offset stays zero instead of underflowing.
    attributes.crossThreadDataSize = 40u;
    attributes.inlineDataPayloadSize = 64u;
    EXPECT_EQ(0u, descriptor.getPerThreadDataOffset());

    // Cross-thread data exceeds the inline payload -> the remainder is the offset.
    attributes.crossThreadDataSize = 128u;
    attributes.inlineDataPayloadSize = 64u;
    EXPECT_EQ(64u, descriptor.getPerThreadDataOffset());
}
|
||||||
|
|||||||
Reference in New Issue
Block a user