fix: correctly patch implicit arg buffer in indirect data

- use the correct aligned size of the implicit arg buffer; cross-thread data
should start right after the buffer without extra padding

Related-To: NEO-14449

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2025-05-05 11:36:29 +00:00
committed by Compute-Runtime-Automation
parent 22ddaea09f
commit e345d55fe5
10 changed files with 44 additions and 33 deletions

View File

@@ -47,6 +47,7 @@ struct alignas(32) ImplicitArgsV0 {
uint8_t reserved[16];
// Size in bytes of the fields that precede `reserved`, i.e. ImplicitArgsV0 without its trailing reserved area.
static constexpr uint8_t getSize() {
    constexpr auto reservedOffset = offsetof(ImplicitArgsV0, reserved);
    return static_cast<uint8_t>(reservedOffset);
}
// Full size in bytes of ImplicitArgsV0 as reported by sizeof, i.e. including the trailing `reserved` bytes.
static constexpr uint8_t getAlignedSize() {
    // The return type is a single byte: fail the build instead of silently truncating
    // if the struct ever grows past 255 bytes.
    static_assert(sizeof(ImplicitArgsV0) <= 0xFFu, "ImplicitArgsV0 size must fit in uint8_t");
    return static_cast<uint8_t>(sizeof(ImplicitArgsV0));
}
};
// Compile-time check that ImplicitArgsV0 has an alignment requirement of exactly 32 bytes.
static_assert(alignof(ImplicitArgsV0) == 32, "Implicit args size need to be aligned to 32");
@@ -78,6 +79,7 @@ struct alignas(32) ImplicitArgsV1 {
uint8_t reserved[44];
// Size in bytes of the fields that precede `reserved`, i.e. ImplicitArgsV1 without its trailing reserved area.
static constexpr uint8_t getSize() {
    constexpr auto reservedOffset = offsetof(ImplicitArgsV1, reserved);
    return static_cast<uint8_t>(reservedOffset);
}
// Full size in bytes of ImplicitArgsV1 as reported by sizeof, i.e. including the trailing `reserved` bytes.
static constexpr uint8_t getAlignedSize() {
    // The return type is a single byte: fail the build instead of silently truncating
    // if the struct ever grows past 255 bytes.
    static_assert(sizeof(ImplicitArgsV1) <= 0xFFu, "ImplicitArgsV1 size must fit in uint8_t");
    return static_cast<uint8_t>(sizeof(ImplicitArgsV1));
}
};
// Compile-time check that ImplicitArgsV1 has an alignment requirement of exactly 32 bytes.
static_assert(alignof(ImplicitArgsV1) == 32, "Implicit args size need to be aligned to 32");
@@ -113,6 +115,18 @@ struct alignas(32) ImplicitArgs {
return 0;
}
// Returns the aligned size of the implicit-args layout selected by the header's structVersion:
// ImplicitArgsV0::getAlignedSize() for version 0, ImplicitArgsV1::getAlignedSize() for version 1.
// Any other version triggers DEBUG_BREAK_IF and yields 0.
uint8_t getAlignedSize() const {
    uint8_t alignedBytes = 0;
    if (v0.header.structVersion == 0) {
        alignedBytes = ImplicitArgsV0::getAlignedSize();
    } else if (v1.header.structVersion == 1) {
        alignedBytes = ImplicitArgsV1::getAlignedSize();
    } else {
        // Unknown struct version: flag it in debug builds and fall through to the 0 result.
        DEBUG_BREAK_IF(true);
    }
    return alignedBytes;
}
void setNumWorkDim(uint32_t numWorkDim) {
if (v0.header.structVersion == 0) {
v0.numWorkDim = numWorkDim;

View File

@@ -48,14 +48,7 @@ uint32_t getSizeForImplicitArgsStruct(const ImplicitArgs *pImplicitArgs, const K
if (!pImplicitArgs) {
return 0;
}
auto implicitArgsSize = pImplicitArgs->getSize();
auto patchImplicitArgsBufferInCrossThread = NEO::isValidOffset<>(kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
if (patchImplicitArgsBufferInCrossThread) {
return alignUp(implicitArgsSize, MemoryConstants::cacheLineSize);
} else {
return implicitArgsSize;
}
return pImplicitArgs->getAlignedSize();
}
uint32_t getSizeForImplicitArgsPatching(const ImplicitArgs *pImplicitArgs, const KernelDescriptor &kernelDescriptor, bool isHwLocalIdGeneration, const RootDeviceEnvironment &rootDeviceEnvironment) {
@@ -112,7 +105,7 @@ void *patchImplicitArgs(void *ptrToPatch, const ImplicitArgs &implicitArgs, cons
dimensionOrder,
false, grfSize, grfCount, rootDeviceEnvironment);
auto sizeForLocalIdsProgramming = totalSizeToProgram - implicitArgs.getSize();
auto sizeForLocalIdsProgramming = totalSizeToProgram - implicitArgs.getAlignedSize();
ptrToPatch = ptrOffset(ptrToPatch, sizeForLocalIdsProgramming);
}