fix: set sync buffer ptr in implicit args

Related-To: NEO-15737

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2025-08-04 11:31:26 +00:00
committed by Compute-Runtime-Automation
parent 3eefd8a4ad
commit b7a73653a2
4 changed files with 68 additions and 0 deletions

View File

@@ -1420,6 +1420,9 @@ void KernelImp::patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t b
NEO::patchPointer(getCrossThreadDataSpan(), NEO::patchPointer(getCrossThreadDataSpan(),
this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.syncBufferAddress, this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.syncBufferAddress,
static_cast<uintptr_t>(ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset))); static_cast<uintptr_t>(ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)));
if (state.pImplicitArgs) {
state.pImplicitArgs->setSyncBufferPtr(static_cast<uintptr_t>(ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)));
}
} }
void KernelImp::patchRegionGroupBarrier(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) { void KernelImp::patchRegionGroupBarrier(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) {

View File

@@ -4099,6 +4099,34 @@ TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsWhenInitializeThenPrin
EXPECT_EQ(printfSurface->getGpuAddress(), pImplicitArgs->v0.printfBufferPtr); EXPECT_EQ(printfSurface->getGpuAddress(), pImplicitArgs->v0.printfBufferPtr);
} }
// Checks that KernelImp::patchSyncBuffer stores the sync buffer GPU address in
// the kernel's implicit args when the kernel was created with
// requiresImplicitArgs set (usesSyncBuffer is left cleared in the descriptor).
TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsWhenInitializeThenSyncBufferIsCreatedAndProperlyPatchedInImplicitArgs) {
    // Build a module whose kernel descriptor requests implicit args.
    auto immutableData = std::make_unique<MockImmutableData>(0u);
    auto &attributeFlags = immutableData->kernelDescriptor->kernelAttributes.flags;
    attributeFlags.requiresImplicitArgs = true;
    attributeFlags.usesSyncBuffer = false;
    createModuleFromMockBinary(0u, false, immutableData.get());

    // The assertions below read the v1 view of the implicit args, so the
    // version is set to 1 before the kernel is initialized.
    auto mockKernel = std::make_unique<MockKernel>(module.get());
    mockKernel->implicitArgsVersion = 1;

    ze_kernel_desc_t desc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    mockKernel->initialize(&desc);

    EXPECT_TRUE(mockKernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);

    auto implicitArgs = mockKernel->getImplicitArgs();
    ASSERT_NE(nullptr, implicitArgs);

    // Sync buffer allocation with a known GPU address and no offsets.
    NEO::MockGraphicsAllocation syncBufferAllocation;
    syncBufferAllocation.setGpuPtr(0xffff800300060000);
    syncBufferAllocation.allocationOffset = 0x0;

    size_t offsetInBuffer = 0u;
    mockKernel->patchSyncBuffer(&syncBufferAllocation, offsetInBuffer);

    // The pointer must have been written and must equal the allocation address.
    EXPECT_NE(0u, implicitArgs->v1.syncBufferPtr);
    EXPECT_EQ(syncBufferAllocation.getGpuAddress(), implicitArgs->v1.syncBufferPtr);
}
TEST_F(KernelImplicitArgTests, givenImplicitArgsRequiredWhenCreatingKernelThenImplicitArgsAreCreated) { TEST_F(KernelImplicitArgTests, givenImplicitArgsRequiredWhenCreatingKernelThenImplicitArgsAreCreated) {
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u); std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);

View File

@@ -1774,6 +1774,9 @@ void Kernel::patchSyncBuffer(GraphicsAllocation *gfxAllocation, size_t bufferOff
Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0, Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0,
areMultipleSubDevicesInContext()); areMultipleSubDevicesInContext());
} }
if (pImplicitArgs) {
pImplicitArgs->setSyncBufferPtr(ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset));
}
} }
bool Kernel::isPatched() const { bool Kernel::isPatched() const {

View File

@@ -3847,6 +3847,40 @@ TEST_F(KernelImplicitArgsTest, GivenProgramWithImplicitAccessBufferVersionWhenKe
} }
} }
// Checks that Kernel::patchSyncBuffer stores the sync buffer GPU address in the
// kernel's implicit args (read through the v1 view) for a kernel that both
// requires implicit args and uses a sync buffer.
TEST_F(KernelImplicitArgsTest, GivenKernelWithSyncBufferWhenInitializingKernelThenImplicitArgsSyncBufferPtrIsSet) {
    // Describe a kernel that needs implicit args and has a stateless sync
    // buffer pointer at offset 0 of its cross-thread data.
    auto pKernelInfo = std::make_unique<MockKernelInfo>();
    auto &descriptor = pKernelInfo->kernelDescriptor;
    descriptor.kernelMetadata.kernelName = "test";
    descriptor.kernelAttributes.simdSize = 32;
    descriptor.kernelAttributes.flags.requiresImplicitArgs = true;
    descriptor.kernelAttributes.flags.usesSyncBuffer = true;
    descriptor.payloadMappings.implicitArgs.syncBufferAddress.stateless = 0;
    descriptor.payloadMappings.implicitArgs.syncBufferAddress.pointerSize = sizeof(uintptr_t);

    MockContext context(pClDevice);
    MockProgram program(&context, false, toClDeviceVector(*pClDevice));
    program.indirectAccessBufferMajorVersion = 1;

    MockKernel kernel(&program, *pKernelInfo, *pClDevice);
    kernel.initialize();
    // NOTE(review): presumably provides 64 bytes of cross-thread data so the
    // stateless pointer patch has somewhere to land - confirm against MockKernel.
    kernel.setCrossThreadData(nullptr, 64);

    // Sync buffer allocation with a known GPU address and no offsets.
    NEO::MockGraphicsAllocation alloc;
    alloc.setGpuPtr(0xffff800300060000);
    alloc.allocationOffset = 0x0;

    size_t bufferOffset = 0u;
    kernel.patchSyncBuffer(&alloc, bufferOffset);

    auto implicitArgs = kernel.getImplicitArgs();
    ASSERT_NE(nullptr, implicitArgs);

    // Guard against a vacuous pass where both sides are zero, then compare
    // against the allocation address.
    auto syncBufferAddress = alloc.getGpuAddress();
    EXPECT_NE(0u, implicitArgs->v1.syncBufferPtr);
    EXPECT_EQ(syncBufferAddress, implicitArgs->v1.syncBufferPtr);
}
TEST_F(KernelImplicitArgsTest, givenKernelWithImplicitArgsWhenSettingKernelParamsThenImplicitArgsAreProperlySet) { TEST_F(KernelImplicitArgsTest, givenKernelWithImplicitArgsWhenSettingKernelParamsThenImplicitArgsAreProperlySet) {
auto pKernelInfo = std::make_unique<MockKernelInfo>(); auto pKernelInfo = std::make_unique<MockKernelInfo>();
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;