mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Fixed AppendMemoryFill for alignment, multi kernel execution, memory overrun
- Fixed Memory Fill to use aligned allocations in Destination and Source buffer kernel arguments to ensure proper alignment of memory in the surface state. - Fixed patternSize == 1 case where simd size is > the size being written resulting in the inital kernel append with group count of 0 and the remainder kernel being executed incorrectly. - Fixed offset calculation in the remainder kernel to be based off of the aligned allocation offset. - Fixed memory overrun case where size < patternsize with the minimum size being used in the kernel. Change-Id: I39bd83b3a83ceb6df05d374238f85f7fdf0bd09a Signed-off-by: Spruit, Neil R <neil.r.spruit@intel.com>
This commit is contained in:
@@ -1110,8 +1110,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
}
|
||||
|
||||
uintptr_t dstPtr = reinterpret_cast<uintptr_t>(ptr);
|
||||
size_t dstOffset = 0;
|
||||
NEO::EncodeSurfaceState<GfxFamily>::getSshAlignedPointer(dstPtr, dstOffset);
|
||||
|
||||
auto dstAllocation = this->getAlignedAllocation(this->device, reinterpret_cast<void *>(dstPtr), size);
|
||||
|
||||
uintptr_t srcPtr = reinterpret_cast<uintptr_t>(const_cast<void *>(pattern));
|
||||
size_t srcOffset = 0;
|
||||
@@ -1126,14 +1126,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
|
||||
|
||||
groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize;
|
||||
if (groupSizeX > static_cast<uint32_t>(size)) {
|
||||
groupSizeX = static_cast<uint32_t>(size);
|
||||
}
|
||||
if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
uint32_t value = *(reinterpret_cast<uint32_t *>(const_cast<void *>(pattern)));
|
||||
builtinFunction->setArgumentValue(0, sizeof(dstPtr), &dstPtr);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
||||
|
||||
} else {
|
||||
@@ -1146,14 +1149,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
}
|
||||
srcOffset += patternAlloc.offset;
|
||||
|
||||
groupSizeX = static_cast<uint32_t>(patternSize);
|
||||
groupSizeX = static_cast<uint32_t>(std::min(patternSize, size));
|
||||
if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
builtinFunction->setArgumentValue(0, sizeof(dstPtr), &dstPtr);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinFunction->setArgBufferWithAlloc(2, patternAlloc.alignedAllocationPtr,
|
||||
patternAlloc.alloc);
|
||||
builtinFunction->setArgumentValue(3, sizeof(srcOffset), &srcOffset);
|
||||
@@ -1178,11 +1181,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
}
|
||||
ze_group_count_t dispatchFuncArgs{1u, 1u, 1u};
|
||||
|
||||
dstPtr = dstPtr + (size - groupRemainderSizeX);
|
||||
dstOffset = 0;
|
||||
NEO::EncodeSurfaceState<GfxFamily>::getSshAlignedPointer(dstPtr, dstOffset);
|
||||
|
||||
builtinFunction->setArgumentValue(0, sizeof(dstPtr), &dstPtr);
|
||||
size_t dstOffset = dstAllocation.offset + (size - groupRemainderSizeX);
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
|
||||
|
||||
res = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(),
|
||||
|
||||
@@ -69,6 +69,7 @@ class MockCommandListForMemFillHostPtr : public WhiteBox<::L0::CommandListCoreFa
|
||||
}
|
||||
};
|
||||
|
||||
uint32_t memoryFillMockGroupSizeX = 0, memoryFillMockGroupSizeY = 0, memoryFillMockGroupSizeZ = 0;
|
||||
struct AppendMemoryFillFixture {
|
||||
class MockDriverHandleHostPtr : public L0::DriverHandleImp {
|
||||
public:
|
||||
@@ -91,10 +92,28 @@ struct AppendMemoryFillFixture {
|
||||
std::unique_ptr<NEO::GraphicsAllocation> mockAllocation;
|
||||
NEO::SvmAllocationData data{rootDeviceIndex};
|
||||
};
|
||||
struct MockKernelImmutableDataForMemFill : KernelImmutableData {
|
||||
MockKernelImmutableDataForMemFill(L0::Device *l0device = nullptr) {
|
||||
mockKernelDescriptor = new NEO::KernelDescriptor;
|
||||
mockKernelDescriptor->kernelAttributes.simdSize = 32;
|
||||
kernelDescriptor = mockKernelDescriptor;
|
||||
return;
|
||||
}
|
||||
~MockKernelImmutableDataForMemFill() override {
|
||||
delete mockKernelDescriptor;
|
||||
}
|
||||
NEO::KernelDescriptor *mockKernelDescriptor = nullptr;
|
||||
};
|
||||
class MockKernelForMemFill : public L0::KernelImp {
|
||||
public:
|
||||
MockKernelForMemFill() {
|
||||
mockKernelImmData = new MockKernelImmutableDataForMemFill();
|
||||
}
|
||||
ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
|
||||
uint32_t groupSizeZ) override {
|
||||
memoryFillMockGroupSizeX = groupSizeX;
|
||||
memoryFillMockGroupSizeY = groupSizeY;
|
||||
memoryFillMockGroupSizeZ = groupSizeZ;
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
|
||||
@@ -103,7 +122,14 @@ struct AppendMemoryFillFixture {
|
||||
void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
|
||||
return;
|
||||
}
|
||||
const MockKernelImmutableDataForMemFill *getImmutableData() const override {
|
||||
return mockKernelImmData;
|
||||
}
|
||||
~MockKernelForMemFill() override {
|
||||
delete mockKernelImmData;
|
||||
}
|
||||
std::unique_ptr<Kernel> clone() const override { return nullptr; }
|
||||
MockKernelImmutableDataForMemFill *mockKernelImmData = nullptr;
|
||||
};
|
||||
|
||||
struct MockBuiltinFunctionsForMemFill : BuiltinFunctionsLibImpl {
|
||||
@@ -132,6 +158,9 @@ struct AppendMemoryFillFixture {
|
||||
MockBuiltinFunctionsForMemFill *tmpMockBultinLib = nullptr;
|
||||
};
|
||||
virtual void SetUp() { // NOLINT(readability-identifier-naming)
|
||||
memoryFillMockGroupSizeX = 0;
|
||||
memoryFillMockGroupSizeY = 0;
|
||||
memoryFillMockGroupSizeZ = 0;
|
||||
neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get());
|
||||
neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get());
|
||||
NEO::DeviceVector devices;
|
||||
@@ -427,5 +456,47 @@ HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWhenMemoryfillC
|
||||
deviceMock.get()->setDriverHandle(driverHandle.get());
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWithPatternSizeGreaterThanSizeWhenMemoryfillCalledThenGroupSizeXEqualsSize, Platforms) {
|
||||
MockCommandListForMemFillHostPtr<gfxCoreFamily> cmdList;
|
||||
MockDriverHandleHostPtr driverHandleMock;
|
||||
size_t patternSize = 0x1001;
|
||||
size_t dstSize = 0x1000;
|
||||
deviceMock.get()->setDriverHandle(&driverHandleMock);
|
||||
cmdList.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute);
|
||||
uint64_t pattern[4] = {1, 2, 3, 4};
|
||||
void *ptr = reinterpret_cast<void *>(registeredGraphicsAllocationAddress);
|
||||
cmdList.appendMemoryFill(ptr, reinterpret_cast<void *>(&pattern), patternSize, dstSize, nullptr);
|
||||
EXPECT_EQ(memoryFillMockGroupSizeX, 0x1000u);
|
||||
deviceMock.get()->setDriverHandle(driverHandle.get());
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWithSizeLessThanSimdWhenMemoryfillCalledThenGroupSizeXEqualsSize, Platforms) {
|
||||
MockCommandListForMemFillHostPtr<gfxCoreFamily> cmdList;
|
||||
MockDriverHandleHostPtr driverHandleMock;
|
||||
size_t patternSize = 1;
|
||||
size_t dstSize = 16;
|
||||
deviceMock.get()->setDriverHandle(&driverHandleMock);
|
||||
cmdList.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute);
|
||||
uint64_t pattern[4] = {1, 2, 3, 4};
|
||||
void *ptr = reinterpret_cast<void *>(registeredGraphicsAllocationAddress);
|
||||
cmdList.appendMemoryFill(ptr, reinterpret_cast<void *>(&pattern), patternSize, dstSize, nullptr);
|
||||
EXPECT_EQ(memoryFillMockGroupSizeX, 16u);
|
||||
deviceMock.get()->setDriverHandle(driverHandle.get());
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWithSizeLargerThanSimdWhenMemoryfillCalledThenGroupSizeIsSimd, Platforms) {
|
||||
MockCommandListForMemFillHostPtr<gfxCoreFamily> cmdList;
|
||||
MockDriverHandleHostPtr driverHandleMock;
|
||||
size_t patternSize = 1;
|
||||
size_t dstSize = 64;
|
||||
deviceMock.get()->setDriverHandle(&driverHandleMock);
|
||||
cmdList.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute);
|
||||
uint64_t pattern[4] = {1, 2, 3, 4};
|
||||
void *ptr = reinterpret_cast<void *>(registeredGraphicsAllocationAddress);
|
||||
cmdList.appendMemoryFill(ptr, reinterpret_cast<void *>(&pattern), patternSize, dstSize, nullptr);
|
||||
EXPECT_EQ(memoryFillMockGroupSizeX, 32u);
|
||||
deviceMock.get()->setDriverHandle(driverHandle.get());
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user