Fixed AppendMemoryFill for alignment, multi kernel execution, memory overrun

- Fixed Memory Fill to use aligned allocations in Destination and Source
buffer kernel arguments to ensure proper alignment of memory in the
surface state.
- Fixed patternSize == 1 case where the SIMD size is greater than the size
being written, resulting in the initial kernel being appended with a group
count of 0 and the remainder kernel being executed incorrectly.
- Fixed offset calculation in the remainder kernel to be based off of
the aligned allocation offset.
- Fixed memory overrun case where size < patternSize by using the smaller
of the two sizes as the group size in the kernel.

Change-Id: I39bd83b3a83ceb6df05d374238f85f7fdf0bd09a
Signed-off-by: Spruit, Neil R <neil.r.spruit@intel.com>
This commit is contained in:
Spruit, Neil R
2020-10-29 17:57:14 +00:00
parent a07be76146
commit 3e5f3fe055
2 changed files with 83 additions and 12 deletions

View File

@@ -1110,8 +1110,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
uintptr_t dstPtr = reinterpret_cast<uintptr_t>(ptr);
size_t dstOffset = 0;
NEO::EncodeSurfaceState<GfxFamily>::getSshAlignedPointer(dstPtr, dstOffset);
auto dstAllocation = this->getAlignedAllocation(this->device, reinterpret_cast<void *>(dstPtr), size);
uintptr_t srcPtr = reinterpret_cast<uintptr_t>(const_cast<void *>(pattern));
size_t srcOffset = 0;
@@ -1126,14 +1126,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize;
if (groupSizeX > static_cast<uint32_t>(size)) {
groupSizeX = static_cast<uint32_t>(size);
}
if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
uint32_t value = *(reinterpret_cast<uint32_t *>(const_cast<void *>(pattern)));
builtinFunction->setArgumentValue(0, sizeof(dstPtr), &dstPtr);
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinFunction->setArgumentValue(2, sizeof(value), &value);
} else {
@@ -1146,14 +1149,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
srcOffset += patternAlloc.offset;
groupSizeX = static_cast<uint32_t>(patternSize);
groupSizeX = static_cast<uint32_t>(std::min(patternSize, size));
if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
builtinFunction->setArgumentValue(0, sizeof(dstPtr), &dstPtr);
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinFunction->setArgBufferWithAlloc(2, patternAlloc.alignedAllocationPtr,
patternAlloc.alloc);
builtinFunction->setArgumentValue(3, sizeof(srcOffset), &srcOffset);
@@ -1178,11 +1181,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
ze_group_count_t dispatchFuncArgs{1u, 1u, 1u};
dstPtr = dstPtr + (size - groupRemainderSizeX);
dstOffset = 0;
NEO::EncodeSurfaceState<GfxFamily>::getSshAlignedPointer(dstPtr, dstOffset);
builtinFunction->setArgumentValue(0, sizeof(dstPtr), &dstPtr);
size_t dstOffset = dstAllocation.offset + (size - groupRemainderSizeX);
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
res = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(),

View File

@@ -69,6 +69,7 @@ class MockCommandListForMemFillHostPtr : public WhiteBox<::L0::CommandListCoreFa
}
};
// Group dimensions most recently passed to MockKernelForMemFill::setGroupSize.
// The fixture's SetUp resets them to 0 and the tests read them back after
// calling appendMemoryFill.
uint32_t memoryFillMockGroupSizeX = 0;
uint32_t memoryFillMockGroupSizeY = 0;
uint32_t memoryFillMockGroupSizeZ = 0;
struct AppendMemoryFillFixture {
class MockDriverHandleHostPtr : public L0::DriverHandleImp {
public:
@@ -91,10 +92,28 @@ struct AppendMemoryFillFixture {
std::unique_ptr<NEO::GraphicsAllocation> mockAllocation;
NEO::SvmAllocationData data{rootDeviceIndex};
};
// KernelImmutableData stand-in for the memory-fill tests. It owns a
// KernelDescriptor whose simdSize is fixed at 32 and publishes it through the
// inherited kernelDescriptor pointer.
struct MockKernelImmutableDataForMemFill : KernelImmutableData {
    // l0device is accepted but not used by this mock.
    MockKernelImmutableDataForMemFill(L0::Device *l0device = nullptr) {
        mockKernelDescriptor = new NEO::KernelDescriptor;
        mockKernelDescriptor->kernelAttributes.simdSize = 32;
        kernelDescriptor = mockKernelDescriptor;
    }
    // The descriptor is held through an owning raw pointer, so a copy would
    // make two objects delete the same descriptor. Forbid copying outright.
    MockKernelImmutableDataForMemFill(const MockKernelImmutableDataForMemFill &) = delete;
    MockKernelImmutableDataForMemFill &operator=(const MockKernelImmutableDataForMemFill &) = delete;
    ~MockKernelImmutableDataForMemFill() override {
        delete mockKernelDescriptor;
    }
    // Descriptor allocated in the constructor and released in the destructor.
    NEO::KernelDescriptor *mockKernelDescriptor = nullptr;
};
class MockKernelForMemFill : public L0::KernelImp {
public:
MockKernelForMemFill() {
mockKernelImmData = new MockKernelImmutableDataForMemFill();
}
// Stores the requested group dimensions in the memoryFillMockGroupSize*
// globals so a test can inspect them, then always reports
// ZE_RESULT_ERROR_UNKNOWN.
// NOTE(review): appendMemoryFill appears to return as soon as setGroupSize
// fails, so presumably only the first requested group size is ever
// observed - confirm against the implementation.
ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
                         uint32_t groupSizeZ) override {
    memoryFillMockGroupSizeZ = groupSizeZ;
    memoryFillMockGroupSizeY = groupSizeY;
    memoryFillMockGroupSizeX = groupSizeX;
    return ZE_RESULT_ERROR_UNKNOWN;
}
void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
@@ -103,7 +122,14 @@ struct AppendMemoryFillFixture {
// Intentionally a no-op in this mock: the kernel descriptor is ignored.
void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
}
// Returns the immutable-data mock owned by this kernel (the object created in
// the constructor), whose descriptor reports a SIMD size of 32.
const MockKernelImmutableDataForMemFill *getImmutableData() const override {
    return this->mockKernelImmData;
}
// Releases the immutable-data mock allocated in the constructor.
~MockKernelForMemFill() override {
    delete this->mockKernelImmData;
}
// Cloning is not supported by this mock; always yields a null pointer.
std::unique_ptr<Kernel> clone() const override {
    return nullptr;
}
MockKernelImmutableDataForMemFill *mockKernelImmData = nullptr;
};
struct MockBuiltinFunctionsForMemFill : BuiltinFunctionsLibImpl {
@@ -132,6 +158,9 @@ struct AppendMemoryFillFixture {
MockBuiltinFunctionsForMemFill *tmpMockBultinLib = nullptr;
};
virtual void SetUp() { // NOLINT(readability-identifier-naming)
memoryFillMockGroupSizeX = 0;
memoryFillMockGroupSizeY = 0;
memoryFillMockGroupSizeZ = 0;
neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get());
neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get());
NEO::DeviceVector devices;
@@ -427,5 +456,47 @@ HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWhenMemoryfillC
deviceMock.get()->setDriverHandle(driverHandle.get());
}
// A fill whose pattern is one byte larger than the destination must request a
// group size equal to the destination size, not the pattern size.
HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWithPatternSizeGreaterThanSizeWhenMemoryfillCalledThenGroupSizeXEqualsSize, Platforms) {
    MockCommandListForMemFillHostPtr<gfxCoreFamily> cmdList;
    MockDriverHandleHostPtr driverHandleMock;

    const size_t fillSize = 0x1000;
    const size_t fillPatternSize = 0x1001;

    deviceMock.get()->setDriverHandle(&driverHandleMock);
    cmdList.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute);

    // NOTE(review): fillPatternSize exceeds sizeof(fillPattern); presumably
    // harmless because the mocked kernel never reads the pattern - confirm.
    uint64_t fillPattern[4] = {1, 2, 3, 4};
    void *dst = reinterpret_cast<void *>(registeredGraphicsAllocationAddress);
    cmdList.appendMemoryFill(dst, reinterpret_cast<void *>(&fillPattern), fillPatternSize, fillSize, nullptr);

    // The mock kernel records the group size it was asked to use.
    EXPECT_EQ(memoryFillMockGroupSizeX, 0x1000u);

    // Point the device back at the fixture's driver handle before the local
    // mock goes out of scope.
    deviceMock.get()->setDriverHandle(driverHandle.get());
}
// A single-byte-pattern fill of 16 bytes is smaller than the mock kernel's
// SIMD size (32), so the requested group size must equal the fill size.
HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWithSizeLessThanSimdWhenMemoryfillCalledThenGroupSizeXEqualsSize, Platforms) {
    MockCommandListForMemFillHostPtr<gfxCoreFamily> cmdList;
    MockDriverHandleHostPtr driverHandleMock;

    const size_t fillSize = 16;
    const size_t fillPatternSize = 1;

    deviceMock.get()->setDriverHandle(&driverHandleMock);
    cmdList.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute);

    uint64_t fillPattern[4] = {1, 2, 3, 4};
    void *dst = reinterpret_cast<void *>(registeredGraphicsAllocationAddress);
    cmdList.appendMemoryFill(dst, reinterpret_cast<void *>(&fillPattern), fillPatternSize, fillSize, nullptr);

    // The mock kernel records the group size it was asked to use.
    EXPECT_EQ(memoryFillMockGroupSizeX, 16u);

    // Point the device back at the fixture's driver handle before the local
    // mock goes out of scope.
    deviceMock.get()->setDriverHandle(driverHandle.get());
}
// A single-byte-pattern fill of 64 bytes is larger than the mock kernel's
// SIMD size (32), so the requested group size must be the SIMD size.
HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWithSizeLargerThanSimdWhenMemoryfillCalledThenGroupSizeIsSimd, Platforms) {
    MockCommandListForMemFillHostPtr<gfxCoreFamily> cmdList;
    MockDriverHandleHostPtr driverHandleMock;

    const size_t fillSize = 64;
    const size_t fillPatternSize = 1;

    deviceMock.get()->setDriverHandle(&driverHandleMock);
    cmdList.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute);

    uint64_t fillPattern[4] = {1, 2, 3, 4};
    void *dst = reinterpret_cast<void *>(registeredGraphicsAllocationAddress);
    cmdList.appendMemoryFill(dst, reinterpret_cast<void *>(&fillPattern), fillPatternSize, fillSize, nullptr);

    // The mock kernel records the group size it was asked to use.
    EXPECT_EQ(memoryFillMockGroupSizeX, 32u);

    // Point the device back at the fixture's driver handle before the local
    // mock goes out of scope.
    deviceMock.get()->setDriverHandle(driverHandle.get());
}
} // namespace ult
} // namespace L0