mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-01 04:23:00 +08:00
fix: Correct alignment check for immediate fill v2
Resolves: HSD-18042731538
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
f2bd2d3716
commit
67462c4356
@@ -2308,6 +2308,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendUnalignedFillKernel(bool
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
inline bool canUseImmediateFill(size_t size, size_t patternSize, size_t offset, size_t maxWgSize) {
|
||||
return patternSize == 1 || (patternSize <= 4 &&
|
||||
isAligned<sizeof(uint32_t)>(offset) &&
|
||||
isAligned<sizeof(uint32_t) * 4>(size) &&
|
||||
(size <= maxWgSize || isAligned(size / (sizeof(uint32_t) * 4), maxWgSize)));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
const void *pattern,
|
||||
@@ -2392,8 +2399,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
|
||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||
|
||||
const auto maxWgSize = this->device->getDeviceInfo().maxWorkGroupSize;
|
||||
bool useImmediateFill = patternSize == 1 || (patternSize <= 4 && isAligned<sizeof(uint32_t)>(dstAllocation.offset) && isAligned<sizeof(uint32_t) * 4>(size) && (size <= maxWgSize || isAligned(size, maxWgSize)));
|
||||
bool useImmediateFill = canUseImmediateFill(size, patternSize, dstAllocation.offset, this->device->getDeviceInfo().maxWorkGroupSize);
|
||||
auto builtin = useImmediateFill
|
||||
? BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferImmediate>(isStateless, isHeapless)
|
||||
: BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferMiddle>(isStateless, isHeapless);
|
||||
@@ -4113,9 +4119,10 @@ void CommandListCoreFamily<gfxCoreFamily>::setupFillKernelArguments(size_t baseO
|
||||
size_t dstSize,
|
||||
CmdListFillKernelArguments &outArguments,
|
||||
Kernel *kernel) {
|
||||
constexpr auto dataTypeSize = sizeof(uint32_t) * 4;
|
||||
const auto maxWgSize = this->device->getDeviceInfo().maxWorkGroupSize;
|
||||
if (patternSize == 1 || (patternSize <= 4 && isAligned<sizeof(uint32_t)>(baseOffset) && isAligned<dataTypeSize>(dstSize) && (dstSize <= maxWgSize || isAligned(dstSize, maxWgSize)))) {
|
||||
if (canUseImmediateFill(dstSize, patternSize, baseOffset, maxWgSize)) {
|
||||
constexpr auto dataTypeSize = sizeof(uint32_t) * 4;
|
||||
|
||||
size_t middleSize = dstSize;
|
||||
outArguments.mainOffset = baseOffset;
|
||||
outArguments.leftRemainingBytes = sizeof(uint32_t) - (baseOffset % sizeof(uint32_t));
|
||||
|
||||
@@ -48,6 +48,21 @@ HWTEST_F(AppendFillTest, givenCallToAppendMemoryFillWithAppendLaunchKernelFailur
|
||||
EXPECT_NE(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
|
||||
// Fills sizeof(uint32_t) * 4 * maxWorkGroupSize + 1 bytes with a 4-byte pattern and
// expects the call to succeed, the patternTags container to change size, and
// patternAllocations to stay empty.
HWTEST_F(AppendFillTest, givenCallToAppendMemoryFillWithDataSizeNotAlignedToBothSizeOfFillDataAndMaxWgsThenUseFill) {
    using CommandListType = WhiteBox<MockCommandList<FamilyType::gfxCoreFamily>>;

    auto commandList = std::make_unique<CommandListType>();
    commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);

    const auto patternSize = 4;
    const auto maxWgSize = device->getDeviceInfo().maxWorkGroupSize;
    // One byte past a whole number of 16-byte elements per work group.
    const auto allocSize = sizeof(uint32_t) * 4 * maxWgSize + 1;

    const size_t tagsCountBefore = commandList->patternTags.size();

    CmdListMemoryCopyParams copyParams = {};
    const ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    const size_t tagsCountAfter = commandList->patternTags.size();
    EXPECT_NE(tagsCountAfter, tagsCountBefore);
    EXPECT_EQ(0u, commandList->patternAllocations.size());
}
|
||||
|
||||
HWTEST_F(AppendFillTest, givenCallToAppendMemoryFillWithPatternSizeLessOrEqualThanFourButUnalignedSizeThenUseFill) {
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<FamilyType::gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
|
||||
|
||||
Reference in New Issue
Block a user