mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 18:06:32 +08:00
Fix truncation issues from size_t to uint32_t
Related-To: LOCI-2558 Signed-off-by: Young Jin Yoon <young.jin.yoon@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
72d16c7cf2
commit
702faca7a9
@@ -1449,21 +1449,27 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto dstAllocation = this->getAlignedAllocation(this->device, ptr, size, false);
|
auto dstAllocation = this->getAlignedAllocation(this->device, ptr, size, false);
|
||||||
|
//get_global_id(0) in FillBufferMiddle only goes upto UINT32_MAX,
|
||||||
|
constexpr size_t getGlobalIdMaxLimit = 4ull * MemoryConstants::gigaByte;
|
||||||
|
//FillBufferMiddle copies data based on uint, so the minimum size is sizeof(uint32_t),
|
||||||
|
//and Max limited to sizeof(uint32_t) * getGlobalIdMaxLimit
|
||||||
|
constexpr size_t fillBufferMiddleMinLimit = sizeof(uint32_t);
|
||||||
|
constexpr size_t fillBufferMiddleMaxLimit = sizeof(uint32_t) * getGlobalIdMaxLimit;
|
||||||
if (size >= 4ull * MemoryConstants::gigaByte) {
|
if (size >= 4ull * MemoryConstants::gigaByte) {
|
||||||
isStateless = true;
|
isStateless = true;
|
||||||
|
if (size > fillBufferMiddleMaxLimit) {
|
||||||
|
return ZE_RESULT_ERROR_UNSUPPORTED_SIZE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||||
|
|
||||||
if (patternSize == 1) {
|
if ((patternSize == 1) && (size < fillBufferMiddleMinLimit)) {
|
||||||
Kernel *builtinFunction = nullptr;
|
Kernel *builtinFunction = nullptr;
|
||||||
|
|
||||||
if (isStateless) {
|
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
|
||||||
builtinFunction = device->getBuiltinFunctionsLib()->getStatelessFunction(Builtin::FillBufferImmediate);
|
|
||||||
} else {
|
|
||||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
|
|
||||||
}
|
|
||||||
uint32_t groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize;
|
uint32_t groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize;
|
||||||
if (groupSizeX > static_cast<uint32_t>(size)) {
|
if (groupSizeX > size) {
|
||||||
groupSizeX = static_cast<uint32_t>(size);
|
groupSizeX = static_cast<uint32_t>(size);
|
||||||
}
|
}
|
||||||
if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) {
|
if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) {
|
||||||
@@ -1478,14 +1484,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
|
|
||||||
appendEventForProfilingAllWalkers(hSignalEvent, true);
|
appendEventForProfilingAllWalkers(hSignalEvent, true);
|
||||||
|
|
||||||
uint32_t groups = static_cast<uint32_t>(size) / groupSizeX;
|
uint64_t groups = size / groupSizeX;
|
||||||
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
|
DEBUG_BREAK_IF(groups >= UINT32_MAX);
|
||||||
|
|
||||||
|
ze_group_count_t dispatchFuncArgs{static_cast<uint32_t>(groups), 1u, 1u};
|
||||||
res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent);
|
res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent);
|
||||||
if (res) {
|
if (res) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t groupRemainderSizeX = static_cast<uint32_t>(size) % groupSizeX;
|
uint32_t groupRemainderSizeX = static_cast<uint32_t>(size % groupSizeX);
|
||||||
if (groupRemainderSizeX) {
|
if (groupRemainderSizeX) {
|
||||||
builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u);
|
builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u);
|
||||||
ze_group_count_t dispatchFuncRemainderArgs{1u, 1u, 1u};
|
ze_group_count_t dispatchFuncRemainderArgs{1u, 1u, 1u};
|
||||||
@@ -1510,13 +1518,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
|
|
||||||
size_t middleElSize = sizeof(uint32_t);
|
size_t middleElSize = sizeof(uint32_t);
|
||||||
size_t adjustedSize = size / middleElSize;
|
size_t adjustedSize = size / middleElSize;
|
||||||
uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize);
|
uint32_t groupSizeX = (adjustedSize >= UINT32_MAX) ? UINT32_MAX - 1 : static_cast<uint32_t>(adjustedSize);
|
||||||
uint32_t groupSizeY = 1, groupSizeZ = 1;
|
uint32_t groupSizeY = 1, groupSizeZ = 1;
|
||||||
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
|
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
|
||||||
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
||||||
|
|
||||||
uint32_t groups = static_cast<uint32_t>(adjustedSize) / groupSizeX;
|
uint64_t groups = adjustedSize / groupSizeX;
|
||||||
uint32_t groupRemainderSizeX = static_cast<uint32_t>(size) % groupSizeX;
|
DEBUG_BREAK_IF(groups >= UINT32_MAX);
|
||||||
|
uint32_t groupRemainderSizeX = static_cast<uint32_t>(size % groupSizeX);
|
||||||
|
|
||||||
size_t patternAllocationSize = alignUp(patternSize, MemoryConstants::cacheLineSize);
|
size_t patternAllocationSize = alignUp(patternSize, MemoryConstants::cacheLineSize);
|
||||||
uint32_t patternSizeInEls = static_cast<uint32_t>(patternAllocationSize / middleElSize);
|
uint32_t patternSizeInEls = static_cast<uint32_t>(patternAllocationSize / middleElSize);
|
||||||
@@ -1527,6 +1536,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
patternAllocationSize,
|
patternAllocationSize,
|
||||||
NEO::GraphicsAllocation::AllocationType::FILL_PATTERN,
|
NEO::GraphicsAllocation::AllocationType::FILL_PATTERN,
|
||||||
device->getNEODevice()->getDeviceBitfield()});
|
device->getNEODevice()->getDeviceBitfield()});
|
||||||
|
if (patternGfxAlloc == nullptr) {
|
||||||
|
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "out of host memory error, check vm.max_map_count in sysctl for mmap limits, and total memory allocated for the application\n", "");
|
||||||
|
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
void *patternGfxAllocPtr = patternGfxAlloc->getUnderlyingBuffer();
|
void *patternGfxAllocPtr = patternGfxAlloc->getUnderlyingBuffer();
|
||||||
patternAllocations.push_back(patternGfxAlloc);
|
patternAllocations.push_back(patternGfxAlloc);
|
||||||
@@ -1551,14 +1564,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
|
|
||||||
appendEventForProfilingAllWalkers(hSignalEvent, true);
|
appendEventForProfilingAllWalkers(hSignalEvent, true);
|
||||||
|
|
||||||
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
|
ze_group_count_t dispatchFuncArgs{static_cast<uint32_t>(groups), 1u, 1u};
|
||||||
res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent);
|
res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent);
|
||||||
if (res) {
|
if (res) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (groupRemainderSizeX) {
|
if (groupRemainderSizeX) {
|
||||||
uint32_t dstOffsetRemainder = groups * groupSizeX * static_cast<uint32_t>(middleElSize);
|
uint32_t dstOffsetRemainder = static_cast<uint32_t>(groups) * groupSizeX * static_cast<uint32_t>(middleElSize);
|
||||||
uint64_t patternOffsetRemainder = (groupSizeX * groups & (patternSizeInEls - 1)) * middleElSize;
|
uint64_t patternOffsetRemainder = (groupSizeX * groups & (patternSizeInEls - 1)) * middleElSize;
|
||||||
|
|
||||||
Kernel *builtinFunctionRemainder;
|
Kernel *builtinFunctionRemainder;
|
||||||
|
|||||||
@@ -55,6 +55,8 @@ class AppendFillFixture : public DeviceFixture {
|
|||||||
}
|
}
|
||||||
|
|
||||||
numberOfCallsToAppendLaunchKernelWithParams++;
|
numberOfCallsToAppendLaunchKernelWithParams++;
|
||||||
|
groupSizeX.push_back(Kernel::fromHandle(hKernel)->getGroupSize()[0]);
|
||||||
|
simdSize.push_back(Kernel::fromHandle(hKernel)->getImmutableData()->getDescriptor().kernelAttributes.simdSize);
|
||||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(hKernel,
|
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(hKernel,
|
||||||
pThreadGroupDimensions,
|
pThreadGroupDimensions,
|
||||||
hEvent,
|
hEvent,
|
||||||
@@ -62,7 +64,8 @@ class AppendFillFixture : public DeviceFixture {
|
|||||||
isPredicate,
|
isPredicate,
|
||||||
isCooperative);
|
isCooperative);
|
||||||
}
|
}
|
||||||
|
std::vector<uint32_t> groupSizeX;
|
||||||
|
std::vector<uint32_t> simdSize;
|
||||||
uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits<uint32_t>::max();
|
uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits<uint32_t>::max();
|
||||||
uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0;
|
uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0;
|
||||||
};
|
};
|
||||||
@@ -210,5 +213,82 @@ HWTEST2_F(AppendFillTest,
|
|||||||
delete[] nonMultipleDstPtr;
|
delete[] nonMultipleDstPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(AppendFillTest,
|
||||||
|
givenCallToAppendMemoryFillWithLessThan4BSizeAndSinglePatternThenSuccessIsReturnedAndGroupSizeXIsSetToSize, IsAtLeastSkl) {
|
||||||
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
|
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||||
|
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||||
|
|
||||||
|
size_t smallSize = 2ull / sizeof(uint8_t);
|
||||||
|
uint8_t *smallPtr = new uint8_t[smallSize];
|
||||||
|
auto result = commandList->appendMemoryFill(smallPtr, pattern, 1, smallSize, nullptr, 0, nullptr);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
EXPECT_EQ(commandList->groupSizeX[0], smallSize);
|
||||||
|
|
||||||
|
delete[] smallPtr;
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(AppendFillTest,
|
||||||
|
givenCallToAppendMemoryFillWithMoreThan4BSizeAndSinglePatternThenSuccessIsReturnedAndGroupSizeXIsSetTosuggestedGroupSize, IsAtLeastSkl) {
|
||||||
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
|
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||||
|
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||||
|
|
||||||
|
size_t largeSize = (4ull * MemoryConstants::megaByte) / sizeof(uint8_t);
|
||||||
|
uint8_t *largePtr = new uint8_t[largeSize];
|
||||||
|
if (largePtr == nullptr) {
|
||||||
|
std::cout << "skipping tests due to lack of memory, size: " << largeSize << std::endl;
|
||||||
|
GTEST_SKIP();
|
||||||
|
}
|
||||||
|
size_t adjustedSize = largeSize / sizeof(uint32_t);
|
||||||
|
EXPECT_LT(adjustedSize, UINT32_MAX);
|
||||||
|
uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize);
|
||||||
|
uint32_t groupSize[] = {groupSizeX, 1u, 1u};
|
||||||
|
|
||||||
|
auto builtInFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle);
|
||||||
|
builtInFunction->suggestGroupSize(groupSize[0], groupSize[1], groupSize[2],
|
||||||
|
&groupSize[0], &groupSize[1], &groupSize[2]);
|
||||||
|
auto result = commandList->appendMemoryFill(largePtr, pattern, 4, largeSize, nullptr, 0, nullptr);
|
||||||
|
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
EXPECT_EQ(commandList->groupSizeX[0], groupSize[0]);
|
||||||
|
|
||||||
|
delete[] largePtr;
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(AppendFillTest,
|
||||||
|
givenCallToAppendMemoryFillWith4GBSizeAndSinglePatternThenSuccessIsReturnedAndGroupSizeXIsSetTosuggestedGroupSize, IsAtLeastSkl) {
|
||||||
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
|
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||||
|
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||||
|
|
||||||
|
size_t largeSize = (4ull * MemoryConstants::gigaByte);
|
||||||
|
uint8_t fakeBuffer = 0;
|
||||||
|
size_t adjustedSize = largeSize / sizeof(uint32_t);
|
||||||
|
EXPECT_LT(adjustedSize, UINT32_MAX);
|
||||||
|
uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize);
|
||||||
|
uint32_t groupSize[] = {groupSizeX, 1u, 1u};
|
||||||
|
|
||||||
|
auto builtInFunction = device->getBuiltinFunctionsLib()->getStatelessFunction(Builtin::FillBufferMiddle);
|
||||||
|
builtInFunction->suggestGroupSize(groupSize[0], groupSize[1], groupSize[2],
|
||||||
|
&groupSize[0], &groupSize[1], &groupSize[2]);
|
||||||
|
auto result = commandList->appendMemoryFill(&fakeBuffer, pattern, 4, largeSize, nullptr, 0, nullptr);
|
||||||
|
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
EXPECT_EQ(commandList->groupSizeX[0], groupSize[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(AppendFillTest,
|
||||||
|
givenCallToAppendMemoryFillWithBiggerThan16GBThenUnsupportedSizeIsReturned, IsAtLeastSkl) {
|
||||||
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
|
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||||
|
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||||
|
size_t largeSize = (16ull * MemoryConstants::gigaByte) + 1;
|
||||||
|
uint8_t fakeBuffer = 0;
|
||||||
|
auto result = commandList->appendMemoryFill(&fakeBuffer, pattern, 4, largeSize, nullptr, 0, nullptr);
|
||||||
|
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result);
|
||||||
|
EXPECT_EQ(0, fakeBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
Reference in New Issue
Block a user