From 702faca7a9ebdcb8d0bfdde4e25b7efd16fc8392 Mon Sep 17 00:00:00 2001 From: Young Jin Yoon Date: Thu, 14 Oct 2021 01:10:12 +0000 Subject: [PATCH] Fix truncation issues from size_t to uint32_t Related-To: LOCI-2558 Signed-off-by: Young Jin Yoon --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 43 ++++++---- .../sources/cmdlist/test_cmdlist_fill.cpp | 82 ++++++++++++++++++- 2 files changed, 109 insertions(+), 16 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 0ef8591ec4..b855279912 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1449,21 +1449,27 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } auto dstAllocation = this->getAlignedAllocation(this->device, ptr, size, false); + //get_global_id(0) in FillBufferMiddle only goes upto UINT32_MAX, + constexpr size_t getGlobalIdMaxLimit = 4ull * MemoryConstants::gigaByte; + //FillBufferMiddle copies data based on uint, so the minimum size is sizeof(uint32_t), + //and Max limited to sizeof(uint32_t) * getGlobalIdMaxLimit + constexpr size_t fillBufferMiddleMinLimit = sizeof(uint32_t); + constexpr size_t fillBufferMiddleMaxLimit = sizeof(uint32_t) * getGlobalIdMaxLimit; if (size >= 4ull * MemoryConstants::gigaByte) { isStateless = true; + if (size > fillBufferMiddleMaxLimit) { + return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; + } } auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); - if (patternSize == 1) { + if ((patternSize == 1) && (size < fillBufferMiddleMinLimit)) { Kernel *builtinFunction = nullptr; - if (isStateless) { - builtinFunction = device->getBuiltinFunctionsLib()->getStatelessFunction(Builtin::FillBufferImmediate); - } else { - builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate); - } + builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate); + uint32_t groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize; - if (groupSizeX > static_cast(size)) { + if (groupSizeX > size) { groupSizeX = static_cast(size); } if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) { @@ -1478,14 +1484,16 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendEventForProfilingAllWalkers(hSignalEvent, true); - uint32_t groups = static_cast(size) / groupSizeX; - ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; + uint64_t groups = size / groupSizeX; + DEBUG_BREAK_IF(groups >= UINT32_MAX); + + ze_group_count_t dispatchFuncArgs{static_cast(groups), 1u, 1u}; res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent); if (res) { return res; } - uint32_t groupRemainderSizeX = static_cast(size) % groupSizeX; + uint32_t groupRemainderSizeX = static_cast(size % groupSizeX); if (groupRemainderSizeX) { builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u); ze_group_count_t dispatchFuncRemainderArgs{1u, 1u, 1u}; @@ -1510,13 +1518,14 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, size_t middleElSize = sizeof(uint32_t); size_t adjustedSize = size / middleElSize; - uint32_t groupSizeX = static_cast(adjustedSize); + uint32_t groupSizeX = (adjustedSize >= UINT32_MAX) ? UINT32_MAX - 1 : static_cast(adjustedSize); uint32_t groupSizeY = 1, groupSizeZ = 1; builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ); builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); - uint32_t groups = static_cast(adjustedSize) / groupSizeX; - uint32_t groupRemainderSizeX = static_cast(size) % groupSizeX; + uint64_t groups = adjustedSize / groupSizeX; + DEBUG_BREAK_IF(groups >= UINT32_MAX); + uint32_t groupRemainderSizeX = static_cast(size % groupSizeX); size_t patternAllocationSize = alignUp(patternSize, MemoryConstants::cacheLineSize); uint32_t patternSizeInEls = static_cast(patternAllocationSize / middleElSize); @@ -1527,6 +1536,10 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, patternAllocationSize, NEO::GraphicsAllocation::AllocationType::FILL_PATTERN, device->getNEODevice()->getDeviceBitfield()}); + if (patternGfxAlloc == nullptr) { + PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "out of host memory error, check vm.max_map_count in sysctl for mmap limits, and total memory allocated for the application\n", ""); + return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } } void *patternGfxAllocPtr = patternGfxAlloc->getUnderlyingBuffer(); patternAllocations.push_back(patternGfxAlloc); @@ -1551,14 +1564,14 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendEventForProfilingAllWalkers(hSignalEvent, true); - ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; + ze_group_count_t dispatchFuncArgs{static_cast(groups), 1u, 1u}; res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent); if (res) { return res; } if (groupRemainderSizeX) { - uint32_t dstOffsetRemainder = groups * groupSizeX * static_cast(middleElSize); + uint32_t dstOffsetRemainder = static_cast(groups) * groupSizeX * static_cast(middleElSize); uint64_t patternOffsetRemainder = (groupSizeX * groups & (patternSizeInEls - 1)) * middleElSize; Kernel *builtinFunctionRemainder; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp index a8a50837ab..b23939054b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp @@ -55,6 +55,8 @@ class AppendFillFixture : public DeviceFixture { } numberOfCallsToAppendLaunchKernelWithParams++; + groupSizeX.push_back(Kernel::fromHandle(hKernel)->getGroupSize()[0]); + simdSize.push_back(Kernel::fromHandle(hKernel)->getImmutableData()->getDescriptor().kernelAttributes.simdSize); return CommandListCoreFamily::appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, @@ -62,7 +64,8 @@ class AppendFillFixture : public DeviceFixture { isPredicate, isCooperative); } - + std::vector groupSizeX; + std::vector simdSize; uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits::max(); uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0; }; @@ -210,5 +213,82 @@ HWTEST2_F(AppendFillTest, delete[] nonMultipleDstPtr; } +HWTEST2_F(AppendFillTest, + givenCallToAppendMemoryFillWithLessThan4BSizeAndSinglePatternThenSuccessIsReturnedAndGroupSizeXIsSetToSize, IsAtLeastSkl) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + size_t smallSize = 2ull / sizeof(uint8_t); + uint8_t *smallPtr = new uint8_t[smallSize]; + auto result = commandList->appendMemoryFill(smallPtr, pattern, 1, smallSize, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(commandList->groupSizeX[0], smallSize); + + delete[] smallPtr; +} + +HWTEST2_F(AppendFillTest, + givenCallToAppendMemoryFillWithMoreThan4BSizeAndSinglePatternThenSuccessIsReturnedAndGroupSizeXIsSetTosuggestedGroupSize, IsAtLeastSkl) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + size_t largeSize = (4ull * MemoryConstants::megaByte) / sizeof(uint8_t); + uint8_t *largePtr = new uint8_t[largeSize]; + if (largePtr == nullptr) { + std::cout << "skipping tests due to lack of memory, size: " << largeSize << std::endl; + GTEST_SKIP(); + } + size_t adjustedSize = largeSize / sizeof(uint32_t); + EXPECT_LT(adjustedSize, UINT32_MAX); + uint32_t groupSizeX = static_cast(adjustedSize); + uint32_t groupSize[] = {groupSizeX, 1u, 1u}; + + auto builtInFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle); + builtInFunction->suggestGroupSize(groupSize[0], groupSize[1], groupSize[2], + &groupSize[0], &groupSize[1], &groupSize[2]); + auto result = commandList->appendMemoryFill(largePtr, pattern, 4, largeSize, nullptr, 0, nullptr); + + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(commandList->groupSizeX[0], groupSize[0]); + + delete[] largePtr; +} + +HWTEST2_F(AppendFillTest, + givenCallToAppendMemoryFillWith4GBSizeAndSinglePatternThenSuccessIsReturnedAndGroupSizeXIsSetTosuggestedGroupSize, IsAtLeastSkl) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + size_t largeSize = (4ull * MemoryConstants::gigaByte); + uint8_t fakeBuffer = 0; + size_t adjustedSize = largeSize / sizeof(uint32_t); + EXPECT_LT(adjustedSize, UINT32_MAX); + uint32_t groupSizeX = static_cast(adjustedSize); + uint32_t groupSize[] = {groupSizeX, 1u, 1u}; + + auto builtInFunction = device->getBuiltinFunctionsLib()->getStatelessFunction(Builtin::FillBufferMiddle); + builtInFunction->suggestGroupSize(groupSize[0], groupSize[1], groupSize[2], + &groupSize[0], &groupSize[1], &groupSize[2]); + auto result = commandList->appendMemoryFill(&fakeBuffer, pattern, 4, largeSize, nullptr, 0, nullptr); + + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(commandList->groupSizeX[0], groupSize[0]); +} + +HWTEST2_F(AppendFillTest, + givenCallToAppendMemoryFillWithBiggerThan16GBThenUnsupportedSizeIsReturned, IsAtLeastSkl) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + size_t largeSize = (16ull * MemoryConstants::gigaByte) + 1; + uint8_t fakeBuffer = 0; + auto result = commandList->appendMemoryFill(&fakeBuffer, pattern, 4, largeSize, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); + EXPECT_EQ(0, fakeBuffer); +} + } // namespace ult } // namespace L0