From c9f6460d9f81e8ea132da95701b5f3d42fd80a52 Mon Sep 17 00:00:00 2001 From: Young Jin Yoon Date: Mon, 27 Dec 2021 17:22:46 +0000 Subject: [PATCH] Revert "Fix truncation issues from size_t to uint32_t" This reverts commit 314d549b003a26066a3290db8d87eef59fb347f9. This revert is to avoid errors and performance regressions on specific platforms. Related-To: LOCI-2558 Signed-off-by: Young Jin Yoon --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 43 ++++------ .../sources/cmdlist/test_cmdlist_fill.cpp | 82 +------------------ 2 files changed, 16 insertions(+), 109 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index a87fe96902..363d08017f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1453,27 +1453,21 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } auto dstAllocation = this->getAlignedAllocation(this->device, ptr, size, false); - //get_global_id(0) in FillBufferMiddle only goes upto UINT32_MAX, - constexpr size_t getGlobalIdMaxLimit = 4ull * MemoryConstants::gigaByte; - //FillBufferMiddle copies data based on uint, so the minimum size is sizeof(uint32_t), - //and Max limited to sizeof(uint32_t) * getGlobalIdMaxLimit - constexpr size_t fillBufferMiddleMinLimit = sizeof(uint32_t); - constexpr size_t fillBufferMiddleMaxLimit = sizeof(uint32_t) * getGlobalIdMaxLimit; if (size >= 4ull * MemoryConstants::gigaByte) { isStateless = true; - if (size > fillBufferMiddleMaxLimit) { - return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; - } } auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); - if ((patternSize == 1) && (size < fillBufferMiddleMinLimit)) { + if (patternSize == 1) { Kernel *builtinFunction = nullptr; - builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate); - + if (isStateless) { + builtinFunction = device->getBuiltinFunctionsLib()->getStatelessFunction(Builtin::FillBufferImmediate); + } else { + builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate); + } uint32_t groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize; - if (groupSizeX > size) { + if (groupSizeX > static_cast(size)) { groupSizeX = static_cast(size); } if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) { @@ -1488,16 +1482,14 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendEventForProfilingAllWalkers(hSignalEvent, true); - uint64_t groups = size / groupSizeX; - DEBUG_BREAK_IF(groups >= UINT32_MAX); - - ze_group_count_t dispatchFuncArgs{static_cast(groups), 1u, 1u}; + uint32_t groups = static_cast(size) / groupSizeX; + ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent); if (res) { return res; } - uint32_t groupRemainderSizeX = static_cast(size % groupSizeX); + uint32_t groupRemainderSizeX = static_cast(size) % groupSizeX; if (groupRemainderSizeX) { builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u); ze_group_count_t dispatchFuncRemainderArgs{1u, 1u, 1u}; @@ -1522,14 +1514,13 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, size_t middleElSize = sizeof(uint32_t); size_t adjustedSize = size / middleElSize; - uint32_t groupSizeX = (adjustedSize >= UINT32_MAX) ? UINT32_MAX - 1 : static_cast(adjustedSize); + uint32_t groupSizeX = static_cast(adjustedSize); uint32_t groupSizeY = 1, groupSizeZ = 1; builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ); builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); - uint64_t groups = adjustedSize / groupSizeX; - DEBUG_BREAK_IF(groups >= UINT32_MAX); - uint32_t groupRemainderSizeX = static_cast(size % groupSizeX); + uint32_t groups = static_cast(adjustedSize) / groupSizeX; + uint32_t groupRemainderSizeX = static_cast(size) % groupSizeX; size_t patternAllocationSize = alignUp(patternSize, MemoryConstants::cacheLineSize); uint32_t patternSizeInEls = static_cast(patternAllocationSize / middleElSize); @@ -1540,10 +1531,6 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, patternAllocationSize, NEO::GraphicsAllocation::AllocationType::FILL_PATTERN, device->getNEODevice()->getDeviceBitfield()}); - if (patternGfxAlloc == nullptr) { - PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "out of host memory error, check vm.max_map_count in sysctl for mmap limits, and total memory allocated for the application\n", ""); - return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } } void *patternGfxAllocPtr = patternGfxAlloc->getUnderlyingBuffer(); patternAllocations.push_back(patternGfxAlloc); @@ -1568,14 +1555,14 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendEventForProfilingAllWalkers(hSignalEvent, true); - ze_group_count_t dispatchFuncArgs{static_cast(groups), 1u, 1u}; + ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent); if (res) { return res; } if (groupRemainderSizeX) { - uint32_t dstOffsetRemainder = static_cast(groups) * groupSizeX * static_cast(middleElSize); + uint32_t dstOffsetRemainder = groups * groupSizeX * static_cast(middleElSize); uint64_t patternOffsetRemainder = (groupSizeX * groups & (patternSizeInEls - 1)) * middleElSize; Kernel *builtinFunctionRemainder; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp index b23939054b..a8a50837ab 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp @@ -55,8 +55,6 @@ class AppendFillFixture : public DeviceFixture { } numberOfCallsToAppendLaunchKernelWithParams++; - groupSizeX.push_back(Kernel::fromHandle(hKernel)->getGroupSize()[0]); - simdSize.push_back(Kernel::fromHandle(hKernel)->getImmutableData()->getDescriptor().kernelAttributes.simdSize); return CommandListCoreFamily::appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, @@ -64,8 +62,7 @@ class AppendFillFixture : public DeviceFixture { isPredicate, isCooperative); } - std::vector groupSizeX; - std::vector simdSize; + uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits::max(); uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0; }; @@ -213,82 +210,5 @@ HWTEST2_F(AppendFillTest, delete[] nonMultipleDstPtr; } -HWTEST2_F(AppendFillTest, - givenCallToAppendMemoryFillWithLessThan4BSizeAndSinglePatternThenSuccessIsReturnedAndGroupSizeXIsSetToSize, IsAtLeastSkl) { - using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; - auto commandList = std::make_unique>>(); - commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - - size_t smallSize = 2ull / sizeof(uint8_t); - uint8_t *smallPtr = new uint8_t[smallSize]; - auto result = commandList->appendMemoryFill(smallPtr, pattern, 1, smallSize, nullptr, 0, nullptr); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(commandList->groupSizeX[0], smallSize); - - delete[] smallPtr; -} - -HWTEST2_F(AppendFillTest, - givenCallToAppendMemoryFillWithMoreThan4BSizeAndSinglePatternThenSuccessIsReturnedAndGroupSizeXIsSetTosuggestedGroupSize, IsAtLeastSkl) { - using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; - auto commandList = std::make_unique>>(); - commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - - size_t largeSize = (4ull * MemoryConstants::megaByte) / sizeof(uint8_t); - uint8_t *largePtr = new uint8_t[largeSize]; - if (largePtr == nullptr) { - std::cout << "skipping tests due to lack of memory, size: " << largeSize << std::endl; - GTEST_SKIP(); - } - size_t adjustedSize = largeSize / sizeof(uint32_t); - EXPECT_LT(adjustedSize, UINT32_MAX); - uint32_t groupSizeX = static_cast(adjustedSize); - uint32_t groupSize[] = {groupSizeX, 1u, 1u}; - - auto builtInFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle); - builtInFunction->suggestGroupSize(groupSize[0], groupSize[1], groupSize[2], - &groupSize[0], &groupSize[1], &groupSize[2]); - auto result = commandList->appendMemoryFill(largePtr, pattern, 4, largeSize, nullptr, 0, nullptr); - - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(commandList->groupSizeX[0], groupSize[0]); - - delete[] largePtr; -} - -HWTEST2_F(AppendFillTest, - givenCallToAppendMemoryFillWith4GBSizeAndSinglePatternThenSuccessIsReturnedAndGroupSizeXIsSetTosuggestedGroupSize, IsAtLeastSkl) { - using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; - auto commandList = std::make_unique>>(); - commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - - size_t largeSize = (4ull * MemoryConstants::gigaByte); - uint8_t fakeBuffer = 0; - size_t adjustedSize = largeSize / sizeof(uint32_t); - EXPECT_LT(adjustedSize, UINT32_MAX); - uint32_t groupSizeX = static_cast(adjustedSize); - uint32_t groupSize[] = {groupSizeX, 1u, 1u}; - - auto builtInFunction = device->getBuiltinFunctionsLib()->getStatelessFunction(Builtin::FillBufferMiddle); - builtInFunction->suggestGroupSize(groupSize[0], groupSize[1], groupSize[2], - &groupSize[0], &groupSize[1], &groupSize[2]); - auto result = commandList->appendMemoryFill(&fakeBuffer, pattern, 4, largeSize, nullptr, 0, nullptr); - - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(commandList->groupSizeX[0], groupSize[0]); -} - -HWTEST2_F(AppendFillTest, - givenCallToAppendMemoryFillWithBiggerThan16GBThenUnsupportedSizeIsReturned, IsAtLeastSkl) { - using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; - auto commandList = std::make_unique>>(); - commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - size_t largeSize = (16ull * MemoryConstants::gigaByte) + 1; - uint8_t fakeBuffer = 0; - auto result = commandList->appendMemoryFill(&fakeBuffer, pattern, 4, largeSize, nullptr, 0, nullptr); - EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); - EXPECT_EQ(0, fakeBuffer); -} - } // namespace ult } // namespace L0