From 479d01c118d79dab3e220659187101275a7d6fec Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Tue, 22 Dec 2020 18:53:12 -0800 Subject: [PATCH] Improve zeCommandListAppendMemoryFill Performance Improve L0 fill operations by copying the pattern using two kernels: one that copies four bytes at a time, and one that takes care of the remainder. Additionally, a new allocation is created to fill up at least a cacheline. Signed-off-by: Jaime Arteaga --- .../source/builtin/builtin_functions_lib.h | 4 +- .../builtin/builtin_functions_lib_impl.cpp | 10 +- level_zero/core/source/cmdlist/cmdlist.h | 5 +- level_zero/core/source/cmdlist/cmdlist_hw.h | 10 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 126 +++++++---- .../unit_tests/sources/cmdlist/CMakeLists.txt | 3 +- .../sources/cmdlist/test_cmdlist_blit.cpp | 198 +----------------- .../sources/cmdlist/test_cmdlist_fill.cpp | 194 +++++++++++++++++ 8 files changed, 298 insertions(+), 252 deletions(-) create mode 100644 level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp diff --git a/level_zero/core/source/builtin/builtin_functions_lib.h b/level_zero/core/source/builtin/builtin_functions_lib.h index 732793c5d8..6415dfc2ce 100644 --- a/level_zero/core/source/builtin/builtin_functions_lib.h +++ b/level_zero/core/source/builtin/builtin_functions_lib.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -26,6 +26,8 @@ enum class Builtin : uint32_t { CopyBufferToBufferSide, FillBufferImmediate, FillBufferSSHOffset, + FillBufferMiddle, + FillBufferRightLeftover, QueryKernelTimestamps, QueryKernelTimestampsWithOffsets, COUNT diff --git a/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp b/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp index 4e65d82478..03f26de22d 100644 --- a/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp +++ b/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -49,6 +49,14 @@ void BuiltinFunctionsLibImpl::initFunctions() { builtinName = "FillBufferSSHOffset"; builtin = NEO::EBuiltInOps::FillBuffer; break; + case Builtin::FillBufferMiddle: + builtinName = "FillBufferMiddle"; + builtin = NEO::EBuiltInOps::FillBuffer; + break; + case Builtin::FillBufferRightLeftover: + builtinName = "FillBufferRightLeftover"; + builtin = NEO::EBuiltInOps::FillBuffer; + break; case Builtin::QueryKernelTimestamps: builtinName = "QueryKernelTimestamps"; builtin = NEO::EBuiltInOps::QueryKernelTimestamps; diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 4dccfb69f7..8205da8d75 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -190,6 +190,9 @@ struct CommandList : _ze_command_list_handle_t { virtual ~CommandList(); NEO::CommandContainer commandContainer; bool getContainsStatelessUncachedResource() { return containsStatelessUncachedResource; } + std::map &getHostPtrMap() { + return hostPtrMap; + }; protected: std::map hostPtrMap; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index efbaec876f..df908fd7ec 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -199,11 +199,11 @@ struct CommandListCoreFamily : CommandListImp { size_t bytesPerPixel, Vec3 copySize, Vec3 srcSize, Vec3 dstSize, ze_event_handle_t hSignalEvent); - ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, - const ze_group_count_t *pThreadGroupDimensions, - ze_event_handle_t hEvent, - bool isIndirect, - bool isPredicate); + MOCKABLE_VIRTUAL ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, + const ze_group_count_t *pThreadGroupDimensions, + ze_event_handle_t hEvent, + bool isIndirect, + bool isPredicate); ze_result_t appendLaunchKernelSplit(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent); ze_result_t prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index e931df07bb..2f4bcda8d5 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1135,9 +1135,9 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, return appendBlitFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents); } - ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents); - if (ret) { - return ret; + ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents); + if (res) { + return res; } using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; @@ -1159,20 +1159,12 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } auto dstAllocation = this->getAlignedAllocation(this->device, ptr, size); - - uintptr_t srcPtr = reinterpret_cast(const_cast(pattern)); - size_t srcOffset = 0; - NEO::EncodeSurfaceState::getSshAlignedPointer(srcPtr, srcOffset); - auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); - Kernel *builtinFunction = nullptr; - uint32_t groupSizeX = 1u; - if (patternSize == 1) { - builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate); + auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate); - groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize; + uint32_t groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize; if (groupSizeX > static_cast(size)) { groupSizeX = static_cast(size); } @@ -1186,50 +1178,92 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); builtinFunction->setArgumentValue(2, sizeof(value), &value); + appendEventForProfilingAllWalkers(hSignalEvent, true); + + uint32_t groups = static_cast(size) / groupSizeX; + ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; + res = CommandListCoreFamily::appendLaunchKernel(builtinFunction->toHandle(), + &dispatchFuncArgs, nullptr, + 0, nullptr); + if (res) { + return res; + } } else { - builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferSSHOffset); + auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle); - auto patternAlloc = this->getAlignedAllocation(this->device, reinterpret_cast(srcPtr), srcOffset + patternSize); - if (patternAlloc.alloc == nullptr) { - DEBUG_BREAK_IF(true); - return ZE_RESULT_ERROR_UNKNOWN; - } - srcOffset += patternAlloc.offset; + size_t middleElSize = sizeof(uint32_t); + size_t adjustedSize = size / middleElSize; + uint32_t groupSizeX = static_cast(adjustedSize); + uint32_t groupSizeY = 1, groupSizeZ = 1; + builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ); + builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); - groupSizeX = static_cast(std::min(patternSize, size)); - if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) { - DEBUG_BREAK_IF(true); - return ZE_RESULT_ERROR_UNKNOWN; + uint32_t groups = static_cast(adjustedSize) / groupSizeX; + uint32_t groupRemainderSizeX = static_cast(size) % groupSizeX; + + size_t patternAllocationSize = alignUp(patternSize, MemoryConstants::cacheLineSize); + uint32_t patternSizeInEls = static_cast(patternAllocationSize / middleElSize); + + auto patternGfxAlloc = getAllocationFromHostPtrMap(pattern, patternAllocationSize); + if (patternGfxAlloc == nullptr) { + patternGfxAlloc = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getNEODevice()->getRootDeviceIndex(), + patternAllocationSize, + NEO::GraphicsAllocation::AllocationType::FILL_PATTERN, + device->getNEODevice()->getDeviceBitfield()}); + hostPtrMap.insert(std::make_pair(pattern, patternGfxAlloc)); } + void *patternGfxAllocPtr = patternGfxAlloc->getUnderlyingBuffer(); + + uint64_t patternAllocPtr = reinterpret_cast(patternGfxAllocPtr); + uint64_t patternAllocOffset = 0; + uint64_t patternSizeToCopy = patternSize; + do { + memcpy_s(reinterpret_cast(patternAllocPtr + patternAllocOffset), + patternSizeToCopy, pattern, patternSizeToCopy); + + if ((patternAllocOffset + patternSizeToCopy) > patternAllocationSize) { + patternSizeToCopy = patternAllocationSize - patternAllocOffset; + } + + patternAllocOffset += patternSizeToCopy; + } while (patternAllocOffset < patternAllocationSize); builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); - builtinFunction->setArgBufferWithAlloc(2, patternAlloc.alignedAllocationPtr, - patternAlloc.alloc); - builtinFunction->setArgumentValue(3, sizeof(srcOffset), &srcOffset); - } + builtinFunction->setArgBufferWithAlloc(2, reinterpret_cast(patternGfxAllocPtr), patternGfxAlloc); + builtinFunction->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls); - appendEventForProfilingAllWalkers(hSignalEvent, true); + appendEventForProfilingAllWalkers(hSignalEvent, true); - uint32_t groups = static_cast(size) / groupSizeX; - ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; + ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; + res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent); + if (res) { + return res; + } - ze_result_t res = CommandListCoreFamily::appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent); + if (groupRemainderSizeX) { + uint32_t dstOffsetRemainder = groups * groupSizeX * static_cast(middleElSize); + uint64_t patternOffsetRemainder = (groupSizeX * groups & (patternSizeInEls - 1)) * middleElSize; - if (res) { - return res; - } + auto builtinFunctionRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftover); + builtinFunctionRemainder->setGroupSize(groupRemainderSizeX, 1u, 1u); + ze_group_count_t dispatchFuncArgs{1u, 1u, 1u}; - uint32_t groupRemainderSizeX = static_cast(size) % groupSizeX; - if (groupRemainderSizeX) { - builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u); - ze_group_count_t dispatchFuncArgs{1u, 1u, 1u}; - - size_t dstOffset = dstAllocation.offset + (size - groupRemainderSizeX); - builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); - builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset); - - res = CommandListCoreFamily::appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent); + builtinFunctionRemainder->setArgBufferWithAlloc(0, + dstAllocation.alignedAllocationPtr, + dstAllocation.alloc); + builtinFunctionRemainder->setArgumentValue(1, + sizeof(dstOffsetRemainder), + &dstOffsetRemainder); + builtinFunctionRemainder->setArgBufferWithAlloc(2, + reinterpret_cast(patternGfxAllocPtr) + patternOffsetRemainder, + patternGfxAlloc); + builtinFunctionRemainder->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls); + res = appendLaunchKernelSplit(builtinFunctionRemainder->toHandle(), &dispatchFuncArgs, hSignalEvent); + if (res) { + return res; + } + } } appendEventForProfilingAllWalkers(hSignalEvent, false); @@ -1488,7 +1522,7 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand appendWriteKernelTimestamp(hEvent, beforeWalker, true); } else { - NEO::PipeControlArgs args; + NEO::PipeControlArgs args = {}; args.dcFlushEnable = true; NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/CMakeLists.txt b/level_zero/core/test/unit_tests/sources/cmdlist/CMakeLists.txt index 8efe0d816f..410dbc85fb 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/CMakeLists.txt +++ b/level_zero/core/test/unit_tests/sources/cmdlist/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -16,5 +16,6 @@ target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_signal_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_wait_on_events.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_blit.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_fill.cpp ) add_subdirectories() diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp index 9db0bcf096..83b6330e5c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Intel Corporation + * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -58,128 +58,6 @@ class MockDriverHandle : public L0::DriverHandleImp { NEO::SvmAllocationData data{rootDeviceIndex}; }; -template -class MockCommandListForMemFillHostPtr : public WhiteBox<::L0::CommandListCoreFamily> { - public: - MockCommandListForMemFillHostPtr() : WhiteBox<::L0::CommandListCoreFamily>() {} - - AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override { - return L0::CommandListCoreFamily::getAlignedAllocation(device, buffer, bufferSize); - } -}; - -uint32_t memoryFillMockGroupSizeX = 0, memoryFillMockGroupSizeY = 0, memoryFillMockGroupSizeZ = 0; -struct AppendMemoryFillFixture { - class MockDriverHandleHostPtr : public L0::DriverHandleImp { - public: - bool findAllocationDataForRange(const void *buffer, - size_t size, - NEO::SvmAllocationData **allocData) override { - if (buffer == reinterpret_cast(registeredGraphicsAllocationAddress)) { - mockAllocation.reset(new NEO::MockGraphicsAllocation(rootDeviceIndex, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, - reinterpret_cast(registeredGraphicsAllocationAddress), 0x1000, 0, sizeof(uint32_t), - MemoryPool::System4KBPages)); - data.gpuAllocations.addAllocation(mockAllocation.get()); - if (allocData) { - *allocData = &data; - } - return true; - } - return false; - } - const uint32_t rootDeviceIndex = 0u; - std::unique_ptr mockAllocation; - NEO::SvmAllocationData data{rootDeviceIndex}; - }; - struct MockKernelImmutableDataForMemFill : KernelImmutableData { - MockKernelImmutableDataForMemFill(L0::Device *l0device = nullptr) { - mockKernelDescriptor = new NEO::KernelDescriptor; - mockKernelDescriptor->kernelAttributes.simdSize = 32; - kernelDescriptor = mockKernelDescriptor; - return; - } - ~MockKernelImmutableDataForMemFill() override { - delete mockKernelDescriptor; - } - NEO::KernelDescriptor *mockKernelDescriptor = nullptr; - }; - class MockKernelForMemFill : public L0::KernelImp { - public: - MockKernelForMemFill() { - mockKernelImmData = new MockKernelImmutableDataForMemFill(); - } - ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, - uint32_t groupSizeZ) override { - memoryFillMockGroupSizeX = groupSizeX; - memoryFillMockGroupSizeY = groupSizeY; - memoryFillMockGroupSizeZ = groupSizeZ; - return ZE_RESULT_ERROR_UNKNOWN; - } - void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override { - return; - } - void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override { - return; - } - const MockKernelImmutableDataForMemFill *getImmutableData() const override { - return mockKernelImmData; - } - ~MockKernelForMemFill() override { - delete mockKernelImmData; - } - std::unique_ptr clone() const override { return nullptr; } - MockKernelImmutableDataForMemFill *mockKernelImmData = nullptr; - }; - - struct MockBuiltinFunctionsForMemFill : BuiltinFunctionsLibImpl { - MockBuiltinFunctionsForMemFill(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) { - tmpMockKernel = new MockKernelForMemFill; - } - MockKernelForMemFill *getFunction(Builtin func) override { - return tmpMockKernel; - } - ~MockBuiltinFunctionsForMemFill() override { - delete tmpMockKernel; - } - MockKernelForMemFill *tmpMockKernel = nullptr; - }; - class MockDeviceHandle : public L0::DeviceImp { - public: - MockDeviceHandle() { - tmpMockBultinLib = new MockBuiltinFunctionsForMemFill{nullptr, nullptr}; - } - MockBuiltinFunctionsForMemFill *getBuiltinFunctionsLib() override { - return tmpMockBultinLib; - } - ~MockDeviceHandle() override { - delete tmpMockBultinLib; - } - MockBuiltinFunctionsForMemFill *tmpMockBultinLib = nullptr; - }; - virtual void SetUp() { // NOLINT(readability-identifier-naming) - memoryFillMockGroupSizeX = 0; - memoryFillMockGroupSizeY = 0; - memoryFillMockGroupSizeZ = 0; - neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); - neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); - NEO::DeviceVector devices; - devices.push_back(std::unique_ptr(neoDevice)); - driverHandle = std::make_unique>(); - deviceMock = std::make_unique(); - driverHandle->initialize(std::move(devices)); - neoMockDevice->incRefInternal(); - deviceMock.get()->neoDevice = neoMockDevice; - } - - virtual void TearDown() { // NOLINT(readability-identifier-naming) - } - std::unique_ptr> driverHandle; - std::unique_ptr deviceMock; - NEO::MockDevice *neoDevice = nullptr; - NEO::MockDevice *neoMockDevice = nullptr; - static constexpr uint64_t registeredGraphicsAllocationAddress = 0x1234; -}; - using AppendMemoryCopy = Test; using Platforms = IsAtLeastProduct; @@ -423,79 +301,5 @@ HWTEST2_F(AppendMemoryCopyFromContext, givenCommandListThenUpOnPerformingAppendM EXPECT_EQ(ZE_RESULT_SUCCESS, result); } -using AppendMemoryfillHostPtr = Test; -HWTEST2_F(AppendMemoryfillHostPtr, givenTwoCommandListsAndHostPointerUsedInBothWhenMemoryfillCalledThenNewUniqueAllocationIsAddedtoHostPtrMap, Platforms) { - MockCommandListForMemFillHostPtr cmdListFirst; - MockCommandListForMemFillHostPtr cmdListSecond; - MockDriverHandleHostPtr driverHandleMock; - deviceMock.get()->setDriverHandle(&driverHandleMock); - cmdListFirst.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute); - cmdListSecond.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute); - uint64_t pattern[4] = {1, 2, 3, 4}; - void *ptr = reinterpret_cast(registeredGraphicsAllocationAddress); - cmdListFirst.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr); - cmdListSecond.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr); - EXPECT_EQ(cmdListFirst.hostPtrMap.size(), 1u); - EXPECT_EQ(cmdListSecond.hostPtrMap.size(), 1u); - auto allocationFirstList = cmdListFirst.hostPtrMap.begin()->second; - auto allocationSecondList = cmdListSecond.hostPtrMap.begin()->second; - EXPECT_NE(allocationFirstList, allocationSecondList); - deviceMock.get()->setDriverHandle(driverHandle.get()); -} - -HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWhenMemoryfillCalledThenNewAllocationisAddedToHostPtrMap, Platforms) { - MockCommandListForMemFillHostPtr cmdList; - MockDriverHandleHostPtr driverHandleMock; - deviceMock.get()->setDriverHandle(&driverHandleMock); - cmdList.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute); - uint64_t pattern[4] = {1, 2, 3, 4}; - void *ptr = reinterpret_cast(registeredGraphicsAllocationAddress); - cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr); - EXPECT_EQ(cmdList.hostPtrMap.size(), 1u); - deviceMock.get()->setDriverHandle(driverHandle.get()); -} - -HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWithPatternSizeGreaterThanSizeWhenMemoryfillCalledThenGroupSizeXEqualsSize, Platforms) { - MockCommandListForMemFillHostPtr cmdList; - MockDriverHandleHostPtr driverHandleMock; - size_t patternSize = 0x1001; - size_t dstSize = 0x1000; - deviceMock.get()->setDriverHandle(&driverHandleMock); - cmdList.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute); - uint64_t pattern[4] = {1, 2, 3, 4}; - void *ptr = reinterpret_cast(registeredGraphicsAllocationAddress); - cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), patternSize, dstSize, nullptr, 0, nullptr); - EXPECT_EQ(memoryFillMockGroupSizeX, 0x1000u); - deviceMock.get()->setDriverHandle(driverHandle.get()); -} - -HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWithSizeLessThanSimdWhenMemoryfillCalledThenGroupSizeXEqualsSize, Platforms) { - MockCommandListForMemFillHostPtr cmdList; - MockDriverHandleHostPtr driverHandleMock; - size_t patternSize = 1; - size_t dstSize = 16; - deviceMock.get()->setDriverHandle(&driverHandleMock); - cmdList.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute); - uint64_t pattern[4] = {1, 2, 3, 4}; - void *ptr = reinterpret_cast(registeredGraphicsAllocationAddress); - cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), patternSize, dstSize, nullptr, 0, nullptr); - EXPECT_EQ(memoryFillMockGroupSizeX, 16u); - deviceMock.get()->setDriverHandle(driverHandle.get()); -} - -HWTEST2_F(AppendMemoryfillHostPtr, givenCommandListAndHostPointerWithSizeLargerThanSimdWhenMemoryfillCalledThenGroupSizeIsSimd, Platforms) { - MockCommandListForMemFillHostPtr cmdList; - MockDriverHandleHostPtr driverHandleMock; - size_t patternSize = 1; - size_t dstSize = 64; - deviceMock.get()->setDriverHandle(&driverHandleMock); - cmdList.initialize(deviceMock.get(), NEO::EngineGroupType::RenderCompute); - uint64_t pattern[4] = {1, 2, 3, 4}; - void *ptr = reinterpret_cast(registeredGraphicsAllocationAddress); - cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), patternSize, dstSize, nullptr, 0, nullptr); - EXPECT_EQ(memoryFillMockGroupSizeX, 32u); - deviceMock.get()->setDriverHandle(driverHandle.get()); -} - } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp new file mode 100644 index 0000000000..5877c96dd5 --- /dev/null +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/memory_manager/memory_manager.h" +#include "shared/test/unit_test/mocks/mock_graphics_allocation.h" + +#include "test.h" + +#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" +#include "level_zero/core/source/kernel/kernel_imp.h" +#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" +#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" + +#include + +namespace L0 { +namespace ult { + +class AppendFillFixture : public DeviceFixture, public ::testing::Test { + public: + class MockDriverFillHandle : public L0::DriverHandleImp { + public: + bool findAllocationDataForRange(const void *buffer, + size_t size, + NEO::SvmAllocationData **allocData) override { + mockAllocation.reset(new NEO::MockGraphicsAllocation(const_cast(buffer), size)); + data.gpuAllocations.addAllocation(mockAllocation.get()); + if (allocData) { + *allocData = &data; + } + return true; + } + const uint32_t rootDeviceIndex = 0u; + std::unique_ptr mockAllocation; + NEO::SvmAllocationData data{rootDeviceIndex}; + }; + + template + class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily> { + public: + MockCommandList() : WhiteBox<::L0::CommandListCoreFamily>() {} + + ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, + const ze_group_count_t *pThreadGroupDimensions, + ze_event_handle_t hEvent, + bool isIndirect, + bool isPredicate) override { + if (numberOfCallsToAppendLaunchKernelWithParams == thresholdOfCallsToAppendLaunchKernelWithParamsToFail) { + return ZE_RESULT_ERROR_UNKNOWN; + } + + numberOfCallsToAppendLaunchKernelWithParams++; + return CommandListCoreFamily::appendLaunchKernelWithParams(hKernel, + pThreadGroupDimensions, + hEvent, + isIndirect, + isPredicate); + } + + uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits::max(); + uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0; + }; + + void SetUp() override { + dstPtr = new uint8_t[allocSize]; + + neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); + auto mockBuiltIns = new MockBuiltins(); + neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); + NEO::DeviceVector devices; + devices.push_back(std::unique_ptr(neoDevice)); + driverHandle = std::make_unique>(); + driverHandle->initialize(std::move(devices)); + device = driverHandle->devices[0]; + } + + void TearDown() override { + delete[] dstPtr; + } + + std::unique_ptr> driverHandle; + NEO::MockDevice *neoDevice = nullptr; + L0::Device *device = nullptr; + static constexpr size_t allocSize = 512; + static constexpr size_t patternSize = 4; + uint8_t *dstPtr = nullptr; + uint8_t pattern[patternSize] = {1, 2, 3, 4}; +}; + +using Platforms = IsAtLeastProduct; + +HWTEST2_F(AppendFillFixture, + givenCallToAppendMemoryFillThenSuccessIsReturned, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute); + + auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + +HWTEST2_F(AppendFillFixture, + givenCallToAppendMemoryFillWithAppendLaunchKernelFailureThenSuccessIsNotReturned, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute); + commandList->thresholdOfCallsToAppendLaunchKernelWithParamsToFail = 0; + + auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr); + EXPECT_NE(ZE_RESULT_SUCCESS, result); +} + +HWTEST2_F(AppendFillFixture, + givenTwoCallsToAppendMemoryFillWithSamePatternThenAllocationIsAddedtoHostPtrMapOnlyOnce, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute); + + ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t hostPtrMapSize = commandList->getHostPtrMap().size(); + EXPECT_EQ(hostPtrMapSize, 1u); + + uint8_t *newDstPtr = new uint8_t[allocSize]; + result = commandList->appendMemoryFill(newDstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t newHostPtrMapSize = commandList->getHostPtrMap().size(); + + EXPECT_EQ(hostPtrMapSize, newHostPtrMapSize); + + delete[] newDstPtr; +} + +HWTEST2_F(AppendFillFixture, + givenTwoCallsToAppendMemoryFillWithDifferentPatternsThenHostPtrSizeIncrementsByOne, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute); + + ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t hostPtrMapSize = commandList->getHostPtrMap().size(); + EXPECT_EQ(hostPtrMapSize, 1u); + + uint8_t newPattern[patternSize] = {1, 2, 3, 4}; + result = commandList->appendMemoryFill(dstPtr, newPattern, patternSize, allocSize, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t newHostPtrMapSize = commandList->getHostPtrMap().size(); + + EXPECT_EQ(hostPtrMapSize + 1u, newHostPtrMapSize); +} + +HWTEST2_F(AppendFillFixture, + givenCallToAppendMemoryFillWithSizeNotMultipleOfPatternSizeThenSuccessIsReturned, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute); + + size_t nonMultipleSize = allocSize + 1; + uint8_t *nonMultipleDstPtr = new uint8_t[nonMultipleSize]; + auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + delete[] nonMultipleDstPtr; +} + +HWTEST2_F(AppendFillFixture, + givenCallToAppendMemoryFillWithSizeNotMultipleOfPatternSizeAndAppendLaunchKernelFailureOnRemainderThenSuccessIsNotReturned, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute); + commandList->thresholdOfCallsToAppendLaunchKernelWithParamsToFail = 1; + + size_t nonMultipleSize = allocSize + 1; + uint8_t *nonMultipleDstPtr = new uint8_t[nonMultipleSize]; + auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr); + EXPECT_NE(ZE_RESULT_SUCCESS, result); + + delete[] nonMultipleDstPtr; +} + +} // namespace ult +} // namespace L0