From 51fa04fc60e7311ade134fa5d4ed99241315eb9b Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Wed, 26 Oct 2022 13:07:57 +0000 Subject: [PATCH] L0: Add experimental extensions for wait and write on memory Signed-off-by: Mateusz Jablonski --- .../driver_experimental/public/CMakeLists.txt | 5 +- .../api/driver_experimental/public/zex_api.h | 4 +- .../public/zex_cmdlist.cpp | 55 ++ .../driver_experimental/public/zex_cmdlist.h | 27 + .../driver_experimental/public/zex_common.h | 61 +++ .../driver_experimental/public/zex_memory.cpp | 2 +- .../cmdlist_extended/cmdlist_extended.inl | 17 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 102 ++++ .../get_extension_function_lookup_map.cpp | 5 +- .../unit_tests/sources/cmdlist/CMakeLists.txt | 2 +- .../cmdlist/test_cmdlist_memory_extension.cpp | 516 +++++++++++++++++- .../WAIT_AND_WRITE_ON_MEMORY.md | 103 ++++ 12 files changed, 858 insertions(+), 41 deletions(-) create mode 100644 level_zero/api/driver_experimental/public/zex_cmdlist.cpp create mode 100644 level_zero/api/driver_experimental/public/zex_cmdlist.h create mode 100644 level_zero/api/driver_experimental/public/zex_common.h create mode 100644 level_zero/doc/experimental_extensions/WAIT_AND_WRITE_ON_MEMORY.md diff --git a/level_zero/api/driver_experimental/public/CMakeLists.txt b/level_zero/api/driver_experimental/public/CMakeLists.txt index a2e43bee4b..11ab317a56 100644 --- a/level_zero/api/driver_experimental/public/CMakeLists.txt +++ b/level_zero/api/driver_experimental/public/CMakeLists.txt @@ -7,6 +7,9 @@ set(L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/zex_api.h + ${CMAKE_CURRENT_SOURCE_DIR}/zex_cmdlist.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/zex_cmdlist.h + ${CMAKE_CURRENT_SOURCE_DIR}/zex_common.h ${CMAKE_CURRENT_SOURCE_DIR}/zex_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zex_driver.h ${CMAKE_CURRENT_SOURCE_DIR}/zex_memory.cpp @@ -15,4 +18,4 @@ set(L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API 
${CMAKE_CURRENT_SOURCE_DIR}/zex_module.h ) -set_property(GLOBAL PROPERTY L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API ${L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API}) \ No newline at end of file +set_property(GLOBAL PROPERTY L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API ${L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API}) diff --git a/level_zero/api/driver_experimental/public/zex_api.h b/level_zero/api/driver_experimental/public/zex_api.h index 839e3465c3..e04440db83 100644 --- a/level_zero/api/driver_experimental/public/zex_api.h +++ b/level_zero/api/driver_experimental/public/zex_api.h @@ -15,8 +15,10 @@ #include // driver experimental API headers +#include "level_zero/api/driver_experimental/public/zex_cmdlist.h" + #include "zex_driver.h" #include "zex_memory.h" #include "zex_module.h" -#endif // _ZEX_API_H \ No newline at end of file +#endif // _ZEX_API_H diff --git a/level_zero/api/driver_experimental/public/zex_cmdlist.cpp b/level_zero/api/driver_experimental/public/zex_cmdlist.cpp new file mode 100644 index 0000000000..45d27048f4 --- /dev/null +++ b/level_zero/api/driver_experimental/public/zex_cmdlist.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "level_zero/api/driver_experimental/public/zex_cmdlist.h" + +#include "level_zero/core/source/cmdlist/cmdlist.h" + +namespace L0 { +ZE_APIEXPORT ze_result_t ZE_APICALL +zexCommandListAppendWaitOnMemory( + zex_command_list_handle_t hCommandList, + zex_wait_on_mem_desc_t *desc, + void *ptr, + uint32_t data, + zex_event_handle_t hSignalEvent) { + try { + { + if (nullptr == hCommandList) + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + return L0::CommandList::fromHandle(hCommandList)->appendWaitOnMemory(reinterpret_cast(desc), ptr, data, static_cast(hSignalEvent)); + } catch (ze_result_t &result) { + return result; + } catch (std::bad_alloc &) { + return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } catch (std::exception &) { + return 
ZE_RESULT_ERROR_UNKNOWN; + } +} + +ZE_APIEXPORT ze_result_t ZE_APICALL +zexCommandListAppendWriteToMemory( + zex_command_list_handle_t hCommandList, + zex_write_to_mem_desc_t *desc, + void *ptr, + uint64_t data) { + try { + { + if (nullptr == hCommandList) + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + return L0::CommandList::fromHandle(hCommandList)->appendWriteToMemory(reinterpret_cast(desc), ptr, data); + } catch (ze_result_t &result) { + return result; + } catch (std::bad_alloc &) { + return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } catch (std::exception &) { + return ZE_RESULT_ERROR_UNKNOWN; + } +} +} // namespace L0 diff --git a/level_zero/api/driver_experimental/public/zex_cmdlist.h b/level_zero/api/driver_experimental/public/zex_cmdlist.h new file mode 100644 index 0000000000..6ee139250c --- /dev/null +++ b/level_zero/api/driver_experimental/public/zex_cmdlist.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "level_zero/api/driver_experimental/public/zex_common.h" +#include + +namespace L0 { + +ZE_APIEXPORT ze_result_t ZE_APICALL +zexCommandListAppendWaitOnMemory( + zex_command_list_handle_t hCommandList, + zex_wait_on_mem_desc_t *desc, + void *ptr, + uint32_t data, + zex_event_handle_t hSignalEvent); +ZE_APIEXPORT ze_result_t ZE_APICALL +zexCommandListAppendWriteToMemory( + zex_command_list_handle_t hCommandList, + zex_write_to_mem_desc_t *desc, + void *ptr, + uint64_t data); +} // namespace L0 diff --git a/level_zero/api/driver_experimental/public/zex_common.h b/level_zero/api/driver_experimental/public/zex_common.h new file mode 100644 index 0000000000..752cc43419 --- /dev/null +++ b/level_zero/api/driver_experimental/public/zex_common.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#ifndef _ZEX_COMMON_H +#define _ZEX_COMMON_H +#if defined(__cplusplus) +#pragma once +#endif +#include + +#if 
defined(__cplusplus) +extern "C" { +#endif + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of command list object +typedef ze_command_list_handle_t zex_command_list_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of event object +typedef ze_event_handle_t zex_event_handle_t; + +#define ZEX_BIT(_i) (1 << _i) + +typedef uint32_t zex_mem_action_scope_flags_t; +typedef enum _zex_mem_action_scope_flag_t { + ZEX_MEM_ACTION_SCOPE_FLAG_SUBDEVICE = ZEX_BIT(0), + ZEX_MEM_ACTION_SCOPE_FLAG_DEVICE = ZEX_BIT(1), + ZEX_MEM_ACTION_SCOPE_FLAG_HOST = ZEX_BIT(2), + ZEX_MEM_ACTION_SCOPE_FLAG_FORCE_UINT32 = 0x7fffffff +} zex_mem_action_scope_flag_t; + +typedef uint32_t zex_wait_on_mem_action_flags_t; +typedef enum _zex_wait_on_mem_action_flag_t { + ZEX_WAIT_ON_MEMORY_FLAG_EQUAL = ZEX_BIT(0), + ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL = ZEX_BIT(1), + ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN = ZEX_BIT(2), + ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN_EQUAL = ZEX_BIT(3), + ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN = ZEX_BIT(4), + ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL = ZEX_BIT(5), + ZEX_WAIT_ON_MEMORY_FLAG_FORCE_UINT32 = 0x7fffffff +} zex_wait_on_mem_action_flag_t; + +typedef struct _zex_wait_on_mem_desc_t { + zex_wait_on_mem_action_flags_t actionFlag; + zex_mem_action_scope_flags_t waitScope; +} zex_wait_on_mem_desc_t; + +typedef struct _zex_write_to_mem_desc_t { + zex_mem_action_scope_flags_t writeScope; +} zex_write_to_mem_desc_t; + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // _ZEX_COMMON_H diff --git a/level_zero/api/driver_experimental/public/zex_memory.cpp b/level_zero/api/driver_experimental/public/zex_memory.cpp index 9544386c7d..a319347b4a 100644 --- a/level_zero/api/driver_experimental/public/zex_memory.cpp +++ b/level_zero/api/driver_experimental/public/zex_memory.cpp @@ -53,4 +53,4 @@ zexMemOpenIpcHandles( void **pptr) { return 
L0::zexMemOpenIpcHandles(hContext, hDevice, numIpcHandles, pIpcHandles, flags, pptr); } -} \ No newline at end of file +} diff --git a/level_zero/core/source/cmdlist/cmdlist_extended/cmdlist_extended.inl b/level_zero/core/source/cmdlist/cmdlist_extended/cmdlist_extended.inl index 513c043999..83e93c3630 100644 --- a/level_zero/core/source/cmdlist/cmdlist_extended/cmdlist_extended.inl +++ b/level_zero/core/source/cmdlist/cmdlist_extended/cmdlist_extended.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -59,19 +59,4 @@ ze_result_t CommandListCoreFamily::appendPipeControl(void *dstPtr return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } -template -ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, - void *ptr, - uint32_t data, - ze_event_handle_t hSignalEvent) { - return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -template -ze_result_t CommandListCoreFamily::appendWriteToMemory(void *desc, - void *ptr, - uint64_t data) { - return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 9fec05193f..ad08458295 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -36,6 +36,7 @@ #include "shared/source/program/sync_buffer_handler.inl" #include "shared/source/utilities/software_tags_manager.h" +#include "level_zero/api/driver_experimental/public/zex_cmdlist.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/source/device/device.h" @@ -2596,4 +2597,105 @@ void CommandListCoreFamily::setupFillKernelArguments(size_t baseO } } +template +ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, + void *ptr, + uint32_t data, + ze_event_handle_t hSignalEvent) { + using COMPARE_OPERATION = typename 
GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; + + auto descriptor = reinterpret_cast(desc); + COMPARE_OPERATION comparator; + switch (descriptor->actionFlag) { + case ZEX_WAIT_ON_MEMORY_FLAG_EQUAL: + comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD; + break; + case ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL: + comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD; + break; + case ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN: + comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_SDD; + break; + case ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN_EQUAL: + comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD; + break; + case ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN: + comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_LESS_THAN_SDD; + break; + case ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL: + comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_LESS_THAN_OR_EQUAL_SDD; + break; + default: + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + + auto srcAllocationStruct = getAlignedAllocation(this->device, ptr, sizeof(uint32_t), true); + UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr); + commandContainer.addToResidencyContainer(srcAllocationStruct.alloc); + uint64_t gpuAddress = static_cast(srcAllocationStruct.alignedAllocationPtr); + NEO::EncodeSempahore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), + gpuAddress, + data, + comparator); + + if (hSignalEvent) { + auto event = Event::fromHandle(hSignalEvent); + const auto &hwInfo = this->device->getHwInfo(); + + commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); + uint64_t baseAddr = event->getGpuAddress(this->device); + size_t eventSignalOffset = 0; + + if (isCopyOnly()) { + NEO::MiFlushArgs args; + args.commandWithPostSync = true; + NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset), + Event::STATE_SIGNALED, args, hwInfo); + } else { + NEO::PipeControlArgs args; + 
args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(!!event->signalScope, hwInfo); + NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *commandContainer.getCommandStream(), NEO::PostSyncMode::ImmediateData, + ptrOffset(baseAddr, eventSignalOffset), Event::STATE_SIGNALED, + hwInfo, + args); + } + } + return ZE_RESULT_SUCCESS; +} + +template +ze_result_t CommandListCoreFamily::appendWriteToMemory(void *desc, + void *ptr, + uint64_t data) { + auto descriptor = reinterpret_cast(desc); + + size_t bufSize = sizeof(uint64_t); + auto dstAllocationStruct = getAlignedAllocation(this->device, ptr, bufSize, false); + UNRECOVERABLE_IF(dstAllocationStruct.alloc == nullptr); + commandContainer.addToResidencyContainer(dstAllocationStruct.alloc); + + const auto &hwInfo = this->device->getHwInfo(); + NEO::PipeControlArgs args; + args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(descriptor->writeScope, hwInfo); + args.dcFlushEnable &= dstAllocationStruct.needsFlush; + const uint64_t gpuAddress = static_cast(dstAllocationStruct.alignedAllocationPtr); + + if (isCopyOnly()) { + NEO::MiFlushArgs args; + args.commandWithPostSync = true; + NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), gpuAddress, + data, args, hwInfo); + } else { + NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *commandContainer.getCommandStream(), + NEO::PostSyncMode::ImmediateData, + gpuAddress, + data, + hwInfo, + args); + } + return ZE_RESULT_SUCCESS; +} + } // namespace L0 diff --git a/level_zero/core/source/get_extension_function_lookup_map.cpp b/level_zero/core/source/get_extension_function_lookup_map.cpp index db30b35bd6..ade4050b07 100644 --- a/level_zero/core/source/get_extension_function_lookup_map.cpp +++ b/level_zero/core/source/get_extension_function_lookup_map.cpp @@ -22,9 +22,12 @@ std::unordered_map getExtensionFunctionsLookupMap() { addToMap(lookupMap, zexMemGetIpcHandles); 
addToMap(lookupMap, zexMemOpenIpcHandles); + + addToMap(lookupMap, zexCommandListAppendWaitOnMemory); + addToMap(lookupMap, zexCommandListAppendWriteToMemory); #undef addToMap return lookupMap; } -} // namespace L0 \ No newline at end of file +} // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/CMakeLists.txt b/level_zero/core/test/unit_tests/sources/cmdlist/CMakeLists.txt index 5101d20717..3ad9734bf0 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/CMakeLists.txt +++ b/level_zero/core/test/unit_tests/sources/cmdlist/CMakeLists.txt @@ -25,7 +25,7 @@ target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_blit.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_fill.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_multipartition_prologue.cpp - ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/test_cmdlist_memory_extension.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_memory_extension.cpp ) if(TESTS_XEHP_AND_LATER) diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp index 8dfa4e34cc..dffe801351 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp @@ -1,34 +1,28 @@ /* - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ -#include "shared/source/command_container/command_encoder.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" -#include "shared/test/common/fixtures/device_fixture.h" -#include "shared/test/common/mocks/mock_memory_manager.h" -#include "shared/test/common/test_macros/test.h" -#include "shared/test/common/test_macros/test_base.h" +#include "shared/test/common/helpers/unit_test_helper.h" +#include "shared/test/common/test_macros/hw_test.h" -#include 
"level_zero/core/source/context/context_imp.h" -#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" +#include "level_zero/api/driver_experimental/public/zex_api.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" -#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" -#include "level_zero/core/test/unit_tests/mocks/mock_device.h" -#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" namespace L0 { namespace ult { -class CommandListMemoryExtensionFixture : public DeviceFixture { +class CommandListWaitOnMemFixture : public DeviceFixture { public: void setUp() { DeviceFixture::setUp(); ze_result_t returnValue; commandList.reset(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); + commandListBcs.reset(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue))); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; @@ -40,6 +34,7 @@ class CommandListMemoryExtensionFixture : public DeviceFixture { eventDesc.signal = 0; eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); size_t size = sizeof(uint32_t); @@ -56,34 +51,515 @@ class CommandListMemoryExtensionFixture : public DeviceFixture { context->freeMem(ptr); event.reset(nullptr); eventPool.reset(nullptr); + commandListBcs.reset(nullptr); commandList.reset(nullptr); DeviceFixture::tearDown(); } std::unique_ptr commandList; + std::unique_ptr commandListBcs; std::unique_ptr eventPool; std::unique_ptr event; uint32_t waitMemData = 1u; void *ptr = nullptr; }; -using CommandListAppendWaitOnMemExtension = Test; +using CommandListAppendWaitOnMem = Test; 
-TEST_F(CommandListAppendWaitOnMemExtension, givenAppendWaitOnMemReturnsUnsupported) { +HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataAndNotEqualOpThenSemaphoreWaitProgrammedCorrectly) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; - result = commandList->appendWaitOnMemory(nullptr, nullptr, 1u, nullptr); - EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL; + result = commandList->appendWaitOnMemory(reinterpret_cast(&desc), ptr, waitMemData, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getCompareOperation(), + MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + EXPECT_EQ(static_cast(waitMemData), cmd->getSemaphoreDataDword()); + + EXPECT_EQ(cmd->getWaitMode(), + MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); } -using CommandListAppendWriteToMemExtension = Test; +HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataAndEqualOpThenSemaphoreWaitProgrammedCorrectly) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; -TEST_F(CommandListAppendWriteToMemExtension, givenAppendWriteToMemReturnsUnsupported) { + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_EQUAL; + result = commandList->appendWaitOnMemory(reinterpret_cast(&desc), ptr, waitMemData, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, 
result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getCompareOperation(), + MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD); + EXPECT_EQ(static_cast(waitMemData), cmd->getSemaphoreDataDword()); + + EXPECT_EQ(cmd->getWaitMode(), + MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); +} + +HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataGreaterOpThenSemaphoreWaitProgrammedCorrectly) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; + + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN; + result = commandList->appendWaitOnMemory(reinterpret_cast(&desc), ptr, waitMemData, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getCompareOperation(), + MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_SDD); + EXPECT_EQ(static_cast(waitMemData), cmd->getSemaphoreDataDword()); + + EXPECT_EQ(cmd->getWaitMode(), + MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); +} + +HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataGreaterThanEqualOpThenSemaphoreWaitProgrammedCorrectly) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + ze_result_t result = ZE_RESULT_SUCCESS; + 
auto &commandContainer = commandList->commandContainer; + + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN_EQUAL; + result = commandList->appendWaitOnMemory(reinterpret_cast(&desc), ptr, waitMemData, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getCompareOperation(), + MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + EXPECT_EQ(static_cast(waitMemData), cmd->getSemaphoreDataDword()); + + EXPECT_EQ(cmd->getWaitMode(), + MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); +} + +HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataLessThanOpThenSemaphoreWaitProgrammedCorrectly) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; + + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN; + result = commandList->appendWaitOnMemory(reinterpret_cast(&desc), ptr, waitMemData, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getCompareOperation(), + MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_LESS_THAN_SDD); + EXPECT_EQ(static_cast(waitMemData), cmd->getSemaphoreDataDword()); + + EXPECT_EQ(cmd->getWaitMode(), + 
MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); +} + +HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataLessThanEqualOpThenSemaphoreWaitProgrammedCorrectly) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; + + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL; + result = commandList->appendWaitOnMemory(reinterpret_cast(&desc), ptr, waitMemData, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getCompareOperation(), + MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_LESS_THAN_OR_EQUAL_SDD); + EXPECT_EQ(static_cast(waitMemData), cmd->getSemaphoreDataDword()); + + EXPECT_EQ(cmd->getWaitMode(), + MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); +} + +HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndInvalidOpThenReturnsInvalid) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; ze_result_t result = ZE_RESULT_SUCCESS; + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_BIT(6); + result = commandList->appendWaitOnMemory(reinterpret_cast(&desc), ptr, waitMemData, nullptr); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); +} + +HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndHostScopeThenSemaphoreWaitAndPipeControlProgrammedCorrectly) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + 
ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; + std::unique_ptr signalEventPool; + std::unique_ptr signalEvent; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 1; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + + signalEventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + signalEvent = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL; + result = commandList->appendWaitOnMemory(reinterpret_cast(&desc), ptr, waitMemData, signalEvent->toHandle()); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + itor++; + auto itorPC = findAll(itor, cmdList.end()); + ASSERT_NE(0u, itorPC.size()); + bool postSyncFound = false; + for (auto it : itorPC) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); + auto gpuAddress = event->getGpuAddress(this->device); + EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); + EXPECT_EQ(NEO::MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); + postSyncFound = true; + } + } + ASSERT_TRUE(postSyncFound); +} + +HWTEST_F(CommandListAppendWaitOnMem, 
givenAppendWaitOnMemWithSignalEventAndNoScopeThenSemaphoreWaitAndPipeControlProgrammedCorrectly) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; + std::unique_ptr signalEventPool; + std::unique_ptr signalEvent; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 1; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + signalEventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + signalEvent = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL; + result = commandList->appendWaitOnMemory(reinterpret_cast(&desc), ptr, waitMemData, signalEvent->toHandle()); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + itor++; + auto itorPC = findAll(itor, cmdList.end()); + ASSERT_NE(0u, itorPC.size()); + bool postSyncFound = false; + for (auto it : itorPC) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); + auto gpuAddress = event->getGpuAddress(this->device); + EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); + 
EXPECT_FALSE(cmd->getDcFlushEnable()); + postSyncFound = true; + } + } + ASSERT_TRUE(postSyncFound); +} + +HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemOnBcsWithSignalEventAndNoScopeThenSemaphoreWaitAndFlushDwProgrammedCorrectly) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandListBcs->commandContainer; + std::unique_ptr signalEventPool; + std::unique_ptr signalEvent; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 1; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + signalEventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + signalEvent = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL; + result = commandListBcs->appendWaitOnMemory(reinterpret_cast(&desc), ptr, waitMemData, signalEvent->toHandle()); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + itor++; + auto itorFDW = findAll(itor, cmdList.end()); + ASSERT_NE(0u, itorFDW.size()); + bool postSyncFound = false; + for (auto it : itorFDW) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD) { + EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); + auto gpuAddress = event->getGpuAddress(device); + EXPECT_EQ(cmd->getDestinationAddress(), gpuAddress); + postSyncFound = true; + } + } + 
ASSERT_TRUE(postSyncFound); +} + +HWTEST2_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithNoScopeAndSystemMemoryPtrThenAlignedPtrUsed, IsAtLeastSkl) { + auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + size_t cmdListHostPtrSize = MemoryConstants::pageSize; + void *cmdListHostBuffer = device->getNEODevice()->getMemoryManager()->allocateSystemMemory(cmdListHostPtrSize, cmdListHostPtrSize); + void *startMemory = cmdListHostBuffer; + void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize); + size_t expectedOffset = ptrDiff(startMemory, baseAddress); + + AlignedAllocationData outData = commandList->getAlignedAllocation(device, startMemory, cmdListHostPtrSize, false); + ASSERT_NE(nullptr, outData.alloc); + auto expectedGpuAddress = static_cast(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize)); + EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer()); + EXPECT_EQ(expectedGpuAddress, outData.alignedAllocationPtr); + EXPECT_EQ(expectedOffset, outData.offset); + + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; + + zex_wait_on_mem_desc_t desc; + desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL; + result = commandList->appendWaitOnMemory(reinterpret_cast(&desc), cmdListHostBuffer, waitMemData, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(static_cast(waitMemData), 
cmd->getSemaphoreDataDword()); + EXPECT_EQ(expectedGpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace); + + commandList->removeHostPtrAllocations(); + device->getNEODevice()->getMemoryManager()->freeSystemMemory(cmdListHostBuffer); +} + +using CommandListAppendWriteToMem = Test; + +HWTEST_F(CommandListAppendWriteToMem, givenAppendWriteToMemWithNoScopeThenPipeControlEncodedCorrectly) { /* Zeroed descriptor (no writeScope): expects a PIPE_CONTROL immediate-data write of 'data' with CS stall set and DC flush cleared */ + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; + + zex_write_to_mem_desc_t desc = {}; uint64_t data = 0xabc; - result = commandList->appendWriteToMemory(nullptr, nullptr, data); - EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); + result = commandList->appendWriteToMemory(reinterpret_cast(&desc), ptr, data); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itorPC = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, itorPC.size()); + bool postSyncFound = false; + for (auto it : itorPC) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { /* only the post-sync write is inspected; other PIPE_CONTROL commands are skipped */ + EXPECT_EQ(cmd->getImmediateData(), data); + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_FALSE(cmd->getDcFlushEnable()); + postSyncFound = true; + } + } + ASSERT_TRUE(postSyncFound); /* fails when no PIPE_CONTROL with an immediate-data post-sync write was parsed */ +} + +HWTEST_F(CommandListAppendWriteToMem, givenAppendWriteToMemOnBcsWithNoScopeThenFlushDwEncodedCorrectly) { /* Same write issued on commandListBcs: expects an MI_FLUSH_DW whose post-sync operation writes 'data' as an immediate QWORD */ + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandListBcs->commandContainer; + + zex_write_to_mem_desc_t desc = {}; + uint64_t data = 0xabc; + result = 
commandListBcs->appendWriteToMemory(reinterpret_cast(&desc), ptr, data); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itorFDW = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, itorFDW.size()); + bool postSyncFound = false; + for (auto it : itorFDW) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD) { /* only the post-sync write is inspected; other MI_FLUSH_DW commands are skipped */ + EXPECT_EQ(cmd->getImmediateData(), data); + postSyncFound = true; + } + } + ASSERT_TRUE(postSyncFound); /* fails when no MI_FLUSH_DW with an immediate-data post-sync write was parsed */ +} + +HWTEST_F(CommandListAppendWriteToMem, givenAppendWriteToMemWithScopeThenPipeControlEncodedCorrectly) { /* writeScope set to HOST: same PIPE_CONTROL checks as the no-scope case, except DC flush must equal the helper's getDcFlushEnable(true, ...) result */ + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; + + zex_write_to_mem_desc_t desc = {}; + desc.writeScope = ZEX_MEM_ACTION_SCOPE_FLAG_HOST; /* the only difference in setup from the no-scope test */ + uint64_t data = 0xabc; + result = commandList->appendWriteToMemory(reinterpret_cast(&desc), ptr, data); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itorPC = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, itorPC.size()); + bool postSyncFound = false; + for (auto it : itorPC) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_EQ(cmd->getImmediateData(), data); + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_EQ(NEO::MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); /* expected value is computed from defaultHwInfo rather than hard-coded */ + postSyncFound = 
true; + } + } + ASSERT_TRUE(postSyncFound); +} + +HWTEST2_F(CommandListAppendWriteToMem, givenAppendWriteToMemWithScopeThenPipeControlEncodedCorrectlyAlignedPtrUsed, IsAtLeastSkl) { /* Writes to a system-memory pointer with HOST scope: the PIPE_CONTROL target must be the page-aligned GPU address of the host-pointer allocation */ + auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); /* local list; shadows the commandList used by the other tests in this file */ + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + size_t cmdListHostPtrSize = MemoryConstants::pageSize; + void *cmdListHostBuffer = device->getNEODevice()->getMemoryManager()->allocateSystemMemory(cmdListHostPtrSize, cmdListHostPtrSize); /* released with freeSystemMemory at the end of the test */ + void *startMemory = cmdListHostBuffer; + void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize); + size_t expectedOffset = ptrDiff(startMemory, baseAddress); /* distance of the buffer from its page base */ + + AlignedAllocationData outData = commandList->getAlignedAllocation(device, startMemory, cmdListHostPtrSize, false); + ASSERT_NE(nullptr, outData.alloc); + auto expectedGpuAddress = static_cast(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize)); + EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer()); + EXPECT_EQ(expectedGpuAddress, outData.alignedAllocationPtr); + EXPECT_EQ(expectedOffset, outData.offset); + + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto &commandContainer = commandList->commandContainer; + + zex_write_to_mem_desc_t desc = {}; + desc.writeScope = ZEX_MEM_ACTION_SCOPE_FLAG_HOST; + uint64_t data = 0xabc; + result = commandList->appendWriteToMemory(reinterpret_cast(&desc), cmdListHostBuffer, data); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itorPC = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, itorPC.size()); + bool postSyncFound = false; + for (auto it : itorPC) { + auto cmd = 
genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_EQ(cmd->getImmediateData(), data); + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_EQ(NEO::MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); + uint64_t pcAddress = cmd->getAddress() | (static_cast(cmd->getAddressHigh()) << 32); + EXPECT_EQ(expectedGpuAddress, pcAddress); + postSyncFound = true; + } + } + ASSERT_TRUE(postSyncFound); + commandList->removeHostPtrAllocations(); + device->getNEODevice()->getMemoryManager()->freeSystemMemory(cmdListHostBuffer); } } // namespace ult diff --git a/level_zero/doc/experimental_extensions/WAIT_AND_WRITE_ON_MEMORY.md b/level_zero/doc/experimental_extensions/WAIT_AND_WRITE_ON_MEMORY.md new file mode 100644 index 0000000000..29b15bead5 --- /dev/null +++ b/level_zero/doc/experimental_extensions/WAIT_AND_WRITE_ON_MEMORY.md @@ -0,0 +1,103 @@ + + +# Wait On Memory and Write To Memory + +* [Overview](#Overview) +* [Definitions](#Definitions) +* [Known Issues and Limitations](#Known-Issues-and-Limitations) + +# Overview + +## Wait On Memory + +Wait On Memory provides a low-level dependency mechanism through memory locations. It could be used to implement an event-like synchronization mechanism in which the wait on a memory location is on a value written by an EU thread. A potential use case is to hide context switching latencies between different engine classes. + +This functionality is also useful in heterogeneous multi-device synchronization setups where, for example, a network card could write to a memory location to indicate that a packet is ready for consumption, and the device waits on that memory location via this extension. + +## Write To Memory +Write To Memory is the counterpart to the Wait On Memory method; it allows for scenarios where the user prefers to write to the memory location from the accelerator device. 
+ +The usage models are similar to those of the Wait On Memory functionality: it could be used to set up a cross-engine dependency chain within a device, or to provide a mechanism for setting up heterogeneous multi-device dependency chains. + +# Definitions + +```cpp +typedef struct _zex_wait_on_mem_desc_t { + zex_wait_on_mem_action_flags_t actionFlag; + zex_mem_action_scope_flags_t waitScope; +} zex_wait_on_mem_desc_t; + +typedef struct _zex_write_to_mem_desc_t { + zex_mem_action_scope_flags_t writeScope; +} zex_write_to_mem_desc_t; +``` +## Interfaces + +```cpp +zexCommandListAppendWaitOnMemory( + zex_command_list_handle_t hCommandList, + zex_wait_on_mem_desc_t *desc, + void *ptr, + uint32_t data, + zex_event_handle_t hSignalEvent); + +zexCommandListAppendWriteToMemory( + zex_command_list_handle_t hCommandList, + zex_write_to_mem_desc_t *desc, + void *ptr, + uint64_t data); +``` +## Enums +```cpp +typedef uint32_t zex_mem_action_scope_flags_t; +typedef enum _zex_mem_action_scope_flag_t { + ZEX_MEM_ACTION_SCOPE_FLAG_SUBDEVICE = ZEX_BIT(0), + ZEX_MEM_ACTION_SCOPE_FLAG_DEVICE = ZEX_BIT(1), + ZEX_MEM_ACTION_SCOPE_FLAG_HOST = ZEX_BIT(2), + ZEX_MEM_ACTION_SCOPE_FLAG_FORCE_UINT32 = 0x7fffffff +} zex_mem_action_scope_flag_t; + +typedef uint32_t zex_wait_on_mem_action_flags_t; +typedef enum _zex_wait_on_mem_action_flag_t { + ZEX_WAIT_ON_MEMORY_FLAG_EQUAL = ZEX_BIT(0), + ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL = ZEX_BIT(1), + ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN = ZEX_BIT(2), + ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN_EQUAL = ZEX_BIT(3), + ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN = ZEX_BIT(4), + ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL = ZEX_BIT(5), + ZEX_WAIT_ON_MEMORY_FLAG_FORCE_UINT32 = 0x7fffffff +} zex_wait_on_mem_action_flag_t; +``` +## Programming example + +```cpp +// Create a descriptor for wait on mem +zex_wait_on_mem_desc_t waitDesc = {}; +waitDesc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL; +uint32_t initData = 0; + +//Initialize memory location +void *zeBuf = nullptr; + +//Allocate the device memory that will be waited on 
+zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuf); + +//Append Wait On Memory +zeDriverGetExtensionFunctionAddress(driverHandle, "zexCommandListAppendWaitOnMemory", (void **)&pfnWaitMemFn); +pfnWaitMemFn(cmdList, &waitDesc, zeBuf, initData, NULL); + +zeDriverGetExtensionFunctionAddress(driverHandle, "zexCommandListAppendWriteToMemory", (void **)&pfnWriteMemFn); +zex_write_to_mem_desc_t writeDesc = {}; +uint64_t signalData = 0x2; +pfnWriteMemFn(cmdList, &writeDesc, zeBuf, signalData); +``` +# Known Issues and Limitations + +* Hangs may be seen when BCS is used for appending a memory copy with IPC buffers and waitOnMemory is performed on any queue (CCS or BCS). +  * The workaround (WA) is to use a barrier after WaitOnMemory and before any subsequent action.