L0: Add experimental extensions for wait and write on memory

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-10-26 13:07:57 +00:00
committed by Compute-Runtime-Automation
parent f067145137
commit 51fa04fc60
12 changed files with 858 additions and 41 deletions

View File

@ -7,6 +7,9 @@
set(L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/zex_api.h
${CMAKE_CURRENT_SOURCE_DIR}/zex_cmdlist.cpp
${CMAKE_CURRENT_SOURCE_DIR}/zex_cmdlist.h
${CMAKE_CURRENT_SOURCE_DIR}/zex_common.h
${CMAKE_CURRENT_SOURCE_DIR}/zex_driver.cpp
${CMAKE_CURRENT_SOURCE_DIR}/zex_driver.h
${CMAKE_CURRENT_SOURCE_DIR}/zex_memory.cpp
@ -15,4 +18,4 @@ set(L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API
${CMAKE_CURRENT_SOURCE_DIR}/zex_module.h
)
set_property(GLOBAL PROPERTY L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API ${L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API})
set_property(GLOBAL PROPERTY L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API ${L0_PUBLIC_DRIVER_EXPERIMENTAL_EXTENSIONS_API})

View File

@ -15,8 +15,10 @@
#include <level_zero/ze_api.h>
// driver experimental API headers
#include "level_zero/api/driver_experimental/public/zex_cmdlist.h"
#include "zex_driver.h"
#include "zex_memory.h"
#include "zex_module.h"
#endif // _ZEX_API_H
#endif // _ZEX_API_H

View File

@ -0,0 +1,55 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/api/driver_experimental/public/zex_cmdlist.h"
#include "level_zero/core/source/cmdlist/cmdlist.h"
namespace L0 {
/// @brief Experimental entry point: appends a wait on a memory location to a command list.
///
/// Validates the arguments and forwards them to CommandList::appendWaitOnMemory.
/// No exception is allowed to cross the API boundary; each one is translated
/// into a ze_result_t value.
///
/// @param hCommandList handle of the command list to append to
/// @param desc         wait descriptor, forwarded to the command list as an opaque pointer
/// @param ptr          address of the memory location to wait on
/// @param data         value the memory location is compared against
/// @param hSignalEvent optional event handle forwarded to the command list; may be nullptr
/// @return ZE_RESULT_ERROR_INVALID_ARGUMENT when hCommandList, desc or ptr is nullptr,
///         ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY on std::bad_alloc,
///         ZE_RESULT_ERROR_UNKNOWN on any other std::exception,
///         otherwise the result reported by the command list.
ZE_APIEXPORT ze_result_t ZE_APICALL
zexCommandListAppendWaitOnMemory(
    zex_command_list_handle_t hCommandList,
    zex_wait_on_mem_desc_t *desc,
    void *ptr,
    uint32_t data,
    zex_event_handle_t hSignalEvent) {
    try {
        // Neither the descriptor nor the target address is optional: the command
        // list reads the descriptor and programs the address, so reject nullptr
        // here instead of crashing further down.
        if (nullptr == hCommandList || nullptr == desc || nullptr == ptr) {
            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
        }
        return L0::CommandList::fromHandle(hCommandList)->appendWaitOnMemory(static_cast<void *>(desc), ptr, data, static_cast<ze_event_handle_t>(hSignalEvent));
    } catch (const ze_result_t &result) {
        return result;
    } catch (const std::bad_alloc &) {
        return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
    } catch (const std::exception &) {
        return ZE_RESULT_ERROR_UNKNOWN;
    }
}
/// @brief Experimental entry point: appends a write of an immediate value to memory.
///
/// Validates the arguments and forwards them to CommandList::appendWriteToMemory.
/// No exception is allowed to cross the API boundary; each one is translated
/// into a ze_result_t value.
///
/// @param hCommandList handle of the command list to append to
/// @param desc         write descriptor, forwarded to the command list as an opaque pointer
/// @param ptr          address of the memory location to write
/// @param data         value to write
/// @return ZE_RESULT_ERROR_INVALID_ARGUMENT when hCommandList, desc or ptr is nullptr,
///         ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY on std::bad_alloc,
///         ZE_RESULT_ERROR_UNKNOWN on any other std::exception,
///         otherwise the result reported by the command list.
ZE_APIEXPORT ze_result_t ZE_APICALL
zexCommandListAppendWriteToMemory(
    zex_command_list_handle_t hCommandList,
    zex_write_to_mem_desc_t *desc,
    void *ptr,
    uint64_t data) {
    try {
        // Neither the descriptor nor the destination address is optional: the
        // command list reads the descriptor and programs the address, so reject
        // nullptr here instead of crashing further down.
        if (nullptr == hCommandList || nullptr == desc || nullptr == ptr) {
            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
        }
        return L0::CommandList::fromHandle(hCommandList)->appendWriteToMemory(static_cast<void *>(desc), ptr, data);
    } catch (const ze_result_t &result) {
        return result;
    } catch (const std::bad_alloc &) {
        return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
    } catch (const std::exception &) {
        return ZE_RESULT_ERROR_UNKNOWN;
    }
}
} // namespace L0

View File

@ -0,0 +1,27 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "level_zero/api/driver_experimental/public/zex_common.h"
#include <level_zero/ze_api.h>
namespace L0 {
/// @brief Appends a wait on a 32-bit memory location to a command list.
/// @param hCommandList handle of the command list; nullptr yields ZE_RESULT_ERROR_INVALID_ARGUMENT
/// @param desc         wait descriptor; its actionFlag selects the comparison to perform
/// @param ptr          address of the memory location that is compared
/// @param data         value the memory location is compared against
/// @param hSignalEvent optional event signalled by a command appended after the wait; may be nullptr
/// @return result of the command list's appendWaitOnMemory, or an error code
///         translated from an exception thrown while appending
ZE_APIEXPORT ze_result_t ZE_APICALL
zexCommandListAppendWaitOnMemory(
zex_command_list_handle_t hCommandList,
zex_wait_on_mem_desc_t *desc,
void *ptr,
uint32_t data,
zex_event_handle_t hSignalEvent);
/// @brief Appends a write of an immediate 64-bit value to a memory location.
/// @param hCommandList handle of the command list; nullptr yields ZE_RESULT_ERROR_INVALID_ARGUMENT
/// @param desc         write descriptor; its writeScope feeds the cache-flush decision
/// @param ptr          address of the memory location that is written
/// @param data         value to write
/// @return result of the command list's appendWriteToMemory, or an error code
///         translated from an exception thrown while appending
ZE_APIEXPORT ze_result_t ZE_APICALL
zexCommandListAppendWriteToMemory(
zex_command_list_handle_t hCommandList,
zex_write_to_mem_desc_t *desc,
void *ptr,
uint64_t data);
} // namespace L0

View File

@ -0,0 +1,61 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#ifndef _ZEX_COMMON_H
#define _ZEX_COMMON_H
#if defined(__cplusplus)
#pragma once
#endif
#include <level_zero/ze_api.h>
#if defined(__cplusplus)
extern "C" {
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of command list object
typedef ze_command_list_handle_t zex_command_list_handle_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of event object
typedef ze_event_handle_t zex_event_handle_t;
/// @brief Builds a single-bit mask. The argument is parenthesized so that a
///        compound expression (e.g. `a | b`) is shifted as a whole.
#define ZEX_BIT(_i) (1 << (_i))
///////////////////////////////////////////////////////////////////////////////
/// @brief Bit-field type that carries zex_mem_action_scope_flag_t values
typedef uint32_t zex_mem_action_scope_flags_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Scope flags for the experimental memory wait/write actions
/// NOTE(review): the exact meaning of each scope is not spelled out here;
/// confirm against the extension specification.
typedef enum _zex_mem_action_scope_flag_t {
ZEX_MEM_ACTION_SCOPE_FLAG_SUBDEVICE = ZEX_BIT(0), ///< bit 0
ZEX_MEM_ACTION_SCOPE_FLAG_DEVICE = ZEX_BIT(1), ///< bit 1
ZEX_MEM_ACTION_SCOPE_FLAG_HOST = ZEX_BIT(2), ///< bit 2
ZEX_MEM_ACTION_SCOPE_FLAG_FORCE_UINT32 = 0x7fffffff ///< sentinel, not a scope; per its name it widens the enum range to 32 bits
} zex_mem_action_scope_flag_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Integer type that carries a zex_wait_on_mem_action_flag_t value
typedef uint32_t zex_wait_on_mem_action_flags_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Comparison performed by a wait-on-memory action
/// Although declared as bits, the command-list implementation matches the
/// value exactly: it must be one single enumerator, and any other value
/// (including a combination) is rejected with ZE_RESULT_ERROR_INVALID_ARGUMENT.
typedef enum _zex_wait_on_mem_action_flag_t {
ZEX_WAIT_ON_MEMORY_FLAG_EQUAL = ZEX_BIT(0), ///< wait with the "equal" comparison
ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL = ZEX_BIT(1), ///< wait with the "not equal" comparison
ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN = ZEX_BIT(2), ///< wait with the "greater than" comparison
ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN_EQUAL = ZEX_BIT(3), ///< wait with the "greater than or equal" comparison
ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN = ZEX_BIT(4), ///< wait with the "less than" comparison
ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL = ZEX_BIT(5), ///< wait with the "less than or equal" comparison
ZEX_WAIT_ON_MEMORY_FLAG_FORCE_UINT32 = 0x7fffffff ///< sentinel, not a comparison; per its name it widens the enum range to 32 bits
} zex_wait_on_mem_action_flag_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Descriptor for zexCommandListAppendWaitOnMemory
typedef struct _zex_wait_on_mem_desc_t {
zex_wait_on_mem_action_flags_t actionFlag; ///< comparison to perform; exactly one zex_wait_on_mem_action_flag_t value
zex_mem_action_scope_flags_t waitScope; ///< scope of the wait; NOTE(review): not read by the command-list implementation added alongside this header - confirm intended use
} zex_wait_on_mem_desc_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Descriptor for zexCommandListAppendWriteToMemory
typedef struct _zex_write_to_mem_desc_t {
zex_mem_action_scope_flags_t writeScope; ///< scope of the write; the command list passes it to its DC-flush decision (zero means no scope)
} zex_write_to_mem_desc_t;
#if defined(__cplusplus)
} // extern "C"
#endif
#endif // _ZEX_COMMON_H

View File

@ -53,4 +53,4 @@ zexMemOpenIpcHandles(
void **pptr) {
return L0::zexMemOpenIpcHandles(hContext, hDevice, numIpcHandles, pIpcHandles, flags, pptr);
}
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -59,19 +59,4 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPipeControl(void *dstPtr
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Placeholder implementation: ignores all arguments and reports the
// wait-on-memory extension as unsupported for every core family.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
void *ptr,
uint32_t data,
ze_event_handle_t hSignalEvent) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Placeholder implementation: ignores all arguments and reports the
// write-to-memory extension as unsupported for every core family.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc,
void *ptr,
uint64_t data) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
} // namespace L0

View File

@ -36,6 +36,7 @@
#include "shared/source/program/sync_buffer_handler.inl"
#include "shared/source/utilities/software_tags_manager.h"
#include "level_zero/api/driver_experimental/public/zex_cmdlist.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h"
#include "level_zero/core/source/device/device.h"
@ -2596,4 +2597,105 @@ void CommandListCoreFamily<gfxCoreFamily>::setupFillKernelArguments(size_t baseO
}
}
// Appends an MI_SEMAPHORE_WAIT on the 32-bit location at `ptr`, compared against
// `data` with the operation selected by the descriptor's actionFlag, and, when
// `hSignalEvent` is provided, a post-sync write that marks the event signalled.
//
// desc         - points to a zex_wait_on_mem_desc_t; must not be nullptr
// ptr          - memory location to wait on; made resident for this command list
// data         - semaphore data dword
// hSignalEvent - optional event to signal after the wait; may be nullptr
//
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when desc is nullptr or actionFlag is
// not exactly one of the ZEX_WAIT_ON_MEMORY_FLAG_* values; ZE_RESULT_SUCCESS otherwise.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
                                                                     void *ptr,
                                                                     uint32_t data,
                                                                     ze_event_handle_t hSignalEvent) {
    using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;

    auto descriptor = static_cast<zex_wait_on_mem_desc_t *>(desc);
    if (descriptor == nullptr) {
        // Without a descriptor there is no comparison to program.
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    // Validate before anything is emitted so an invalid request leaves the
    // command stream and residency container untouched.
    COMPARE_OPERATION comparator;
    switch (descriptor->actionFlag) {
    case ZEX_WAIT_ON_MEMORY_FLAG_EQUAL:
        comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD;
        break;
    case ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL:
        comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD;
        break;
    case ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN:
        comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_SDD;
        break;
    case ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN_EQUAL:
        comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD;
        break;
    case ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN:
        comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_LESS_THAN_SDD;
        break;
    case ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL:
        comparator = COMPARE_OPERATION::COMPARE_OPERATION_SAD_LESS_THAN_OR_EQUAL_SDD;
        break;
    default:
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    auto srcAllocationStruct = getAlignedAllocation(this->device, ptr, sizeof(uint32_t), true);
    UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr);
    commandContainer.addToResidencyContainer(srcAllocationStruct.alloc);
    uint64_t gpuAddress = static_cast<uint64_t>(srcAllocationStruct.alignedAllocationPtr);
    NEO::EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
                                                               gpuAddress,
                                                               data,
                                                               comparator);

    if (hSignalEvent) {
        auto event = Event::fromHandle(hSignalEvent);
        const auto &hwInfo = this->device->getHwInfo();
        commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
        const uint64_t baseAddr = event->getGpuAddress(this->device);
        const size_t eventSignalOffset = 0;

        if (isCopyOnly()) {
            // Copy-only command lists signal through MI_FLUSH_DW with post-sync.
            NEO::MiFlushArgs args;
            args.commandWithPostSync = true;
            NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset),
                                                              Event::STATE_SIGNALED, args, hwInfo);
        } else {
            // Other command lists signal through a barrier with an immediate-data
            // post-sync; the DC flush follows the event's signal scope.
            NEO::PipeControlArgs args;
            args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(!!event->signalScope, hwInfo);
            NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
                *commandContainer.getCommandStream(), NEO::PostSyncMode::ImmediateData,
                ptrOffset(baseAddr, eventSignalOffset), Event::STATE_SIGNALED,
                hwInfo,
                args);
        }
    }
    return ZE_RESULT_SUCCESS;
}
// Appends a command that writes the immediate value `data` to the location at `ptr`.
//
// desc - points to a zex_write_to_mem_desc_t; must not be nullptr. Its
//        writeScope feeds the DC-flush decision on non-copy command lists.
// ptr  - destination address; made resident for this command list
// data - immediate value to write
//
// Copy-only command lists use MI_FLUSH_DW with post-sync; all others use a
// barrier with an immediate-data post-sync operation.
//
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when desc is nullptr, ZE_RESULT_SUCCESS otherwise.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc,
                                                                      void *ptr,
                                                                      uint64_t data) {
    auto descriptor = static_cast<zex_write_to_mem_desc_t *>(desc);
    if (descriptor == nullptr) {
        // The descriptor is read below; reject instead of dereferencing nullptr.
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    auto dstAllocationStruct = getAlignedAllocation(this->device, ptr, sizeof(uint64_t), false);
    UNRECOVERABLE_IF(dstAllocationStruct.alloc == nullptr);
    commandContainer.addToResidencyContainer(dstAllocationStruct.alloc);

    const auto &hwInfo = this->device->getHwInfo();
    const uint64_t gpuAddress = static_cast<uint64_t>(dstAllocationStruct.alignedAllocationPtr);

    if (isCopyOnly()) {
        NEO::MiFlushArgs flushArgs;
        flushArgs.commandWithPostSync = true;
        NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), gpuAddress,
                                                          data, flushArgs, hwInfo);
    } else {
        // Pipe-control arguments are only meaningful on this path, so they are
        // built here rather than ahead of the branch.
        NEO::PipeControlArgs pipeControlArgs;
        pipeControlArgs.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(descriptor->writeScope, hwInfo);
        pipeControlArgs.dcFlushEnable &= dstAllocationStruct.needsFlush;
        NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
            *commandContainer.getCommandStream(),
            NEO::PostSyncMode::ImmediateData,
            gpuAddress,
            data,
            hwInfo,
            pipeControlArgs);
    }
    return ZE_RESULT_SUCCESS;
}
} // namespace L0

View File

@ -22,9 +22,12 @@ std::unordered_map<std::string, void *> getExtensionFunctionsLookupMap() {
addToMap(lookupMap, zexMemGetIpcHandles);
addToMap(lookupMap, zexMemOpenIpcHandles);
addToMap(lookupMap, zexCommandListAppendWaitOnMemory);
addToMap(lookupMap, zexCommandListAppendWriteToMemory);
#undef addToMap
return lookupMap;
}
} // namespace L0
} // namespace L0

View File

@ -25,7 +25,7 @@ target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_blit.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_fill.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_multipartition_prologue.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/test_cmdlist_memory_extension.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_memory_extension.cpp
)
if(TESTS_XEHP_AND_LATER)

View File

@ -1,34 +1,28 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/common/test_macros/test_base.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/context/context_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/api/driver_experimental/public/zex_api.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h"
namespace L0 {
namespace ult {
class CommandListMemoryExtensionFixture : public DeviceFixture {
class CommandListWaitOnMemFixture : public DeviceFixture {
public:
void setUp() {
DeviceFixture::setUp();
ze_result_t returnValue;
commandList.reset(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
commandListBcs.reset(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)));
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
@ -40,6 +34,7 @@ class CommandListMemoryExtensionFixture : public DeviceFixture {
eventDesc.signal = 0;
eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
event = std::unique_ptr<Event>(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
size_t size = sizeof(uint32_t);
@ -56,34 +51,515 @@ class CommandListMemoryExtensionFixture : public DeviceFixture {
context->freeMem(ptr);
event.reset(nullptr);
eventPool.reset(nullptr);
commandListBcs.reset(nullptr);
commandList.reset(nullptr);
DeviceFixture::tearDown();
}
std::unique_ptr<L0::ult::CommandList> commandList;
std::unique_ptr<L0::ult::CommandList> commandListBcs;
std::unique_ptr<EventPool> eventPool;
std::unique_ptr<Event> event;
uint32_t waitMemData = 1u;
void *ptr = nullptr;
};
using CommandListAppendWaitOnMemExtension = Test<CommandListMemoryExtensionFixture>;
using CommandListAppendWaitOnMem = Test<CommandListWaitOnMemFixture>;
TEST_F(CommandListAppendWaitOnMemExtension, givenAppendWaitOnMemReturnsUnsupported) {
HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataAndNotEqualOpThenSemaphoreWaitProgrammedCorrectly) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
ze_result_t result = ZE_RESULT_SUCCESS;
auto &commandContainer = commandList->commandContainer;
result = commandList->appendWaitOnMemory(nullptr, nullptr, 1u, nullptr);
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
zex_wait_on_mem_desc_t desc;
desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL;
result = commandList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), ptr, waitMemData, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
EXPECT_EQ(cmd->getCompareOperation(),
MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
EXPECT_EQ(static_cast<uint32_t>(waitMemData), cmd->getSemaphoreDataDword());
EXPECT_EQ(cmd->getWaitMode(),
MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
}
using CommandListAppendWriteToMemExtension = Test<CommandListMemoryExtensionFixture>;
HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataAndEqualOpThenSemaphoreWaitProgrammedCorrectly) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
ze_result_t result = ZE_RESULT_SUCCESS;
auto &commandContainer = commandList->commandContainer;
TEST_F(CommandListAppendWriteToMemExtension, givenAppendWriteToMemReturnsUnsupported) {
zex_wait_on_mem_desc_t desc;
desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_EQUAL;
result = commandList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), ptr, waitMemData, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
auto cmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
EXPECT_EQ(cmd->getCompareOperation(),
MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD);
EXPECT_EQ(static_cast<uint32_t>(waitMemData), cmd->getSemaphoreDataDword());
EXPECT_EQ(cmd->getWaitMode(),
MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
}
// Verifies that ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN programs an MI_SEMAPHORE_WAIT
// with the matching compare operation, the requested data dword and polling mode.
HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataGreaterOpThenSemaphoreWaitProgrammedCorrectly) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto &commandContainer = commandList->commandContainer;

    zex_wait_on_mem_desc_t desc = {};
    desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN;
    result = commandList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), ptr, waitMemData, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the iterator is dereferenced right below.
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
    EXPECT_EQ(cmd->getCompareOperation(),
              MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_SDD);
    EXPECT_EQ(static_cast<uint32_t>(waitMemData), cmd->getSemaphoreDataDword());
    EXPECT_EQ(cmd->getWaitMode(),
              MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
}
// Verifies that ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN_EQUAL programs an MI_SEMAPHORE_WAIT
// with the matching compare operation, the requested data dword and polling mode.
HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataGreaterThanEqualOpThenSemaphoreWaitProgrammedCorrectly) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto &commandContainer = commandList->commandContainer;

    zex_wait_on_mem_desc_t desc = {};
    desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN_EQUAL;
    result = commandList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), ptr, waitMemData, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the iterator is dereferenced right below.
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
    EXPECT_EQ(cmd->getCompareOperation(),
              MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
    EXPECT_EQ(static_cast<uint32_t>(waitMemData), cmd->getSemaphoreDataDword());
    EXPECT_EQ(cmd->getWaitMode(),
              MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
}
// Verifies that ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN programs an MI_SEMAPHORE_WAIT
// with the matching compare operation, the requested data dword and polling mode.
HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataLessThanOpThenSemaphoreWaitProgrammedCorrectly) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto &commandContainer = commandList->commandContainer;

    zex_wait_on_mem_desc_t desc = {};
    desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN;
    result = commandList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), ptr, waitMemData, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the iterator is dereferenced right below.
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
    EXPECT_EQ(cmd->getCompareOperation(),
              MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_LESS_THAN_SDD);
    EXPECT_EQ(static_cast<uint32_t>(waitMemData), cmd->getSemaphoreDataDword());
    EXPECT_EQ(cmd->getWaitMode(),
              MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
}
// Verifies that ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL programs an MI_SEMAPHORE_WAIT
// with the matching compare operation, the requested data dword and polling mode.
HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndDataLessThanEqualOpThenSemaphoreWaitProgrammedCorrectly) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto &commandContainer = commandList->commandContainer;

    zex_wait_on_mem_desc_t desc = {};
    desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL;
    result = commandList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), ptr, waitMemData, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the iterator is dereferenced right below.
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
    EXPECT_EQ(cmd->getCompareOperation(),
              MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_LESS_THAN_OR_EQUAL_SDD);
    EXPECT_EQ(static_cast<uint32_t>(waitMemData), cmd->getSemaphoreDataDword());
    EXPECT_EQ(cmd->getWaitMode(),
              MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
}
// Verifies that an action flag outside the defined ZEX_WAIT_ON_MEMORY_FLAG_* set
// is rejected with ZE_RESULT_ERROR_INVALID_ARGUMENT.
HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithValidAddressAndInvalidOpThenReturnsInvalid) {
    zex_wait_on_mem_desc_t desc = {};
    desc.actionFlag = ZEX_BIT(6); // first bit past the last defined comparison flag

    const ze_result_t result = commandList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), ptr, waitMemData, nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
// Verifies that a wait with a host-scoped signal event programs MI_SEMAPHORE_WAIT
// followed by a PIPE_CONTROL whose immediate-data post-sync targets the signal
// event's own address and carries the DC flush expected for a scoped event.
HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndHostScopeThenSemaphoreWaitAndPipeControlProgrammedCorrectly) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto &commandContainer = commandList->commandContainer;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    eventPoolDesc.count = 1;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;

    // The signal event lives in its own pool so that its address is checked,
    // not the address of the fixture event.
    auto signalEventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto signalEvent = std::unique_ptr<Event>(Event::create<uint32_t>(signalEventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, signalEvent);

    zex_wait_on_mem_desc_t desc = {};
    desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL;
    result = commandList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), ptr, waitMemData, signalEvent->toHandle());
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the iterator is advanced right below.
    ASSERT_NE(cmdList.end(), itor);
    itor++;

    auto itorPC = findAll<PIPE_CONTROL *>(itor, cmdList.end());
    ASSERT_NE(0u, itorPC.size());
    bool postSyncFound = false;
    for (auto it : itorPC) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
            auto gpuAddress = signalEvent->getGpuAddress(this->device);
            EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
            EXPECT_EQ(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable());
            postSyncFound = true;
        }
    }
    ASSERT_TRUE(postSyncFound);
}
// Verifies that a wait with an unscoped signal event programs MI_SEMAPHORE_WAIT
// followed by a PIPE_CONTROL whose immediate-data post-sync targets the signal
// event's own address and has DC flush disabled.
HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithSignalEventAndNoScopeThenSemaphoreWaitAndPipeControlProgrammedCorrectly) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto &commandContainer = commandList->commandContainer;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    eventPoolDesc.count = 1;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    // The signal event lives in its own pool so that its address is checked,
    // not the address of the fixture event.
    auto signalEventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto signalEvent = std::unique_ptr<Event>(Event::create<uint32_t>(signalEventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, signalEvent);

    zex_wait_on_mem_desc_t desc = {};
    desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL;
    result = commandList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), ptr, waitMemData, signalEvent->toHandle());
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the iterator is advanced right below.
    ASSERT_NE(cmdList.end(), itor);
    itor++;

    auto itorPC = findAll<PIPE_CONTROL *>(itor, cmdList.end());
    ASSERT_NE(0u, itorPC.size());
    bool postSyncFound = false;
    for (auto it : itorPC) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
            auto gpuAddress = signalEvent->getGpuAddress(this->device);
            EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
            EXPECT_FALSE(cmd->getDcFlushEnable());
            postSyncFound = true;
        }
    }
    ASSERT_TRUE(postSyncFound);
}
// Verifies that on a copy-engine command list a wait with a signal event programs
// MI_SEMAPHORE_WAIT followed by an MI_FLUSH_DW whose post-sync write targets the
// signal event's own address with the signalled state.
HWTEST_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemOnBcsWithSignalEventAndNoScopeThenSemaphoreWaitAndFlushDwProgrammedCorrectly) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto &commandContainer = commandListBcs->commandContainer;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    eventPoolDesc.count = 1;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    // The signal event lives in its own pool so that its address is checked,
    // not the address of the fixture event.
    auto signalEventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto signalEvent = std::unique_ptr<Event>(Event::create<uint32_t>(signalEventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, signalEvent);

    zex_wait_on_mem_desc_t desc = {};
    desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL;
    result = commandListBcs->appendWaitOnMemory(reinterpret_cast<void *>(&desc), ptr, waitMemData, signalEvent->toHandle());
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the iterator is advanced right below.
    ASSERT_NE(cmdList.end(), itor);
    itor++;

    auto itorFDW = findAll<MI_FLUSH_DW *>(itor, cmdList.end());
    ASSERT_NE(0u, itorFDW.size());
    bool postSyncFound = false;
    for (auto it : itorFDW) {
        auto cmd = genCmdCast<MI_FLUSH_DW *>(*it);
        if (cmd->getPostSyncOperation() == MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD) {
            EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
            auto gpuAddress = signalEvent->getGpuAddress(device);
            EXPECT_EQ(cmd->getDestinationAddress(), gpuAddress);
            postSyncFound = true;
        }
    }
    ASSERT_TRUE(postSyncFound);
}
// Verifies that waiting on plain system memory resolves the pointer through
// getAlignedAllocation and programs MI_SEMAPHORE_WAIT with the aligned GPU address.
HWTEST2_F(CommandListAppendWaitOnMem, givenAppendWaitOnMemWithNoScopeAndSystemMemoryPtrThenAlignedPtrUsed, IsAtLeastSkl) {
    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    size_t cmdListHostPtrSize = MemoryConstants::pageSize;
    void *cmdListHostBuffer = device->getNEODevice()->getMemoryManager()->allocateSystemMemory(cmdListHostPtrSize, cmdListHostPtrSize);
    void *startMemory = cmdListHostBuffer;
    void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize);
    size_t expectedOffset = ptrDiff(startMemory, baseAddress);

    // First resolve the host pointer directly to learn the expected GPU address.
    AlignedAllocationData outData = commandList->getAlignedAllocation(device, startMemory, cmdListHostPtrSize, false);
    ASSERT_NE(nullptr, outData.alloc);
    auto expectedGpuAddress = static_cast<uintptr_t>(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize));
    EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer());
    EXPECT_EQ(expectedGpuAddress, outData.alignedAllocationPtr);
    EXPECT_EQ(expectedOffset, outData.offset);

    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto &commandContainer = commandList->commandContainer;

    zex_wait_on_mem_desc_t desc = {};
    desc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL;
    result = commandList->appendWaitOnMemory(reinterpret_cast<void *>(&desc), cmdListHostBuffer, waitMemData, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace;
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the iterator is dereferenced right below.
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
    EXPECT_EQ(static_cast<uint32_t>(waitMemData), cmd->getSemaphoreDataDword());
    EXPECT_EQ(expectedGpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace);

    commandList->removeHostPtrAllocations();
    device->getNEODevice()->getMemoryManager()->freeSystemMemory(cmdListHostBuffer);
}
using CommandListAppendWriteToMem = Test<CommandListWaitOnMemFixture>;
HWTEST_F(CommandListAppendWriteToMem, givenAppendWriteToMemWithNoScopeThenPipeControlEncodedCorrectly) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
ze_result_t result = ZE_RESULT_SUCCESS;
auto &commandContainer = commandList->commandContainer;
zex_write_to_mem_desc_t desc = {};
uint64_t data = 0xabc;
result = commandList->appendWriteToMemory(nullptr, nullptr, data);
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
result = commandList->appendWriteToMemory(reinterpret_cast<void *>(&desc), ptr, data);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(0u, itorPC.size());
bool postSyncFound = false;
for (auto it : itorPC) {
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
EXPECT_EQ(cmd->getImmediateData(), data);
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
EXPECT_FALSE(cmd->getDcFlushEnable());
postSyncFound = true;
}
}
ASSERT_TRUE(postSyncFound);
}
// Checks that a write-to-memory on the copy-engine command list is encoded as an
// MI_FLUSH_DW with a QWORD immediate-data post-sync carrying the requested value.
HWTEST_F(CommandListAppendWriteToMem, givenAppendWriteToMemOnBcsWithNoScopeThenFlushDwEncodedCorrectly) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Record the write with a zero-initialized (scope-less) descriptor.
    const uint64_t data = 0xabc;
    zex_write_to_mem_desc_t desc = {};
    const ze_result_t result = commandListBcs->appendWriteToMemory(reinterpret_cast<void *>(&desc), ptr, data);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Parse everything the command list has emitted so far.
    auto &stream = *commandListBcs->commandContainer.getCommandStream();
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(stream.getCpuBase(), 0), stream.getUsed()));

    auto flushCommands = findAll<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, flushCommands.size());

    bool postSyncFound = false;
    for (auto flushIt : flushCommands) {
        auto flushCmd = genCmdCast<MI_FLUSH_DW *>(*flushIt);
        if (flushCmd->getPostSyncOperation() != MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD) {
            continue;
        }
        EXPECT_EQ(flushCmd->getImmediateData(), data);
        postSyncFound = true;
    }
    ASSERT_TRUE(postSyncFound);
}
// Appends a write-to-memory with host write scope on the fixture's `commandList` and checks
// the resulting PIPE_CONTROL: post-sync immediate-data write carrying the value, CS stall
// enabled, and DC flush equal to what MemorySynchronizationCommands::getDcFlushEnable(true, ...)
// reports for the default hardware info.
HWTEST_F(CommandListAppendWriteToMem, givenAppendWriteToMemWithScopeThenPipeControlEncodedCorrectly) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    zex_write_to_mem_desc_t writeDesc = {};
    writeDesc.writeScope = ZEX_MEM_ACTION_SCOPE_FLAG_HOST;
    uint64_t immediateValue = 0xabc;

    ze_result_t status = commandList->appendWriteToMemory(reinterpret_cast<void *>(&writeDesc), ptr, immediateValue);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Parse everything that has been written to the command stream so far.
    auto &container = commandList->commandContainer;
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands,
        ptrOffset(container.getCommandStream()->getCpuBase(), 0),
        container.getCommandStream()->getUsed()));

    auto pipeControls = findAll<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(0u, pipeControls.size());

    // Only PIPE_CONTROLs that perform the immediate-data post-sync write are inspected.
    bool postSyncFound = false;
    for (auto pcIt : pipeControls) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pcIt);
        if (pipeControl->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            continue;
        }
        EXPECT_EQ(pipeControl->getImmediateData(), immediateValue);
        EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
        EXPECT_EQ(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
        postSyncFound = true;
    }
    ASSERT_TRUE(postSyncFound);
}
// Appends a host-scoped write-to-memory that targets page-sized, page-aligned system memory
// and checks that the PIPE_CONTROL post-sync write is programmed with the page-aligned GPU
// address of the allocation that getAlignedAllocation returns for that host pointer.
HWTEST2_F(CommandListAppendWriteToMem, givenAppendWriteToMemWithScopeThenPipeControlEncodedCorrectlyAlignedPtrUsed, IsAtLeastSkl) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    // The host buffer is owned by a guard: every ASSERT_* below returns from the test body on
    // failure, which would otherwise skip the release at the end and leak the buffer.
    // The guard is declared before the command list so that it is destroyed after it.
    auto memoryManager = device->getNEODevice()->getMemoryManager();
    size_t cmdListHostPtrSize = MemoryConstants::pageSize;
    void *cmdListHostBuffer = memoryManager->allocateSystemMemory(cmdListHostPtrSize, cmdListHostPtrSize);
    auto freeHostBuffer = [memoryManager](void *buffer) { memoryManager->freeSystemMemory(buffer); };
    std::unique_ptr<void, decltype(freeHostBuffer)> hostBufferGuard(cmdListHostBuffer, freeHostBuffer);

    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    // Establish the expected allocation, GPU address and offset for the host pointer.
    void *startMemory = cmdListHostBuffer;
    void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize);
    size_t expectedOffset = ptrDiff(startMemory, baseAddress);
    AlignedAllocationData outData = commandList->getAlignedAllocation(device, startMemory, cmdListHostPtrSize, false);
    ASSERT_NE(nullptr, outData.alloc);
    auto expectedGpuAddress = static_cast<uintptr_t>(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize));
    EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer());
    EXPECT_EQ(expectedGpuAddress, outData.alignedAllocationPtr);
    EXPECT_EQ(expectedOffset, outData.offset);

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto &commandContainer = commandList->commandContainer;
    zex_write_to_mem_desc_t desc = {};
    desc.writeScope = ZEX_MEM_ACTION_SCOPE_FLAG_HOST;
    uint64_t data = 0xabc;
    result = commandList->appendWriteToMemory(reinterpret_cast<void *>(&desc), cmdListHostBuffer, data);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Parse everything that has been written to the command stream so far.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, itorPC.size());

    // Only PIPE_CONTROLs that perform the immediate-data post-sync write are inspected.
    bool postSyncFound = false;
    for (auto it : itorPC) {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
        if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            EXPECT_EQ(cmd->getImmediateData(), data);
            EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
            EXPECT_EQ(NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable());
            // The command stores the address as separate low and high 32-bit parts.
            uint64_t pcAddress = cmd->getAddress() | (static_cast<uint64_t>(cmd->getAddressHigh()) << 32);
            EXPECT_EQ(expectedGpuAddress, pcAddress);
            postSyncFound = true;
        }
    }
    ASSERT_TRUE(postSyncFound);

    // Drop the host-pointer allocations before the guard releases the buffer at scope exit.
    commandList->removeHostPtrAllocations();
}
} // namespace ult

View File

@ -0,0 +1,103 @@
<!---
Copyright (C) 2022 Intel Corporation
SPDX-License-Identifier: MIT
-->
# Wait On Memory and Write To Memory
* [Overview](#Overview)
* [Definitions](#Definitions)
* [Known Issues and Limitations](#Known-Issues-and-Limitations)
# Overview
## Wait On Memory
Wait On Memory provides a low-level dependency mechanism through memory locations. It can be used to implement an event-like synchronization mechanism where the wait on a memory location is for a value written by an EU thread. A potential use case is to hide context-switching latencies between different engine classes.
This functionality is also useful in heterogeneous multi-device synchronization setups where, for example, a network card writes to a memory location to indicate that a packet is ready for consumption, and the device waits on that memory location via this extension.
## Write To Memory
Write To Memory is the counterpart to Wait On Memory; it allows for scenarios where the user prefers to write to the memory location from the accelerator device.
The usage models are similar to those of Wait On Memory: it can be used to set up a cross-engine dependency chain within a device, or to provide a mechanism for setting up heterogeneous multi-device dependency chains.
# Definitions
```cpp
typedef struct _zex_wait_on_mem_desc_t {
zex_wait_on_mem_action_flags_t actionFlag;
zex_mem_action_scope_flags_t waitScope;
} zex_wait_on_mem_desc_t;
typedef struct _zex_write_to_mem_desc_t {
zex_mem_action_scope_flags_t writeScope;
} zex_write_to_mem_desc_t;
```
## Interfaces
```cpp
zexCommandListAppendWaitOnMemory(
zex_command_list_handle_t hCommandList,
zex_wait_on_mem_desc_t *desc,
void *ptr,
uint32_t data,
zex_event_handle_t hSignalEvent);
zexCommandListAppendWriteToMemory(
zex_command_list_handle_t hCommandList,
zex_write_to_mem_desc_t *desc,
void *ptr,
uint64_t data);
```
## Enums
```cpp
typedef uint32_t zex_mem_action_scope_flags_t;
typedef enum _zex_mem_action_scope_flag_t {
ZEX_MEM_ACTION_SCOPE_FLAG_SUBDEVICE = ZEX_BIT(0),
ZEX_MEM_ACTION_SCOPE_FLAG_DEVICE = ZEX_BIT(1),
ZEX_MEM_ACTION_SCOPE_FLAG_HOST = ZEX_BIT(2),
ZEX_MEM_ACTION_SCOPE_FLAG_FORCE_UINT32 = 0x7fffffff
} zex_mem_action_scope_flag_t;
typedef uint32_t zex_wait_on_mem_action_flags_t;
typedef enum _zex_wait_on_mem_action_flag_t {
ZEX_WAIT_ON_MEMORY_FLAG_EQUAL = ZEX_BIT(0),
ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL = ZEX_BIT(1),
ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN = ZEX_BIT(2),
ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN_EQUAL = ZEX_BIT(3),
ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN = ZEX_BIT(4),
ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL = ZEX_BIT(5),
ZEX_WAIT_ON_MEMORY_FLAG_FORCE_UINT32 = 0x7fffffff
} zex_wait_on_mem_action_flag_t;
```
## Programming example
```cpp
// Create a descriptor for wait on mem
zex_wait_on_mem_desc_t waitDesc = {};
waitDesc.actionFlag = ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL;
uint32_t initData = 0;
// Initialize memory location
void *zeBuffer = nullptr;
// Allocate the memory location
zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer);
// Append Wait On Memory
zeDriverGetExtensionFunctionAddress(driverHandle, "zexCommandListAppendWaitOnMemory", pfnWaitMemFn);
pfnWaitMemFn(cmdList, &waitDesc, zeBuffer, initData, NULL);
// Append Write To Memory
zeDriverGetExtensionFunctionAddress(driverHandle, "zexCommandListAppendWriteToMemory", pfnWriteMemFn);
zex_write_to_mem_desc_t writeDesc = {};
uint32_t signalData = 0x2;
pfnWriteMemFn(cmdList, &writeDesc, zeBuffer, signalData);
```
# Known Issues and Limitations
* Hangs may be seen when BCS is used to append a memory copy with IPC buffers and Wait On Memory is performed on any queue (CCS or BCS).
  * Workaround: use a barrier after Wait On Memory and before any subsequent action.