pass event host scope flag to walker when single packet events are used

Related-To: NEO-7434

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-11-01 16:19:35 +00:00
committed by Compute-Runtime-Automation
parent 946dd27125
commit 29eee35491
4 changed files with 181 additions and 8 deletions

View File

@@ -33,6 +33,7 @@ struct CmdListKernelLaunchParams {
bool isKernelSplitOperation = false;
bool isBuiltInKernel = false;
bool isDestinationAllocationInSystemMemory = false;
bool isHostSignalScopeEvent = false;
};
struct CmdListReturnPoint {

View File

@@ -303,13 +303,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
return ret;
}
CmdListKernelLaunchParams launchParams = {};
Event *event = nullptr;
if (hEvent) {
event = Event::fromHandle(hEvent);
launchParams.isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
}
appendEventForProfiling(event, true, false);
CmdListKernelLaunchParams launchParams = {};
launchParams.isIndirect = true;
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
nullptr, launchParams);
@@ -332,9 +333,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
return ret;
}
CmdListKernelLaunchParams launchParams = {};
launchParams.isIndirect = true;
launchParams.isPredicate = true;
Event *event = nullptr;
if (hEvent) {
event = Event::fromHandle(hEvent);
launchParams.isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
}
appendEventForProfiling(event, true, false);
@@ -346,9 +352,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
for (uint32_t i = 0; i < numKernels; i++) {
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i);
CmdListKernelLaunchParams launchParams = {};
launchParams.isIndirect = true;
launchParams.isPredicate = true;
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
haveLaunchArguments ? &pLaunchArgumentsBuffer[i] : nullptr,
nullptr, launchParams);
@@ -1183,17 +1186,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
return ret;
}
CmdListKernelLaunchParams launchParams = {};
Event *signalEvent = nullptr;
if (hSignalEvent) {
signalEvent = Event::fromHandle(hSignalEvent);
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
}
uint32_t kernelCounter = leftSize > 0 ? 1 : 0;
kernelCounter += middleSizeBytes > 0 ? 1 : 0;
kernelCounter += rightSize > 0 ? 1 : 0;
CmdListKernelLaunchParams launchParams = {};
launchParams.isKernelSplitOperation = kernelCounter > 1;
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
@@ -1544,9 +1548,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
}
CmdListKernelLaunchParams launchParams = {};
Event *signalEvent = nullptr;
if (hSignalEvent) {
signalEvent = Event::fromHandle(hSignalEvent);
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
}
if (isCopyOnly()) {
@@ -1596,7 +1603,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
}
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
launchParams.isDestinationAllocationInSystemMemory = hostPointerNeedsFlush;

View File

@@ -167,7 +167,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
uint64_t eventAddress = 0;
bool isTimestampEvent = false;
bool l3FlushEnable = false;
bool isHostSignalScopeEvent = false;
bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent;
if (event) {
eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);

View File

@@ -1364,5 +1364,171 @@ HWTEST2_F(CreateCommandListXeHpcTest, whenFlagDisabledAndCreateImmediateCommandL
EXPECT_TRUE(static_cast<DeviceImp *>(device)->allocationsForReuse->peekIsEmpty());
}
// Bundle of raw handles passed to the shared XeHpc append-kernel test bodies.
// Every member defaults to nullptr; the fixture that follows in this file
// fills them in from its own driver/device/context during setUp().
struct AppendKernelXeHpcTestInput {
    DriverHandle *driver{nullptr}; // driver handle used when creating event pools
    L0::Context *context{nullptr}; // context used for memory allocation and release
    L0::Device *device{nullptr};   // device the command list and events are created on
};
template <int32_t usePipeControlMultiPacketEventSync>
struct CommandListAppendLaunchMultiKernelEventFixture : public LocalMemoryModuleFixture {
void setUp() {
DebugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync);
LocalMemoryModuleFixture::setUp();
input.driver = driverHandle.get();
input.device = device;
input.context = context;
}
template <GFXCORE_FAMILY gfxCoreFamily>
void testHostSignalScopeDeviceMemoryAppendMultiKernelCopy(AppendKernelXeHpcTestInput &input) {
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto &hwInfo = *input.device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
auto &hwConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
VariableBackup<unsigned short> hwRevId{&hwInfo.platform.usRevId};
hwRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo);
constexpr size_t size = 4096u;
constexpr size_t alignment = 4096u;
void *ptr = nullptr;
const void *srcPtr = reinterpret_cast<void *>(0x1234);
ze_device_mem_alloc_desc_t deviceDesc = {};
result = input.context->allocDeviceMem(input.device->toHandle(),
&deviceDesc,
size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
eventDesc.wait = 0;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, input.device));
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
result = commandList->initialize(input.device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
constexpr size_t offset = 32;
void *copyPtr = reinterpret_cast<uint8_t *>(ptr) + offset;
result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList commands;
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
commands,
commandList->commandContainer.getCommandStream()->getCpuBase(),
commandList->commandContainer.getCommandStream()->getUsed()));
auto itorWalkers = findAll<WALKER_TYPE *>(commands.begin(), commands.end());
EXPECT_NE(0u, itorWalkers.size());
for (const auto &it : itorWalkers) {
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*it);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
}
result = input.context->freeMem(ptr);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void testHostSignalScopeHostMemoryAppendMultiKernelCopy(AppendKernelXeHpcTestInput &input) {
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
ze_result_t result = ZE_RESULT_SUCCESS;
auto &hwInfo = *input.device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
auto &hwConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
VariableBackup<unsigned short> hwRevId{&hwInfo.platform.usRevId};
hwRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo);
constexpr size_t size = 4096u;
constexpr size_t alignment = 4096u;
void *ptr = nullptr;
const void *srcPtr = reinterpret_cast<void *>(0x1234);
ze_host_mem_alloc_desc_t hostDesc = {};
result = input.context->allocHostMem(&hostDesc, size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
eventDesc.wait = 0;
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, input.device));
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
result = commandList->initialize(input.device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
constexpr size_t offset = 32;
void *copyPtr = reinterpret_cast<uint8_t *>(ptr) + offset;
result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList commands;
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
commands,
commandList->commandContainer.getCommandStream()->getCpuBase(),
commandList->commandContainer.getCommandStream()->getUsed()));
auto itorWalkers = findAll<WALKER_TYPE *>(commands.begin(), commands.end());
EXPECT_NE(0u, itorWalkers.size());
for (const auto &it : itorWalkers) {
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*it);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
}
result = input.context->freeMem(ptr);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
AppendKernelXeHpcTestInput input;
};
// Test suite built on the fixture instantiated with 0, i.e. the fixture's setUp
// writes 0 to the UsePipeControlMultiKernelEventSync debug flag.
using CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore = Test<CommandListAppendLaunchMultiKernelEventFixture<0>>;
// Device-memory destination with a host-signal-scope event: delegates to the
// fixture method that expects no walker to request a system memory fence.
HWTEST2_F(CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore,
givenHwSupportsSystemFenceWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceNotUsed, IsXeHpcCore) {
testHostSignalScopeDeviceMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
}
// Host-memory destination with a host-signal-scope event: delegates to the
// fixture method that expects every walker to request a system memory fence.
HWTEST2_F(CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore,
givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
testHostSignalScopeHostMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
}
// Test suite built on the fixture instantiated with 1, i.e. the fixture's setUp
// writes 1 to the UsePipeControlMultiKernelEventSync debug flag.
using CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore = Test<CommandListAppendLaunchMultiKernelEventFixture<1>>;
// Device-memory destination with a host-signal-scope event: delegates to the
// fixture method that expects no walker to request a system memory fence.
HWTEST2_F(CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore,
givenHwSupportsSystemFenceWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceNotUsed, IsXeHpcCore) {
testHostSignalScopeDeviceMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
}
// Host-memory destination with a host-signal-scope event: delegates to the
// fixture method that expects every walker to request a system memory fence.
HWTEST2_F(CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore,
givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
testHostSignalScopeHostMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
}
} // namespace ult
} // namespace L0